https://github.com/mupq/pqm4
Raw File
Tip revision: 7f05252b1b093ce317d62d8798323b3f801c43e1 authored by Richard Petri on 16 January 2024, 22:34:10 UTC
Run only on push to master, and when PR review requested
Tip revision: 7f05252
keccakf1600.S
@
@ Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni,
@ Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby
@ denoted as "the implementer".
@ Additional optimizations by Alexandre Adomnicai.
@
@ For more information, feedback or questions, please refer to our websites:
@ http://keccak.noekeon.org/
@ http://keyak.noekeon.org/
@ http://ketje.noekeon.org/
@
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ http://creativecommons.org/publicdomain/zero/1.0/
@

@ WARNING: These functions work only on little-endian CPUs with the ARMv7-M architecture (ARM Cortex-M3, ...).


	.thumb					@ assemble the whole file as Thumb instructions
	.syntax unified			@ unified ARM/Thumb assembler syntax (UAL)
.text						@ everything below is emitted into the code section

	@ Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
@ toBitInterleaving x0, x1, s0, s1, t, over
@
@ Splits the word pair (x0, x1) into even-indexed and odd-indexed bits:
@   s0[15:0]  <- even bits of x0      s0[31:16] <- even bits of x1
@   s1[15:0]  <- odd  bits of x0      s1[31:16] <- odd  bits of x1
@
@   x0, x1  input registers (left unmodified)
@   s0, s1  output registers
@   t       scratch register (clobbered)
@   over    != 0 : s0/s1 are overwritten with the interleaved value
@           == 0 : the interleaved value is XORed into the current s0/s1
@
@ The final eors updates the condition flags.
.macro toBitInterleaving x0, x1, s0, s1, t, over

    @ even bits of x0, compacted into t[7:0] and t[23:16]
    and     \t, \x0, #0x55555555
    orr     \t, \t, \t, lsr #1
    and     \t, \t, #0x33333333
    orr     \t, \t, \t, lsr #2
    and     \t, \t, #0x0F0F0F0F
    orr     \t, \t, \t, lsr #4
    and     \t, \t, #0x00FF00FF
    bfi     \t, \t, #8, #8              @ duplicate t[7:0] into t[15:8]
.if \over != 0
    lsr     \s0, \t, #8                 @ s0 = 16 gathered bits (overwrite)
.else
    eor     \s0, \s0, \t, lsr #8        @ s0 ^= 16 gathered bits (accumulate)
.endif

    @ even bits of x1 -> upper half-word of s0
    and     \t, \x1, #0x55555555
    orr     \t, \t, \t, lsr #1
    and     \t, \t, #0x33333333
    orr     \t, \t, \t, lsr #2
    and     \t, \t, #0x0F0F0F0F
    orr     \t, \t, \t, lsr #4
    and     \t, \t, #0x00FF00FF
    orr     \t, \t, \t, lsr #8
    eor     \s0, \s0, \t, lsl #16

    @ odd bits of x0, compacted towards the top of t
    and     \t, \x0, #0xAAAAAAAA
    orr     \t, \t, \t, lsl #1
    and     \t, \t, #0xCCCCCCCC
    orr     \t, \t, \t, lsl #2
    and     \t, \t, #0xF0F0F0F0
    orr     \t, \t, \t, lsl #4
    and     \t, \t, #0xFF00FF00
    orr     \t, \t, \t, lsl #8
.if \over != 0
    lsr     \s1, \t, #16                @ s1 = 16 gathered bits (overwrite)
.else
    eor     \s1, \s1, \t, lsr #16       @ s1 ^= 16 gathered bits (accumulate)
.endif

    @ odd bits of x1 -> upper half-word of s1
    and     \t, \x1, #0xAAAAAAAA
    orr     \t, \t, \t, lsl #1
    and     \t, \t, #0xCCCCCCCC
    orr     \t, \t, \t, lsl #2
    and     \t, \t, #0xF0F0F0F0
    orr     \t, \t, \t, lsl #4
    and     \t, \t, #0xFF00FF00
    orr     \t, \t, \t, lsl #8
    bfc     \t, #0, #16                 @ keep only the upper half-word
    eors    \s1, \s1, \t
.endm

	@ Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
@ fromBitInterleaving x0, x1, t
@
@ In-place inverse of the even/odd split: on entry x0 holds the even-bit word
@ and x1 the odd-bit word of a pair; on exit x0/x1 hold the two words with
@ their bits back in natural order.
@
@   x0, x1  in/out registers
@   t       scratch register (clobbered)
@
@ movs/eors update the condition flags.
.macro fromBitInterleaving x0, x1, t

    @ exchange the middle half-words:
    @   x0 = (x0 & 0x0000FFFF) | (x1 << 16)
    @   x1 = (x1 & 0xFFFF0000) | (old x0 >> 16)
    movs    \t, \x0
    bfi     \x0, \x1, #16, #16
    bfc     \x1, #0, #16
    orr     \x1, \x1, \t, lsr #16

    @ x0: t = (x0 ^ (x0 >> 8)) & 0x0000FF00 ; x0 ^= t ^ (t << 8)
    eor     \t, \x0, \x0, lsr #8
    and     \t, \t, #0x0000FF00
    eors    \x0, \x0, \t
    eor     \x0, \x0, \t, lsl #8

    @ x0: t = (x0 ^ (x0 >> 4)) & 0x00F000F0 ; x0 ^= t ^ (t << 4)
    eor     \t, \x0, \x0, lsr #4
    and     \t, \t, #0x00F000F0
    eors    \x0, \x0, \t
    eor     \x0, \x0, \t, lsl #4

    @ x0: t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0C ; x0 ^= t ^ (t << 2)
    eor     \t, \x0, \x0, lsr #2
    and     \t, \t, #0x0C0C0C0C
    eors    \x0, \x0, \t
    eor     \x0, \x0, \t, lsl #2

    @ x0: t = (x0 ^ (x0 >> 1)) & 0x22222222 ; x0 ^= t ^ (t << 1)
    eor     \t, \x0, \x0, lsr #1
    and     \t, \t, #0x22222222
    eors    \x0, \x0, \t
    eor     \x0, \x0, \t, lsl #1

    @ x1: t = (x1 ^ (x1 >> 8)) & 0x0000FF00 ; x1 ^= t ^ (t << 8)
    eor     \t, \x1, \x1, lsr #8
    and     \t, \t, #0x0000FF00
    eors    \x1, \x1, \t
    eor     \x1, \x1, \t, lsl #8

    @ x1: t = (x1 ^ (x1 >> 4)) & 0x00F000F0 ; x1 ^= t ^ (t << 4)
    eor     \t, \x1, \x1, lsr #4
    and     \t, \t, #0x00F000F0
    eors    \x1, \x1, \t
    eor     \x1, \x1, \t, lsl #4

    @ x1: t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0C ; x1 ^= t ^ (t << 2)
    eor     \t, \x1, \x1, lsr #2
    and     \t, \t, #0x0C0C0C0C
    eors    \x1, \x1, \t
    eor     \x1, \x1, \t, lsl #2

    @ x1: t = (x1 ^ (x1 >> 1)) & 0x22222222 ; x1 ^= t ^ (t << 1)
    eor     \t, \x1, \x1, lsr #1
    and     \t, \t, #0x22222222
    eors    \x1, \x1, \t
    eor     \x1, \x1, \t, lsl #1
.endm

@ --- byte offsets of the 32-bit words inside the state
@ Each lane occupies 8 bytes: word <name>0 at lane*8 and word <name>1 at
@ lane*8 + 4. The lane-loading code stores the even-bit word first, so the
@ "0" words hold even-indexed bits and the "1" words odd-indexed bits.
.equ Aba0,  0*8
.equ Aba1,  0*8 + 4
.equ Abe0,  1*8
.equ Abe1,  1*8 + 4
.equ Abi0,  2*8
.equ Abi1,  2*8 + 4
.equ Abo0,  3*8
.equ Abo1,  3*8 + 4
.equ Abu0,  4*8
.equ Abu1,  4*8 + 4

.equ Aga0,  5*8
.equ Aga1,  5*8 + 4
.equ Age0,  6*8
.equ Age1,  6*8 + 4
.equ Agi0,  7*8
.equ Agi1,  7*8 + 4
.equ Ago0,  8*8
.equ Ago1,  8*8 + 4
.equ Agu0,  9*8
.equ Agu1,  9*8 + 4

.equ Aka0, 10*8
.equ Aka1, 10*8 + 4
.equ Ake0, 11*8
.equ Ake1, 11*8 + 4
.equ Aki0, 12*8
.equ Aki1, 12*8 + 4
.equ Ako0, 13*8
.equ Ako1, 13*8 + 4
.equ Aku0, 14*8
.equ Aku1, 14*8 + 4

.equ Ama0, 15*8
.equ Ama1, 15*8 + 4
.equ Ame0, 16*8
.equ Ame1, 16*8 + 4
.equ Ami0, 17*8
.equ Ami1, 17*8 + 4
.equ Amo0, 18*8
.equ Amo1, 18*8 + 4
.equ Amu0, 19*8
.equ Amu1, 19*8 + 4

.equ Asa0, 20*8
.equ Asa1, 20*8 + 4
.equ Ase0, 21*8
.equ Ase1, 21*8 + 4
.equ Asi0, 22*8
.equ Asi1, 22*8 + 4
.equ Aso0, 23*8
.equ Aso1, 23*8 + 4
.equ Asu0, 24*8
.equ Asu1, 24*8 + 4

@ --- byte offsets of the scratch slots addressed relative to sp
@ mDa0..mDi0 are used by the round macros to park combined parity words;
@ mRC holds the pointer to the current round constants.
.equ mDa0,  0*4
.equ mDa1,  1*4
.equ mDo0,  2*4
.equ mDo1,  3*4
.equ mDi0,  4*4
.equ mRC,   5*4
.equ mSize, 6*4     @ six words in total (presumably the frame size; its use is not visible here)

/******************************************************************************
 * XOR of two operands that carry different pending rotations.
 *
 *      dst = src1 ^ ROR(src2, (rot1 - rot2) mod 32)
 *
 * src2 is brought to the alignment of src1 by the barrel shifter, so no
 * extra instruction is spent on the rotation.
 *  - dst           destination register
 *  - src1-src2     source registers
 *  - rot1-rot2     pending rotation amounts of src1 and src2 (constants)
 *****************************************************************************/
.macro eorror   dst, src1, src2, rot1, rot2
.if \rot1 < \rot2
    eor     \dst, \src1, \src2, ror #32+\rot1-\rot2     @ wrap negative difference
.else
    eor     \dst, \src1, \src2, ror #\rot1-\rot2
.endif
.endm


/******************************************************************************
 * Bit clear (AND-NOT) of two operands that carry different pending rotations.
 *
 *      dst = src1 & ~ROR(src2, (rot1 - rot2) mod 32)
 *
 * src2 is brought to the alignment of src1 by the barrel shifter.
 *  - dst           destination register
 *  - src1-src2     source registers
 *  - rot1-rot2     pending rotation amounts of src1 and src2 (constants)
 *****************************************************************************/
.macro bicror   dst, src1, src2, rot1, rot2
.if \rot1 < \rot2
    bic     \dst, \src1, \src2, ror #32+\rot1-\rot2     @ wrap negative difference
.else
    bic     \dst, \src1, \src2, ror #\rot1-\rot2
.endif
.endm


/******************************************************************************
 * Load five state words (addressed relative to r0) and XOR them together.
 * Used to build the column parities of the Theta step.
 * Every operand may carry its own pending rotation; words 2 to 5 are aligned
 * to word 1 while being folded in, so the result has the alignment of word 1.
 *  - dst           destination register (also receives the first word)
 *  - src1-src5     state offsets of the five words
 *  - rot1-rot5     pending rotation amounts of the five words
 * Clobbers r1, r5, r11 and r12.
 *****************************************************************************/
.macro xor5   dst, src1, src2, src3, src4, src5, rot1, rot2, rot3, rot4, rot5
    @ fetch the five words
    ldr.w   \dst, [r0, #\src1]
    ldr.w   r1,   [r0, #\src2]
    ldr.w   r5,   [r0, #\src3]
    ldr     r11,  [r0, #\src4]
    ldr     r12,  [r0, #\src5]
    @ fold them into dst
    eorror  \dst, \dst, r1,  \rot1, \rot2
    eorror  \dst, \dst, r5,  \rot1, \rot3
    eorror  \dst, \dst, r11, \rot1, \rot4
    eorror  \dst, \dst, r12, \rot1, \rot5
.endm


/******************************************************************************
 * Same as xor5, with one extra store issued right after the five loads.
 * The original authors place the str there so that it comes "for free".
 *  - dst           destination register (also receives the first word)
 *  - src1-src5     state offsets of the five words
 *  - rot1-rot5     pending rotation amounts of the five words
 *  - strreg        register, holding an earlier result, to be stored
 *  - stradr        base register of the store (callers pass sp or r0)
 *  - strofs        byte offset added to stradr for the store
 * Clobbers r1, r5, r11 and r12.
 *****************************************************************************/
.macro xor5str   dst, src1, src2, src3, src4, src5, rot1, rot2, rot3, rot4, rot5, strreg, stradr, strofs
    @ fetch the five words
    ldr.w   \dst, [r0, #\src1]
    ldr.w   r1,   [r0, #\src2]
    ldr.w   r5,   [r0, #\src3]
    ldr     r11,  [r0, #\src4]
    ldr     r12,  [r0, #\src5]
    @ write back the pending value
    str.w   \strreg, [\stradr, #\strofs]
    @ fold the five words into dst
    eorror  \dst, \dst, r1,  \rot1, \rot2
    eorror  \dst, \dst, r5,  \rot1, \rot3
    eorror  \dst, \dst, r11, \rot1, \rot4
    eorror  \dst, \dst, r12, \rot1, \rot5
.endm


/******************************************************************************
 * XOR where the second operand is additionally rotated left by one bit:
 *
 *      dst = src1 ^ ROR(src2, rot - 1)
 *
 * With rot = 32 this is a plain rotate-left-by-one of src2.
 *  - dst           destination register
 *  - src1-src2     source registers
 *  - rot           differential rotation between src1 and src2
 *                  (i.e. rot = rot1 - rot2); callers pass values in 1..32
 *****************************************************************************/
.macro xorrol   dst, src1, src2, rot
    eor     \dst, \src1, \src2, ror #\rot-1
.endm


/******************************************************************************
 * One output word of the bitsliced Chi step on operands with pending
 * rotations, stored straight into the state:
 *
 *      r1 = src1 ^ (src3 & ~src2)      (each operand aligned to src3)
 *      state[resofs] = r1
 *
 * The result keeps the pending rotation of src3 (no Rho fix-up here).
 *  - resofs        state offset (relative to r0) receiving the result
 *  - src1-src3     source registers
 *  - rot1-rot3     pending rotation amounts of src1-src3
 * Clobbers r1.
 *****************************************************************************/
.macro xandnotlazystr   resofs, src1, src2, src3, rot1, rot2, rot3
    bicror  r1, \src3, \src2, \rot3, \rot2      @ r1 = src3 & ~src2
    eorror  r1, r1,    \src1, \rot3, \rot1      @ r1 ^= src1
    str.w   r1, [r0, #\resofs]
.endm


/******************************************************************************
 * Same computation as xandnotlazystr, but the result is only left in r1.
 * The store is issued later by whoever consumes r1, next to other loads.
 *  - src1-src3     source registers
 *  - rot1-rot3     pending rotation amounts of src1-src3
 * Result in r1.
 *****************************************************************************/
.macro xandnotlazy  src1, src2, src3, rot1, rot2, rot3
    bicror  r1, \src3, \src2, \rot3, \rot2      @ r1 = src3 & ~src2
    eorror  r1, r1,    \src1, \rot3, \rot1      @ r1 ^= src1
.endm


/******************************************************************************
 * Same as xandnotlazystr, followed by an explicit rotation of the result by
 * 32 - rot3 (skipped when rot3 is 0) before it is stored. The original
 * authors use it in KeccakRound3 to carry out the Rho step explicitly and
 * return to the classical representation every 4 rounds.
 *  - resofs        state offset (relative to r0) receiving the result
 *  - src1-src3     source registers
 *  - rot1-rot3     pending rotation amounts of src1-src3
 * Clobbers r1.
 *****************************************************************************/
.macro xandnotstr  resofs, src1, src2, src3, rot1, rot2, rot3
    bicror  r1, \src3, \src2, \rot3, \rot2      @ r1 = src3 & ~src2
    eorror  r1, r1,    \src1, \rot3, \rot1      @ r1 ^= src1
.if \rot3 > 0
    ror     r1, r1, #32-\rot3                   @ remove the pending rotation
.endif
    str.w   r1, [r0, #\resofs]
.endm


/******************************************************************************
 * Same computation as xandnotstr, but the result is only left in r1.
 * The store is issued later by whoever consumes r1, next to other loads.
 *  - src1-src3     source registers
 *  - rot1-rot3     pending rotation amounts of src1-src3
 * Result in r1.
 *****************************************************************************/
.macro xandnot  src1, src2, src3, rot1, rot2, rot3
    bicror  r1, \src3, \src2, \rot3, \rot2      @ r1 = src3 & ~src2
    eorror  r1, r1,    \src1, \rot3, \rot1      @ r1 ^= src1
.if \rot3 > 0
    ror     r1, r1, #32-\rot3                   @ remove the pending rotation
.endif
.endm


/******************************************************************************
 * Chi for the first word of a half plane followed by Iota. The operands are
 * implicit: r3 (first word), r4 (second word) and r5 (third word).
 *
 *      r5  = r5 & ~r4                  (r4 aligned to r5)
 *      r3  = r3 ^ r5                   (r5 rotated by 32 - rot3 if rot3 > 0)
 *      out = r3 ^ round-constant word
 *
 *  - out           output register (stored later by the caller)
 *  - rot3, rot2    pending rotation amounts of r5 and r4 (in this order)
 *  - rcofs         byte offset of the round-constant word, relative to the
 *                  pointer kept in the stack slot mRC
 *  - last          1 on the last Iota of the quadruple round: the pointer is
 *                  advanced by 32 bytes and written back to mRC, the word it
 *                  then points to is loaded into r7 and compared with 0xFF;
 *                  the flags are left set (presumably tested by the caller)
 * Clobbers r1, r3, r4, r5, plus r7 and the flags when last == 1.
 *****************************************************************************/
.macro xandnotiota    out, rot3, rot2, rcofs, last
    bicror  r5, r5, r4, \rot3, \rot2    @ r5 = r5 & ~r4
    ldr     r1, [sp, #mRC]              @ r1 = round-constant pointer
    ldr     r4, [r1, #\rcofs]           @ r4 = round-constant word
.if  \last == 1
    ldr     r7, [r1, #32]!              @ advance pointer, peek at next word
    str     r1, [sp, #mRC]
    cmp     r7, #0xFF
.endif
.if \rot3 > 0
    eor     r3, r3, r5, ror #32-\rot3
.else
    eor.w   r3, r3, r5
.endif
    eor.w   \out, r4, r3                @ Iota
.endm


/******************************************************************************
 * Helper of addparity: XOR one parity word into one state register. When the
 * register still carries a pending rotation, it is rotated by 32 - dly
 * through the barrel shifter while being combined.
 *  - reg           state register (in/out)
 *  - par           register containing the parity word
 *  - dly           pending rotation amount of reg (0 = none)
 *****************************************************************************/
.macro addparity_word reg, par, dly
.if \dly > 0
    eor    \reg, \par, \reg, ror #32-\dly
.else
    eor.w  \reg, \par, \reg
.endif
.endm


/******************************************************************************
 * Add the parity words to the state registers r3-r7. Registers that are not
 * properly aligned because of earlier lazy rotations are fixed up on the fly
 * by the barrel shifter.
 *  - par1-par5     registers containing the parity words for r3..r7
 *  - dly1-dly5     pending (delayed Rho) rotation amounts of r3..r7
 *****************************************************************************/
.macro addparity par1, dly1, par2, dly2, par3, dly3, par4, dly4, par5, dly5
    addparity_word  r3, \par1, \dly1
    addparity_word  r4, \par2, \dly2
    addparity_word  r5, \par3, \dly3
    addparity_word  r6, \par4, \dly4
    addparity_word  r7, \par5, \dly5
.endm


/******************************************************************************
 * Apply the Theta, Pi, Chi and Iota steps to half a plane (five 32-bit words)
 * of the state, in place: words 2 to 5 are written back to their own source
 * offsets, the first word (with the round constant added) is returned in a
 * register for a later store.
 * The Rho step is carried out explicitly only when lazy is 0; otherwise it is
 * delayed until the next round ("lazy rotations" through the barrel shifter).
 *  - src1-src5     state offsets (relative to r0) of the five words
 *  - par1-par5     registers containing the parity words
 *  - rot2-rot5     rotation amounts of the current Rho step (word 1 has none)
 *  - dly1-dly5     rotation amounts of the delayed Rho step of the inputs
 *  - ofs           byte offset of the round-constant word (see xandnotiota)
 *  - last          1 on the last Iota of the quadruple round
 *  - lazy          1 to delay the current Rho step, 0 to apply it
 *  - strofs        state offset (relative to r0) where r1, the pending result
 *                  of the preceding macro, is stored
 *  - reg           register receiving the first output word
 * Clobbers r1 and r3-r7 (see also xandnotiota).
 *****************************************************************************/
.macro    KeccakThetaRhoPiChiIota   src1, par1,       dly1, \
                                    src2, par2, rot2, dly2, \
                                    src3, par3, rot3, dly3, \
                                    src4, par4, rot4, dly4, \
                                    src5, par5, rot5, dly5, \
                                    ofs,  last, lazy, strofs, reg
    @ load the five words, then flush the pending result held in r1
    ldr.w   r3, [r0, #\src1]
    ldr     r4, [r0, #\src2]
    ldr     r5, [r0, #\src3]
    ldr     r6, [r0, #\src4]
    ldr     r7, [r0, #\src5]
    str.w   r1, [r0, #\strofs]
    @ Theta
    addparity   \par1, \dly1, \par2, \dly2, \par3, \dly3, \par4, \dly4, \par5, \dly5
    @ Chi on words 2..5 (word 1 is handled together with Iota below)
.if \lazy == 1
    xandnotlazystr  \src2, r4, r5, r6, \rot2, \rot3, \rot4
    xandnotlazystr  \src3, r5, r6, r7, \rot3, \rot4, \rot5
    xandnotlazystr  \src4, r6, r7, r3, \rot4, \rot5,     0
    xandnotlazystr  \src5, r7, r3, r4, \rot5,     0, \rot2
.else
    xandnotstr      \src2, r4, r5, r6, \rot2, \rot3, \rot4
    xandnotstr      \src3, r5, r6, r7, \rot3, \rot4, \rot5
    xandnotstr      \src4, r6, r7, r3, \rot4, \rot5,     0
    xandnotstr      \src5, r7, r3, r4, \rot5,     0, \rot2
.endif
    @ Chi on word 1 + Iota
    xandnotiota     \reg, \rot3, \rot2, \ofs, \last
.endm


/******************************************************************************
 * Apply the Theta, Pi and Chi steps to half a plane (five 32-bit words) of
 * the state. The first four outputs are stored at dst1-dst4; the fifth is
 * left in r1 and stored by the macro that runs next (through its strofs).
 * The Rho step is carried out explicitly only when lazy is 0; otherwise it is
 * delayed until the next round ("lazy rotations" through the barrel shifter).
 *  - src1-src5     state offsets (relative to r0) of the five input words
 *  - dst1-dst5     state offsets (relative to r0) of the output words
 *                  (dst5 is not referenced by this macro itself)
 *  - par1-par5     registers containing the parity words
 *  - rot1-rot5     rotation amounts of the current Rho step
 *  - dly1-dly5     rotation amounts of the delayed Rho step of the inputs
 *  - lazy          1 to delay the current Rho step, 0 to apply it
 *  - strofs        state offset (relative to r0) where r1, the pending result
 *                  of the preceding macro, is stored
 * Clobbers r1 and r3-r7.
 *****************************************************************************/
.macro    KeccakThetaRhoPiChi   src1, dst1, par1, rot1, dly1, \
                                src2, dst2, par2, rot2, dly2, \
                                src3, dst3, par3, rot3, dly3, \
                                src4, dst4, par4, rot4, dly4, \
                                src5, dst5, par5, rot5, dly5, \
                                lazy, strofs
    @ load the five words, then flush the pending result held in r1
    ldr.w   r3, [r0, #\src1]
    ldr.w   r4, [r0, #\src2]
    ldr.w   r5, [r0, #\src3]
    ldr.w   r6, [r0, #\src4]
    ldr.w   r7, [r0, #\src5]
    str.w   r1, [r0, #\strofs]
    @ Theta
    addparity   \par1, \dly1, \par2, \dly2, \par3, \dly3, \par4, \dly4, \par5, \dly5
    @ Chi: four words stored, the fifth kept in r1
.if \lazy == 1
    xandnotlazystr  \dst1, r3, r4, r5, \rot1, \rot2, \rot3
    xandnotlazystr  \dst2, r4, r5, r6, \rot2, \rot3, \rot4
    xandnotlazystr  \dst3, r5, r6, r7, \rot3, \rot4, \rot5
    xandnotlazystr  \dst4, r6, r7, r3, \rot4, \rot5, \rot1
    xandnotlazy            r7, r3, r4, \rot5, \rot1, \rot2
.else
    xandnotstr      \dst1, r3, r4, r5, \rot1, \rot2, \rot3
    xandnotstr      \dst2, r4, r5, r6, \rot2, \rot3, \rot4
    xandnotstr      \dst3, r5, r6, r7, \rot3, \rot4, \rot5
    xandnotstr      \dst4, r6, r7, r3, \rot4, \rot5, \rot1
    xandnot                r7, r3, r4, \rot5, \rot1, \rot2
.endif
.endm


/******************************************************************************
 * 1st round of the 4 unrolled rounds routine due to in-place processing.
 * At the beginning of such rounds, the internal state is expected to match the
 * classical representation (i.e. without transition and no delayed Rho step).
 *
 * Register usage visible in this macro: r0 is the state base address, sp the
 * base of the scratch slots (mDa0 ... mRC). All passes use lazy == 1, so the
 * results are stored with their Rho rotation still pending. The first output
 * word of the second Iota pass is left in r14 and is not stored here.
 *****************************************************************************/
.macro    KeccakRound0
    @ Theta, part 1: XOR the five words of each half column (xor5 / xor5str)
    @ and combine the parities with xorrol / eor. All rotation arguments are 0
    @ because no rotation is pending on entry. Combined words that are not
    @ needed immediately are parked in the stack slots mDa0, mDo1, mDi0, mDa1
    @ and mDo0 by the xor5str stores.
    xor5      r3, Abu0, Agu0, Aku0, Amu0, Asu0, 0, 0, 0, 0, 0
    xor5      r7, Abe1, Age1, Ake1, Ame1, Ase1, 0, 0, 0, 0, 0
    xorrol    r6, r3, r7, 32
    xor5str   r4, Abi1, Agi1, Aki1, Ami1, Asi1, 0, 0, 0, 0, 0, r6, sp, mDa0
    eor.w     r6, r3, r4
    xor5str   r3, Abo0, Ago0, Ako0, Amo0, Aso0, 0, 0, 0, 0, 0, r6, sp, mDo1
    eor.w     r2, r7, r3
    xor5      r7, Aba0, Aga0, Aka0, Ama0, Asa0, 0, 0, 0, 0, 0
    xorrol   r10, r7, r4, 32
    xor5      r4, Abo1, Ago1, Ako1, Amo1, Aso1, 0, 0, 0, 0, 0
    eor      r14, r4, r7
    xor5      r7, Abe0, Age0, Ake0, Ame0, Ase0, 0, 0, 0, 0, 0
    xorrol    r6, r7, r4, 32
    xor5str   r4, Abu1, Agu1, Aku1, Amu1, Asu1, 0, 0, 0, 0, 0, r6, sp, mDi0
    eor.w     r8, r4, r7
    xor5str   r7, Abi0, Agi0, Aki0, Ami0, Asi0, 0, 0, 0, 0, 0, r8, sp, mDa1
    xorrol    r9, r7, r4, 32
    xor5str   r4, Aba1, Aga1, Aka1, Ama1, Asa1, 0, 0, 0, 0, 0, r9, sp, mDo0
    eor      r11, r4, r7
    xorrol   r12, r3, r4, 32
    @ Theta part 2, Pi and Chi: ten half-plane passes of five words each.
    @ The last argument of every pass is the state offset where r1, the
    @ result left pending by the previous pass, is stored. For the first pass
    @ nothing is pending yet: its store goes to Aka1, which the same pass
    @ rewrites afterwards as its first output.
    KeccakThetaRhoPiChi Abo0, Aka1,  r9, 14, 0, \
                        Agu0, Ame1, r12, 10, 0, \
                        Aka1, Asi1,  r8,  2, 0, \
                        Ame1, Abo0, r11, 23, 0, \
                        Asi1, Agu0,  r2, 31, 0, \
                        1, Aka1
    KeccakThetaRhoPiChi Abe0, Asa1, r10,  0, 0, \
                        Agi1, Abe0,  r2,  3, 0, \
                        Ako0, Agi1,  r9, 12, 0, \
                        Amu1, Ako0, r14,  4, 0, \
                        Asa1, Amu1,  r8,  9, 0, \
                        1, Agu0
    @ r8 <- word parked at mDa0
    ldr         r8, [sp, #mDa0]
    KeccakThetaRhoPiChi Abu1, Aga0, r14, 14, 0, \
                        Aga0, Ake0,  r8, 18, 0, \
                        Ake0, Ami1, r10,  5, 0, \
                        Ami1, Aso0,  r2,  8, 0, \
                        Aso0, Abu1,  r9, 28, 0, \
                        1, Amu1
    KeccakThetaRhoPiChi Abi1, Ama0,  r2, 31, 0, \
                        Ago0, Ase1,  r9, 27, 0, \
                        Aku0, Abi1, r12, 19, 0, \
                        Ama0, Ago0,  r8, 20, 0, \
                        Ase1, Aku0, r11,  1, 0, \
                        1, Abu1
    @ r9 <- word parked at mDo1
    ldr         r9, [sp, #mDo1]
    @ Pass including Iota: round-constant word at offset 0; first output word
    @ returned in r1 and stored by the next pass (at Aba0).
    KeccakThetaRhoPiChiIota Aba0,  r8,  0,    \
                            Age0, r10, 22, 0, \
                            Aki1,  r2, 22, 0, \
                            Amo1,  r9, 11, 0, \
                            Asu0, r12,  7, 0, \
                            0, 0, 1, Aku0, r1
    @ r2 <- word parked at mDi0
    ldr.w       r2, [sp, #mDi0]
    KeccakThetaRhoPiChi Abo1, Aka0,  r9, 14, 0, \
                        Agu1, Ame0, r14, 10, 0, \
                        Aka0, Asi0,  r8,  1, 0, \
                        Ame0, Abo1, r10, 22, 0, \
                        Asi0, Agu1,  r2, 30, 0, \
                        1, Aba0
    KeccakThetaRhoPiChi Abe1, Asa0, r11,  1, 0, \
                        Agi0, Abe1,  r2,  3, 0, \
                        Ako1, Agi0,  r9, 13, 0, \
                        Amu0, Ako1, r12,  4, 0, \
                        Asa0, Amu0,  r8,  9, 0, \
                        1, Agu1
    @ r8 <- word parked at mDa1
    ldr         r8, [sp, #mDa1]
    KeccakThetaRhoPiChi Abu0, Aga1, r12, 13, 0, \
                        Aga1, Ake1,  r8, 18, 0, \
                        Ake1, Ami0, r11,  5, 0, \
                        Ami0, Aso1,  r2,  7, 0, \
                        Aso1, Abu0,  r9, 28, 0, \
                        1, Amu0
    KeccakThetaRhoPiChi Abi0, Ama1,  r2, 31, 0, \
                        Ago1, Ase0,  r9, 28, 0, \
                        Aku1, Abi0, r14, 20, 0, \
                        Ama1, Ago1,  r8, 21, 0, \
                        Ase0, Aku1, r10,  1, 0, \
                        1, Abu0
    @ r9 <- word parked at mDo0
    ldr         r9, [sp, #mDo0]
    @ Pass including Iota: round-constant word at offset 4; first output word
    @ returned in r14 and left there when the macro ends.
    KeccakThetaRhoPiChiIota Aba1,  r8,  0,    \
                            Age1, r11, 22, 0, \
                            Aki0,  r2, 21, 0, \
                            Amo0,  r9, 10, 0, \
                            Asu1, r14,  7, 0, \
                            4, 0, 1, Aku1, r14
.endm



/******************************************************************************
 * 2nd round of the 4 unrolled rounds routine due to in-place processing.
 *
 * On entry r14 holds a word that is first stored at Aba1. The state words are
 * read with non-zero rotation / delay arguments, i.e. with the Rho rotations
 * of the previous round still pending. All passes use lazy == 1 again. The
 * first output word of the second Iota pass is left in r14 and is not stored
 * here.
 *****************************************************************************/
.macro    KeccakRound1
    @ Theta, part 1: column parities. The first xor5str also stores r14 at
    @ Aba1 (base register r0). The rotation arguments give the pending
    @ rotation of every loaded word; the explicit ror instructions and the
    @ adjusted xorrol / eor operands re-align the partial results. Combined
    @ words are parked in the stack slots mDa0, mDo1, mDi0, mDa1 and mDo0.
    xor5str     r3, Asu0, Agu0, Amu0, Abu1, Aku1, 22, 10,  3, 18, 28, r14, r0, Aba1
    xor5        r7, Age1, Ame0, Abe0, Ake1, Ase1, 10, 22,  4,  7, 20
    ror         r3, 32-22
    xorrol      r6, r3, r7, 32-10
    xor5str     r4, Aki0, Asi0, Agi1, Ami0, Abi1,  7, 30,  9, 28,  1, r6, sp, mDa0
    eor         r6, r3, r4, ror 32-7
    xor5str     r3, Amo1, Abo0, Ako1, Aso0, Ago1,  0, 14,  1, 14, 31, r6, sp, mDo1
    eor         r2, r3, r7, ror 32-10
    xor5        r7, Aba0, Aka1, Asa0, Aga0, Ama1,  0,  2, 13,  5, 20
    xorrol     r10, r7, r4, 32-7
    xor5        r4, Amo0, Abo1, Ako0, Aso1, Ago0,  0, 14,  0, 13, 31
    eor        r14, r4, r7
    xor5        r7, Age0, Ame1, Abe1, Ake0, Ase0, 11, 23,  4,  8, 21
    ror         r7, 32-11
    xorrol      r6, r7, r4, 32
    xor5str     r4, Asu1, Agu1, Amu1, Abu0, Aku0, 22, 10,  3, 18, 27, r6, sp, mDi0
    eor         r8, r7, r4, ror 32-22
    xor5str     r7, Aki1, Asi1, Agi0, Ami1, Abi0,  7, 31,  9, 28,  1, r8, sp, mDa1
    ror         r7, 32-7
    xorrol      r9, r7, r4, 32-22
    xor5str     r4, Aba1, Aka0, Asa1, Aga1, Ama0,  0,  1, 12,  5, 19, r9, sp, mDo0
    eor        r11, r4, r7
    xorrol     r12, r3, r4, 32
    @ Theta part 2, Pi and Chi: ten half-plane passes. The fifth column of
    @ every argument row is the pending rotation of the loaded word. The last
    @ argument is the state offset where r1, left pending by the previous
    @ pass, is stored; for the first pass it is Asa1, which that pass rewrites
    @ afterwards as its first output.
    KeccakThetaRhoPiChi Amo1, Asa1,  r9, 14,  0, \
                        Agu0, Ake1, r12, 10, 10, \
                        Asa1, Abi1,  r8,  2, 12, \
                        Ake1, Amo1, r11, 23,  7, \
                        Abi1, Agu0,  r2, 31,  1, \
                        1, Asa1
    KeccakThetaRhoPiChi Age0, Ama0, r10,  0, 11, \
                        Asi0, Age0,  r2,  3, 30, \
                        Ako1, Asi0,  r9, 12,  1, \
                        Abu0, Ako1, r14,  4, 18, \
                        Ama0, Abu0,  r8,  9, 19, \
                        1, Agu0
    @ r8 <- word parked at mDa0
    ldr         r8, [sp, #mDa0]
    KeccakThetaRhoPiChi Asu1, Aka1, r14, 14, 22, \
                        Aka1, Abe1,  r8, 18,  2, \
                        Abe1, Ami0, r10,  5,  4, \
                        Ami0, Ago1,  r2,  8, 28, \
                        Ago1, Asu1,  r9, 28, 31, \
                        1, Abu0
    KeccakThetaRhoPiChi Aki0, Aga0,  r2, 31,  7, \
                        Abo0, Ase1,  r9, 27, 14, \
                        Amu0, Aki0, r12, 19,  3, \
                        Aga0, Abo0,  r8, 20,  5, \
                        Ase1, Amu0, r11,  1, 20, \
                        1, Asu1
    @ r9 <- word parked at mDo1
    ldr         r9, [sp, #mDo1]
    @ Pass including Iota: round-constant word at offset 8; first output word
    @ returned in r1 and stored by the next pass (at Aba0).
    KeccakThetaRhoPiChiIota Aba0,  r8,  0,     \
                            Ame1, r10, 22, 23, \
                            Agi1,  r2, 22,  9, \
                            Aso1,  r9, 11, 13, \
                            Aku1, r12,  7, 28, \
                            8, 0, 1, Amu0, r1
    @ r2 <- word parked at mDi0
    ldr.w         r2, [sp, #mDi0]
    KeccakThetaRhoPiChi Amo0, Asa0,  r9, 14,  0, \
                        Agu1, Ake0, r14, 10, 10, \
                        Asa0, Abi0,  r8,  1, 13, \
                        Ake0, Amo0, r10, 22,  8, \
                        Abi0, Agu1,  r2, 30,  1, \
                        1, Aba0
    KeccakThetaRhoPiChi Age1, Ama1, r11,  1, 10, \
                        Asi1, Age1,  r2,  3, 31, \
                        Ako0, Asi1,  r9, 13,  0, \
                        Abu1, Ako0, r12,  4, 18, \
                        Ama1, Abu1,  r8,  9, 20, \
                        1, Agu1
    @ r8 <- word parked at mDa1
    ldr         r8, [sp, #mDa1]
    KeccakThetaRhoPiChi Asu0, Aka0, r12, 13, 22, \
                        Aka0, Abe0,  r8, 18,  1, \
                        Abe0, Ami1, r11,  5,  4, \
                        Ami1, Ago0,  r2,  7, 28, \
                        Ago0, Asu0,  r9, 28, 31, \
                        1, Abu1
    KeccakThetaRhoPiChi Aki1, Aga1,  r2, 31,  7, \
                        Abo1, Ase0,  r9, 28, 14, \
                        Amu1, Aki1, r14, 20,  3, \
                        Aga1, Abo1,  r8, 21,  5, \
                        Ase0, Amu1, r10,  1, 21, \
                        1, Asu0
    @ r9 <- word parked at mDo0
    ldr         r9, [sp, #mDo0]
    @ Pass including Iota: round-constant word at offset 12; first output word
    @ returned in r14 and left there when the macro ends.
    KeccakThetaRhoPiChiIota Aba1,  r8,  0,     \
                            Ame0, r11, 22, 22, \
                            Agi0,  r2, 21,  9, \
                            Aso0,  r9, 10, 14, \
                            Aku0, r14,  7, 27, \
                            12, 0, 1, Amu1, r14
.endm

/******************************************************************************
 * 3rd round of the 4 unrolled rounds routine due to in-place processing.
 *
 * Same structure as KeccakRound1 with a different assignment of state words:
 * r14 is first stored at Aba1, the state words are read with their Rho
 * rotations still pending, and all passes use lazy == 1. The first output
 * word of the second Iota pass is left in r14 and is not stored here.
 *****************************************************************************/
.macro    KeccakRound2
    @ Theta, part 1: column parities. The first xor5str also stores r14 at
    @ Aba1 (base register r0). Combined words are parked in the stack slots
    @ mDa0, mDo1, mDi0, mDa1 and mDo0.
    xor5str     r3, Aku1, Agu0, Abu1, Asu1, Amu1, 22, 10,  3, 18, 28, r14, r0, Aba1
    xor5        r7, Ame0, Ake0, Age0, Abe0, Ase1, 10, 22,  4,  7, 20
    ror         r3, 32-22
    xorrol      r6, r3, r7, 32-10
    xor5str     r4, Agi0, Abi0, Asi0, Ami1, Aki0,  7, 30,  9, 28,  1, r6, sp, mDa0
    eor         r6, r3, r4, ror 32-7
    xor5str     r3, Aso1, Amo1, Ako0, Ago1, Abo1,  0, 14,  1, 14, 31, r6, sp, mDo1
    eor         r2, r3, r7, ror 32-10
    xor5        r7, Aba0, Asa1, Ama1, Aka1, Aga1,  0,  2, 13,  5, 20
    xorrol     r10, r7, r4, 32-7
    xor5        r4, Aso0, Amo0, Ako1, Ago0, Abo0,  0, 14,  0, 13, 31
    eor        r14, r4, r7
    xor5        r7, Ame1, Ake1, Age1, Abe1, Ase0, 11, 23,  4,  8, 21
    ror         r7, 32-11
    xorrol      r6, r7, r4, 32
    xor5str     r4, Aku0, Agu1, Abu0, Asu0, Amu0, 22, 10,  3, 18, 27, r6, sp, mDi0
    eor         r8, r7, r4, ror 32-22
    xor5str     r7, Agi1, Abi1, Asi1, Ami0, Aki1,  7, 31,  9, 28,  1, r8, sp, mDa1
    ror         r7, 32-7
    xorrol      r9, r7, r4, 32-22
    xor5str     r4, Aba1, Asa0, Ama0, Aka0, Aga0,  0,  1, 12,  5, 19, r9, sp, mDo0
    eor        r11, r4, r7
    xorrol     r12, r3, r4, 32
    @ Theta part 2, Pi and Chi: ten half-plane passes. The last argument is
    @ the state offset where r1, left pending by the previous pass, is stored;
    @ for the first pass it is Ama0, which that pass rewrites afterwards as
    @ its first output.
    KeccakThetaRhoPiChi Aso1, Ama0,  r9, 14,  0, \
                        Agu0, Abe0, r12, 10, 10, \
                        Ama0, Aki0,  r8,  2, 12, \
                        Abe0, Aso1, r11, 23,  7, \
                        Aki0, Agu0,  r2, 31,  1, \
                        1, Ama0
    KeccakThetaRhoPiChi Ame1, Aga0, r10,  0, 11, \
                        Abi0, Ame1,  r2,  3, 30, \
                        Ako0, Abi0,  r9, 12,  1, \
                        Asu0, Ako0, r14,  4, 18, \
                        Aga0, Asu0,  r8,  9, 19, \
                        1, Agu0
    @ r8 <- word parked at mDa0
    ldr     r8, [sp, #mDa0]
    KeccakThetaRhoPiChi Aku0, Asa1, r14, 14, 22, \
                        Asa1, Age1,  r8, 18,  2, \
                        Age1, Ami1, r10,  5,  4, \
                        Ami1, Abo1,  r2,  8, 28, \
                        Abo1, Aku0,  r9, 28, 31, \
                        1, Asu0
    KeccakThetaRhoPiChi Agi0, Aka1,  r2, 31,  7, \
                        Amo1, Ase1,  r9, 27, 14, \
                        Abu1, Agi0, r12, 19,  3, \
                        Aka1, Amo1,  r8, 20,  5, \
                        Ase1, Abu1, r11,  1, 20, \
                        1, Aku0
    @ r9 <- word parked at mDo1
    ldr     r9, [sp, #mDo1]
    @ Pass including Iota: round-constant word at offset 16; first output word
    @ returned in r1 and stored by the next pass (at Aba0).
    KeccakThetaRhoPiChiIota Aba0, r8,  0,     \
                            Ake1, r10,22, 23, \
                            Asi0, r2, 22,  9, \
                            Ago0, r9, 11, 13, \
                            Amu1, r12, 7, 28, \
                            16, 0, 1, Abu1, r1
    @ r2 <- word parked at mDi0
    ldr.w   r2, [sp, #mDi0]
    KeccakThetaRhoPiChi Aso0, Ama1,  r9, 14,  0, \
                        Agu1, Abe1, r14, 10, 10, \
                        Ama1, Aki1,  r8,  1, 13, \
                        Abe1, Aso0, r10, 22,  8, \
                        Aki1, Agu1,  r2, 30,  1, \
                        1, Aba0
    KeccakThetaRhoPiChi Ame0, Aga1, r11,  1, 10, \
                        Abi1, Ame0,  r2,  3, 31, \
                        Ako1, Abi1,  r9, 13,  0, \
                        Asu1, Ako1, r12,  4, 18, \
                        Aga1, Asu1,  r8,  9, 20, \
                        1, Agu1
    @ r8 <- word parked at mDa1
    ldr     r8, [sp, #mDa1]
    KeccakThetaRhoPiChi Aku1, Asa0, r12, 13, 22, \
                        Asa0, Age0,  r8, 18,  1, \
                        Age0, Ami0, r11,  5,  4, \
                        Ami0, Abo0,  r2,  7, 28, \
                        Abo0, Aku1,  r9, 28, 31, \
                        1, Asu1
    KeccakThetaRhoPiChi Agi1, Aka0,  r2, 31,  7, \
                        Amo0, Ase0,  r9, 28, 14, \
                        Abu0, Agi1, r14, 20,  3, \
                        Aka0, Amo0,  r8, 21,  5, \
                        Ase0, Abu0, r10,  1, 21, \
                        1, Aku1
    @ r9 <- word parked at mDo0
    ldr     r9, [sp, #mDo0]
    @ Pass including Iota: round-constant word at offset 20; first output word
    @ returned in r14 and left there when the macro ends.
    KeccakThetaRhoPiChiIota Aba1,  r8,  0,     \
                            Ake0, r11, 22, 22, \
                            Asi1,  r2, 21,  9, \
                            Ago1,  r9, 10, 14, \
                            Amu0, r14,  7, 27, \
                            20, 0, 1, Abu0, r14

.endm


/******************************************************************************
 * 4th round of the 4 unrolled rounds routine due to in-place processing.
 * Note that the Rho step is *not* delayed so that the internal state is
 * compliant w/ the classical representation at the end of the routine. 
 *
 * On entry r14 holds a word that is first stored at Aba1. All passes use
 * lazy == 0, and every pass writes its outputs to the offsets of the plane it
 * is named after (Ag*, Ak*, Am*, As*, Ab*). The second Iota pass is invoked
 * with last == 1, which advances the round-constant pointer and leaves the
 * flags set by its cmp; the final str writes the last pending word to Aba1.
 *****************************************************************************/
.macro    KeccakRound3
    @ Theta, part 1: column parities. The first xor5str also stores r14 at
    @ Aba1 (base register r0). Combined words are parked in the stack slots
    @ mDa0, mDo1, mDi0, mDa1 and mDo0.
    xor5str     r3, Amu1, Agu0, Asu1, Aku0, Abu0, 22, 10,  3, 18, 28, r14, r0, Aba1
    xor5        r7, Ake0, Abe1, Ame1, Age0, Ase1, 10, 22,  4,  7, 20
    ror         r3, 32-22
    xorrol      r6, r3, r7, 32-10
    xor5str     r4, Asi1, Aki1, Abi0, Ami0, Agi0,  7, 30,  9, 28,  1, r6, sp, mDa0
    eor         r6, r3, r4, ror 32-7
    xor5str     r3, Ago0, Aso1, Ako1, Abo1, Amo0,  0, 14,  1, 14, 31, r6, sp, mDo1
    eor         r2, r3, r7, ror 32-10
    xor5        r7, Aba0, Ama0, Aga1, Asa1, Aka0,  0,  2, 13,  5, 20
    xorrol     r10, r7, r4, 32-7
    xor5        r4, Ago1, Aso0, Ako0, Abo0, Amo1,  0, 14,  0, 13, 31
    eor        r14, r4, r7
    xor5        r7, Ake1, Abe0, Ame0, Age1, Ase0, 11, 23,  4,  8, 21
    ror         r7, #32-11
    xorrol      r6, r7, r4, 32
    xor5str     r4, Amu0, Agu1, Asu0, Aku1, Abu1, 22, 10,  3, 18, 27, r6, sp, mDi0
    eor         r8, r7, r4, ror 32-22
    xor5str     r7, Asi0, Aki0, Abi1, Ami1, Agi1,  7, 31,  9, 28,  1, r8, sp, mDa1
    ror         r7, 32-7
    xorrol      r9, r7, r4, 32-22
    xor5str     r4, Aba1, Ama1, Aga0, Asa0, Aka1,  0,  1, 12,  5, 19, r9, sp, mDo0
    eor        r11, r4, r7
    xorrol     r12, r3, r4, 32
    @ Theta part 2, Rho, Pi and Chi: ten half-plane passes with lazy == 0, so
    @ the results are rotated explicitly before being stored. The last
    @ argument is the state offset where r1, left pending by the previous
    @ pass, is stored; for the first pass it is Aga0, which that pass rewrites
    @ afterwards as its first output.
    KeccakThetaRhoPiChi     Ago0, Aga0,  r9, 14,  0, \
                            Agu0, Age0, r12, 10, 10, \
                            Aga0, Agi0,  r8,  2, 12, \
                            Age0, Ago0, r11, 23,  7, \
                            Agi0, Agu0,  r2, 31,  1, \
                            0, Aga0
    KeccakThetaRhoPiChi     Ake1, Aka1, r10,  0, 11, \
                            Aki1, Ake1,  r2,  3, 30, \
                            Ako1, Aki1,  r9, 12,  1, \
                            Aku1, Ako1, r14,  4, 18, \
                            Aka1, Aku1,  r8,  9, 19, \
                            0, Agu0
    @ r8 <- word parked at mDa0
    ldr     r8, [sp, #mDa0]
    KeccakThetaRhoPiChi     Amu0, Ama0, r14, 14, 22, \
                            Ama0, Ame0,  r8, 18,  2, \
                            Ame0, Ami0, r10,  5,  4, \
                            Ami0, Amo0,  r2,  8, 28, \
                            Amo0, Amu0,  r9, 28, 31, \
                            0, Aku1
    KeccakThetaRhoPiChi     Asi1, Asa1,  r2, 31,  7, \
                            Aso1, Ase1,  r9, 27, 14, \
                            Asu1, Asi1, r12, 19,  3, \
                            Asa1, Aso1,  r8, 20,  5, \
                            Ase1, Asu1, r11,  1, 20, \
                            0, Amu0
    @ r9 <- word parked at mDo1
    ldr     r9, [sp, #mDo1]
    @ Pass including Iota: round-constant word at offset 24; first output word
    @ returned in r1 and stored by the next pass (at Aba0).
    KeccakThetaRhoPiChiIota Aba0,  r8,  0,     \
                            Abe0, r10, 22, 23, \
                            Abi0,  r2, 22,  9, \
                            Abo0,  r9, 11, 13, \
                            Abu0, r12,  7, 28, \
                            24, 0, 0, Asu1, r1
    @ r2 <- word parked at mDi0
    ldr.w   r2, [sp, #mDi0]
    KeccakThetaRhoPiChi     Ago1, Aga1,  r9, 14,  0, \
                            Agu1, Age1, r14, 10, 10, \
                            Aga1, Agi1,  r8,  1, 13, \
                            Age1, Ago1, r10, 22,  8, \
                            Agi1, Agu1,  r2, 30,  1, \
                            0, Aba0
    KeccakThetaRhoPiChi     Ake0, Aka0, r11,  1, 10, \
                            Aki0, Ake0,  r2,  3, 31, \
                            Ako0, Aki0,  r9, 13,  0, \
                            Aku0, Ako0, r12,  4, 18, \
                            Aka0, Aku0,  r8,  9, 20, \
                            0, Agu1
    @ r8 <- word parked at mDa1
    ldr     r8, [sp, #mDa1]
    KeccakThetaRhoPiChi     Amu1, Ama1, r12, 13, 22, \
                            Ama1, Ame1,  r8, 18,  1, \
                            Ame1, Ami1, r11,  5,  4, \
                            Ami1, Amo1,  r2,  7, 28, \
                            Amo1, Amu1,  r9, 28, 31, \
                            0, Aku0
    KeccakThetaRhoPiChi     Asi0, Asa0,  r2, 31,  7, \
                            Aso0, Ase0,  r9, 28, 14, \
                            Asu0, Asi0, r14, 20,  3, \
                            Asa0, Aso0,  r8, 21,  5, \
                            Ase0, Asu0, r10,  1, 21, \
                            0, Amu1
    @ r9 <- word parked at mDo0
    ldr     r9, [sp, #mDo0]
    @ Last pass: round-constant word at offset 28 and last == 1, i.e. the
    @ round-constant pointer in mRC is advanced by 32 bytes and the word it
    @ then points to is compared with 0xFF (flags are left set).
    KeccakThetaRhoPiChiIota Aba1,  r8, 0,      \
                            Abe1, r11, 22, 22, \
                            Abi1,  r2, 21,  9, \
                            Abo1,  r9, 10, 14, \
                            Abu1, r14,  7, 27, \
                            28, 1, 0, Asu0, r1
    @ store the first output word of the last pass; str.w leaves flags intact
    str.w r1, [r0, #Aba1]
.endm


@----------------------------------------------------------------------------
@
@ void KeccakF1600_Initialize( void )
@
@ Nothing to set up: the routine returns immediately and touches neither
@ registers nor memory.
@
@ The symbol is typed as a function so that the assembler/linker mark it as
@ a Thumb entry point (bit 0 set when its address is taken); without the
@ .type directive an indirect call through a function pointer would fault
@ on ARMv7-M.
@
.align 8
.global   KeccakF1600_Initialize
.type     KeccakF1600_Initialize, %function
KeccakF1600_Initialize:
	bx		lr
.size     KeccakF1600_Initialize, . - KeccakF1600_Initialize



@----------------------------------------------------------------------------
@
@ void KeccakF1600_StateXORBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length)
@
@ XORs `length` bytes read from `data` into the (bit-interleaved) state,
@ starting `offset` bytes into the state. The work is split in three parts:
@   1. the bytes needed to reach the next 8-byte lane boundary (if offset
@      is not a multiple of 8),
@   2. as many whole 8-byte lanes as remain,
@   3. the trailing bytes (fewer than 8) at the start of the last lane.
@
@ Input:
@  r0 state pointer
@  r1 data pointer
@  r2 offset (bytes)
@  r3 length (bytes)
@
@ r4-r8 are saved and restored; the helpers called here clobber r2-r7, which
@ is why the remaining length is parked in r8 across each call.
@
.align 8
.global   KeccakF1600_StateXORBytes
.type     KeccakF1600_StateXORBytes, %function
KeccakF1600_StateXORBytes:
	cbz		r3, KeccakF1600_StateXORBytes_Exit1		@ .if length != 0
	push	{r4 - r8, lr}							@ then
	bic		r4, r2, #7								@ r4 = offset & ~7 (byte offset of the first lane touched)
	adds	r0, r0, r4								@ add whole lane offset to state pointer
	ands	r2, r2, #7								@ offset &= 7 (part not lane aligned)
	beq		KeccakF1600_StateXORBytes_CheckLanes	@ .if offset != 0
	movs	r4, r3									@ then, do remaining bytes in first lane
	rsb		r5, r2, #8								@ max size in lane = 8 - offset
	cmp		r4, r5
	bls		KeccakF1600_StateXORBytes_BytesAlign	@ unsigned compare: length is an unsigned int
	movs	r4, r5									@ clamp to what fits in the first lane
KeccakF1600_StateXORBytes_BytesAlign:
	sub		r8, r3, r4								@ size left
	movs	r3, r4
	bl		__KeccakF1600_StateXORBytesInLane
	mov		r3, r8
KeccakF1600_StateXORBytes_CheckLanes:
	lsrs	r2, r3, #3								@ .if length >= 8 (r2 = number of whole lanes)
	beq		KeccakF1600_StateXORBytes_Bytes
	mov		r8, r3									@ r3 is used as a temporary by the lane helper
	bl		__KeccakF1600_StateXORLanes
	and		r3, r8, #7								@ bytes left after the whole lanes
KeccakF1600_StateXORBytes_Bytes:
	cbz		r3, KeccakF1600_StateXORBytes_Exit
	movs	r2, #0									@ trailing bytes start at offset 0 of the lane
	bl		__KeccakF1600_StateXORBytesInLane
KeccakF1600_StateXORBytes_Exit:
	pop		{r4 - r8, pc}
KeccakF1600_StateXORBytes_Exit1:
	bx		lr
.size     KeccakF1600_StateXORBytes, . - KeccakF1600_StateXORBytes


@----------------------------------------------------------------------------
@
@ __KeccakF1600_StateXORLanes
@
@ XORs laneCount whole 8-byte lanes from the data buffer into the state.
@ Each lane is read as two 32-bit words, converted to the bit-interleaved
@ representation and XORed into the two state words of that lane.
@
@ Input:
@  r0 state pointer (accessed with ldrd/strd)
@  r1 data pointer (read one word at a time)
@  r2 laneCount, must be non-zero (the counter is tested after the first pass)
@
@ Output:
@  r0 state pointer, advanced past the processed lanes
@  r1 data pointer, advanced past the consumed bytes
@
@ Changed: r2-r7 (r3 is the scratch register of toBitInterleaving)
@
.align 8
__KeccakF1600_StateXORLanes:
__KeccakF1600_StateXORLanes_LoopAligned:
	ldrd	r6, r7, [r0]							@ current state lane
	ldr		r4, [r1], #4							@ low word of the input lane
	ldr		r5, [r1], #4							@ high word of the input lane
	toBitInterleaving	r4, r5, r6, r7, r3, 0		@ r6:r7 ^= interleaved(r4:r5)
	subs	r2, r2, #1								@ one lane done; flags survive the strd below
	strd	r6, r7, [r0], #8						@ write back and step to the next lane
	bne		__KeccakF1600_StateXORLanes_LoopAligned
	bx		lr


@----------------------------------------------------------------------------
@
@ __KeccakF1600_StateXORBytesInLane
@
@ XORs up to 8 bytes into a single state lane. The input bytes are copied
@ into a zero-filled 8-byte scratch lane on the stack at the requested
@ offset; that scratch lane is then bit-interleaved and XORed into the state
@ lane, so the untouched byte positions contribute zero.
@
@ Input:
@  r0 state pointer (lane to update)
@  r1 data pointer
@  r2 offset in lane
@  r3 length, must be non-zero (the counter is tested after the first byte)
@
@ Output:
@  r0 state pointer next lane
@  r1 data pointer, advanced past the consumed bytes
@
@  Changed: r2-r7
@
.align 8
__KeccakF1600_StateXORBytesInLane:
	movs	r5, #0
	movs	r4, #0
	push	{ r4, r5 }								@ 8 zero bytes on the stack: the scratch lane
	add		r2, sp, r2								@ r2 = address of scratch byte [offset]
__KeccakF1600_StateXORBytesInLane_Loop:
	ldrb	r5, [r1], #1
	subs	r3, r3, #1								@ strb below leaves the flags alone
	strb	r5, [r2], #1
	bne		__KeccakF1600_StateXORBytesInLane_Loop
	ldrd	r6, r7, [r0]							@ current state lane
	pop		{ r4, r5 }								@ r4:r5 = scratch lane, stack restored
	toBitInterleaving	r4, r5, r6, r7, r3, 0		@ r6:r7 ^= interleaved(r4:r5)
	strd	r6, r7, [r0], #8
	bx		lr




@----------------------------------------------------------------------------
@
@ void KeccakF1600_StateExtractBytes(void *state, unsigned char *data, unsigned int offset, unsigned int length)
@
@ Copies `length` bytes out of the (bit-interleaved) state, starting `offset`
@ bytes into the state, to `data`. The state is only read; `data` is
@ written. The work is split in three parts:
@   1. the bytes up to the next 8-byte lane boundary (if offset is not a
@      multiple of 8),
@   2. as many whole 8-byte lanes as remain,
@   3. the trailing bytes (fewer than 8) from the start of the last lane.
@
@ Input:
@  r0 state pointer
@  r1 data pointer (output buffer)
@  r2 offset (bytes)
@  r3 length (bytes)
@
@ r4-r8 are saved and restored; the helpers called here clobber registers in
@ r2-r6, which is why the remaining length is parked in r8 across each call.
@
.align 8
.global   KeccakF1600_StateExtractBytes
.type     KeccakF1600_StateExtractBytes, %function
KeccakF1600_StateExtractBytes:
	cbz		r3, KeccakF1600_StateExtractBytes_Exit1	@ .if length != 0
	push	{r4 - r8, lr}							@ then
	bic		r4, r2, #7								@ r4 = offset & ~7 (byte offset of the first lane touched)
	adds	r0, r0, r4								@ add whole lane offset to state pointer
	ands	r2, r2, #7								@ offset &= 7 (part not lane aligned)
	beq		KeccakF1600_StateExtractBytes_CheckLanes	@ .if offset != 0
	movs	r4, r3									@ then, do remaining bytes in first lane
	rsb		r5, r2, #8								@ max size in lane = 8 - offset
	cmp		r4, r5
	bls		KeccakF1600_StateExtractBytes_BytesAlign	@ unsigned compare: length is an unsigned int
	movs	r4, r5									@ clamp to what the first lane can supply
KeccakF1600_StateExtractBytes_BytesAlign:
	sub		r8, r3, r4								@ size left
	movs	r3, r4
	bl		__KeccakF1600_StateExtractBytesInLane
	mov		r3, r8
KeccakF1600_StateExtractBytes_CheckLanes:
	lsrs	r2, r3, #3								@ .if length >= 8 (r2 = number of whole lanes)
	beq		KeccakF1600_StateExtractBytes_Bytes
	mov		r8, r3									@ r3 is used as a temporary by the lane helper
	bl		__KeccakF1600_StateExtractLanes
	and		r3, r8, #7								@ bytes left after the whole lanes
KeccakF1600_StateExtractBytes_Bytes:
	cbz		r3, KeccakF1600_StateExtractBytes_Exit
	movs	r2, #0									@ trailing bytes start at offset 0 of the lane
	bl		__KeccakF1600_StateExtractBytesInLane
KeccakF1600_StateExtractBytes_Exit:
	pop		{r4 - r8, pc}
KeccakF1600_StateExtractBytes_Exit1:
	bx		lr
.size     KeccakF1600_StateExtractBytes, . - KeccakF1600_StateExtractBytes


@----------------------------------------------------------------------------
@
@ __KeccakF1600_StateExtractLanes
@
@ Copies laneCount whole 8-byte lanes from the state to the data buffer.
@ Each lane is loaded from the state, passed through fromBitInterleaving
@ and stored to the output as two 32-bit words.
@
@ Input:
@  r0 state pointer (read with ldrd)
@  r1 data pointer (output, written one word at a time)
@  r2 laneCount, must be non-zero (the counter is tested after the first pass)
@
@ Output:
@  r0 state pointer next lane
@  r1 data pointer, advanced past the bytes written
@
@ Changed: r2-r5 (r3 is the scratch register of fromBitInterleaving)
@
.align 8
__KeccakF1600_StateExtractLanes:
__KeccakF1600_StateExtractLanes_LoopAligned:
	ldrd	r4, r5, [r0], #8						@ fetch lane, step to the next one
	fromBitInterleaving	r4, r5, r3
	subs	r2, r2, #1								@ the stores below leave the flags alone
	str		r4, [r1], #4							@ low word
	str		r5, [r1], #4							@ high word
	bne		__KeccakF1600_StateExtractLanes_LoopAligned
	bx		lr


@----------------------------------------------------------------------------
@
@ __KeccakF1600_StateExtractBytesInLane
@
@ Copies up to 8 bytes out of a single state lane. The lane is loaded,
@ passed through fromBitInterleaving and spilled to an 8-byte scratch area
@ on the stack; the requested bytes are then copied from that scratch area,
@ starting at the given offset, to the output buffer.
@
@ Input:
@  r0 state pointer (lane to read)
@  r1 data pointer (output)
@  r2 offset in lane
@  r3 length, must be non-zero (the counter is tested after the first byte)
@
@ Output:
@  r0 state pointer next lane
@  r1 data pointer, advanced past the bytes written
@
@  Changed: r2-r6
@
.align 8
__KeccakF1600_StateExtractBytesInLane:
	ldrd	r4, r5, [r0], #8						@ fetch lane, step to the next one
	fromBitInterleaving	r4, r5, r6
	push	{ r4, r5 }								@ spill the lane so it can be read bytewise
	add		r2, r2, sp								@ r2 = address of scratch byte [offset]
__KeccakF1600_StateExtractBytesInLane_Loop:
	ldrb	r4, [r2], #1
	strb	r4, [r1], #1
	subs	r3, r3, #1
	bne		__KeccakF1600_StateExtractBytesInLane_Loop
	add		sp, sp, #8								@ drop the scratch lane without reloading it
	bx		lr



@ Round constant table used by KeccakF1600_StatePermute, which stores the
@ address of this table in its stack frame before running the rounds.
@
@ Layout: 24 entries of two 32-bit words each, followed by a single
@ terminator word. The constants are kept in bit-interleaved form: the first
@ word of an entry holds the even-numbered bits and the second word the
@ odd-numbered bits of the 64-bit Keccak round constant (e.g. the constant
@ 0x0000000000008082 appears as 0x00000000, 0x00000089).
@ The entries are not listed in plain round order.
@ NOTE(review): the order presumably matches the sequence in which the
@ KeccakRound0..3 macros fetch the words - confirm against the macro bodies.
@ NOTE(review): the terminator word (0xFF) is presumably what ends the round
@ loop; the test itself is not in this part of the file - confirm.
@
@ On ARM, GNU as takes the .align argument as a power of two, so ".align 8"
@ requests 256-byte alignment.
.align 8
KeccakF1600_StatePermute_RoundConstantsWithTerminator:
	@		word 0 (even bits)	word 1 (odd bits)
		.long 		0x00000001,	0x00000000
		.long 		0x00000000,	0x00000089
		.long 		0x00000000,	0x8000008b
		.long 		0x00000000,	0x80008080

		.long 		0x00000001,	0x0000008b
		.long 		0x00000001,	0x00008000
		.long 		0x00000001,	0x80008088
		.long 		0x00000001,	0x80000082

		.long 		0x00000000,	0x0000000b
		.long 		0x00000000,	0x0000000a
		.long 		0x00000001,	0x00008082
		.long 		0x00000000,	0x00008003

		.long 		0x00000001,	0x0000808b
		.long 		0x00000001,	0x8000000b
		.long 		0x00000001,	0x8000008a
		.long 		0x00000001,	0x80000081

		.long 		0x00000000,	0x80000081
		.long 		0x00000000,	0x80000008
		.long 		0x00000000,	0x00000083
		.long 		0x00000000,	0x80008003

		.long 		0x00000001,	0x80008088
		.long 		0x00000000,	0x80000088
		.long 		0x00000001,	0x00008000
		.long 		0x00000000,	0x80008082

		.long 		0x000000FF	@terminator

@----------------------------------------------------------------------------
@
@ void KeccakF1600_StatePermute( void *state )
@
@ Runs the round macros KeccakRound0..KeccakRound3 over the state in a loop
@ until the flags left by KeccakRound3 signal the end.
@
@ Input:
@  r0 state pointer
@
@ Stack frame: r4-r12 and lr are saved, then mSize bytes of scratch are
@ reserved below them; the slot at offset mRC holds the pointer to the
@ round constant table. mSize and mRC are defined elsewhere in this file.
@
@ The symbol is typed as a function so that the assembler/linker mark it as
@ a Thumb entry point (bit 0 set when its address is taken).
@
.align 8
.global   KeccakF1600_StatePermute
.type     KeccakF1600_StatePermute, %function
KeccakF1600_StatePermute:
	adr		r1, KeccakF1600_StatePermute_RoundConstantsWithTerminator
	push	{ r4 - r12, lr }
	sub		sp, #mSize								@ reserve the scratch frame used by the round macros
	str		r1, [sp, #mRC]							@ round constant pointer lives in the frame
KeccakF1600_StatePermute_RoundLoop:
	KeccakRound0
	KeccakRound1
	KeccakRound2
	KeccakRound3
	@ NOTE(review): the condition tested here is set inside KeccakRound3,
	@ presumably by comparing the next table word with the terminator - confirm.
	bne		KeccakF1600_StatePermute_RoundLoop
	add		sp, #mSize
	pop		{ r4 - r12, pc }
.size     KeccakF1600_StatePermute, . - KeccakF1600_StatePermute

back to top