Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

Revision 605bdf68df0f239ac2c74b752cc65b3e3bbe4275 authored by dependabot[bot] on 01 November 2024, 13:22:26 UTC, committed by GitHub on 01 November 2024, 13:22:26 UTC
Bump libopencm3 from `1f3abd4` to `201f5bc`
Bumps [libopencm3](https://github.com/libopencm3/libopencm3) from `1f3abd4` to `201f5bc`.
- [Commits](https://github.com/libopencm3/libopencm3/compare/1f3abd43763fa39d23e737602b6d0011a45c70b2...201f5bcfb3fa70ee34818152463e7139f24db377)

---
updated-dependencies:
- dependency-name: libopencm3
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
1 parent 3200c9e
  • Files
  • Changes
  • 91eae56
  • /
  • common
  • /
  • aes-encrypt.S
Raw File Download
Permalinks

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • revision
  • directory
  • content
revision badge
swh:1:rev:605bdf68df0f239ac2c74b752cc65b3e3bbe4275
directory badge Iframe embedding
swh:1:dir:d376b3f4e65914f88ce37b5814b27eec41c0664d
content badge Iframe embedding
swh:1:cnt:2f19ff7b37a28a77fb148244fd43ab353c81ff50
Citations

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • revision
  • directory
  • content
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
aes-encrypt.S
/******************************************************************************
* Assembly fixsliced implementation of AES-128 and AES-256 (encryption only).
*
* Fully-fixsliced implementation runs faster than the semi-fixsliced variant
* at the cost of a larger code size.
*
* See the paper at https://eprint.iacr.org/2020/1123.pdf for more details.
*
* @author   Alexandre Adomnicai, Nanyang Technological University, Singapore
*           alexandre.adomnicai@ntu.edu.sg
*
* @date     October 2020
******************************************************************************/

.syntax unified
.thumb

/******************************************************************************
* Macro to compute the SWAPMOVE technique: swap the bits in 'in1' masked by 'm'
* by the bits in 'in0' masked by 'm << n' and put the results in 'out0', 'out1'
******************************************************************************/
.macro swpmv out0, out1, in0, in1, m, n, tmp
    eor     \tmp, \in1, \in0, lsr \n
    and     \tmp, \m
    eor     \out1, \in1, \tmp
    eor     \out0, \in0, \tmp, lsl \n
.endm

/******************************************************************************
* Rotate all bytes in 'in' by 'n0' bits to the rights and put the results in
* 'out'. 'm' refers to the appropriate bitmask and 'n1' = 8-'n0'.
******************************************************************************/
.macro byteror  out, in, m, n0, n1, tmp
    and     \out, \m, \in, lsr \n0
    bic     \tmp, \in, \m, ror \n1
    orr     \out, \out, \tmp, lsl \n1
.endm

/******************************************************************************
* Compute the MixColumns for rounds i st i%4 == 0 or 2.
* Between the two versions, only the masks and the shifts for the 'byteror' are
* differing.
******************************************************************************/
.macro mc_0_2   m, n0, n1, n2, n3
    byteror r14, r1, \m, \n0, \n1, r9   // r14 <- BYTE_ROR_n0(S0)
    eor     r4, r1, r14, ror #8         // r4 <- S0 ^ (BYTE_ROR_6(S0) >>> 8)
    movw    r1, #0x0f0f
    movt    r1, #0x0f0f                 // r1 <- 0x0f0f0f0f (for BYTE_ROR)
    byteror r5, r11, \m, \n0, \n1, r9   // r5 <- BYTE_ROR_n0(S7)
    eor     r10, r11, r5, ror #8        // r10<- S7 ^ BYTE_ROR_n0(S7 >>> 8)
    byteror r11, r10, r1, 4, 4, r9      // r11<- BYTE_ROR_4(r10)
    eor     r11, r4, r11, ror #16       // r11<- BYTE_ROR_4(r10) ^ (r10 >>> 16)
    eor     r11, r11, r5, ror #8        // r11<- S'7
    byteror r5, r2, \m, \n0, \n1, r9    // r5 <- BYTE_ROR_n0(S6)
    eor     r2, r2, r5, ror #8          // r2 <- S6 ^ BYTE_ROR_n0(S6 >>> 8)
    eor     r10, r10, r5, ror #8        // r10<- r10 ^ (BYTE_ROR_n0(S6) >>> 8)
    byteror r5, r2, r1, 4, 4, r9        // r5 <- BYTE_ROR_4(r2)
    eor     r10, r10, r5, ror #16       // r10<- r10 ^ (r5 >>> 16)
    eor     r10, r10, r4                // r10<- S'6
    byteror r5, r0, \m, \n0, \n1, r9    // r5 <- BYTE_ROR_n0(S5)
    eor     r0, r0, r5, ror #8          // r0 <- S5 ^ BYTE_ROR_6(S5 >>> 8)
    eor     r9, r2, r5, ror #8          // r9 <- r2 ^ (BYTE_ROR_n0(S5) >>> 8)
    byteror r5, r0, r1, 4, 4, r2        // r5 <- BYTE_ROR_4(r0)
    eor     r9, r9, r5, ror #16         // r9 <- S'5
    byteror r5, r8, \m, \n0, \n1, r2    // r5 <- BYTE_ROR_n0(S4)
    eor     r2, r8, r5, ror #8          // r2 <- S4 ^ BYTE_ROR_6(S4 >>> 8)
    eor     r8, r0, r5, ror #8          // r8 <- r0 ^ (BYTE_ROR_n0(S4) >>> 8)
    byteror r5, r2, r1, 4, 4, r0        // r5 <- BYTE_ROR_4(r2)
    eor     r8, r8, r5, ror #16         // r8 <- r8 ^ (r5 >>> 16)
    eor     r8, r8, r4                  // r8 <- S'4
    byteror r5, r7, \m, \n0, \n1, r0    // r5 <- BYTE_ROR_n0(S3)
    eor     r0, r7, r5, ror #8          // r0 <- S3 ^ BYTE_ROR_6(S3 >>> 8)
    eor     r7, r2, r5, ror #8          // r2 ^ (BYTE_ROR_n0(S3) >>> 8)
    byteror r5, r0, r1, 4, 4, r2        // r5 <- BYTE_ROR_4(r0)
    eor     r7, r7, r5, ror #16         // r7 <- r7 ^ (r5 >>> 16)
    eor     r7, r7, r4                  // r7 <- S'3
    byteror r5, r6, \m, \n0, \n1, r2    // r5 <- BYTE_ROR_n0(S2)
    eor     r2, r6, r5, ror #8          // r2 <- S2 ^ BYTE_ROR_6(S2 >>> 8)
    eor     r6, r0, r5, ror #8          // r6 <- r0 ^ (BYTE_ROR_n0(S2) >>> 8)
    byteror r5, r2, r1, 4, 4, r0        // r5 <- BYTE_ROR_4(r2)
    eor     r6, r6, r5, ror #16         // r6 <- S'2
    byteror r5, r3, \m, \n0, \n1, r0    // r5 <- BYTE_ROR_n0(S1)
    eor     r0, r3, r5, ror #8          // r0 <- S1 ^ BYTE_ROR_6(S1 >>> 8)
    eor     r3, r2, r5, ror #8          // r3 <- r0 ^ (BYTE_ROR_n0(S1) >>> 8)
    byteror r5, r0, r1, 4, 4, r2        // r5 <- BYTE_ROR_4(r0)
    eor     r5, r3, r5, ror #16         // r5 <- S'1
    eor     r14, r0, r14, ror #8        // r14<- r0 ^ (BYTE_ROR_n0(S0) >>> 8)
    byteror r0, r4, r1, 4, 4, r2        // r0 <- BYTE_ROR_4(r4)
    eor     r4, r14, r0, ror #16        // r4 <- S'0
.endm

/******************************************************************************
* Packs two 128-bit input blocs stored in r4-r7 and r8-r11, respectively, into
* the 256-bit internal state where the bits are packed as follows:
* r4 = b_24 b_56 b_88 b_120 || ... || b_0 b_32 b_64 b_96
* r5 = b_25 b_57 b_89 b_121 || ... || b_1 b_33 b_65 b_97
* r6 = b_26 b_58 b_90 b_122 || ... || b_2 b_34 b_66 b_98
* r7 = b_27 b_59 b_91 b_123 || ... || b_3 b_35 b_67 b_99
* r8 = b_28 b_60 b_92 b_124 || ... || b_4 b_36 b_68 b_100
* r9 = b_29 b_61 b_93 b_125 || ... || b_5 b_37 b_69 b_101
* r10 = b_30 b_62 b_94 b_126 || ... || b_6 b_38 b_70 b_102
* r11 = b_31 b_63 b_95 b_127 || ... || b_7 b_39 b_71 b_103
******************************************************************************/
.align 2
packing:
    movw    r3, #0x0f0f
    movt    r3, #0x0f0f             // r3 <- 0x0f0f0f0f (mask for SWAPMOVE)
    eor     r2, r3, r3, lsl #2      // r2 <- 0x33333333 (mask for SWAPMOVE)
    eor     r1, r2, r2, lsl #1      // r1 <- 0x55555555 (mask for SWAPMOVE)
    swpmv   r8, r4, r8, r4, r1, #1, r12
    swpmv   r9, r5, r9, r5, r1, #1, r12
    swpmv   r10, r6, r10, r6, r1, #1, r12
    swpmv   r11, r7, r11, r7, r1, #1, r12
    swpmv   r0, r4, r5, r4, r2, #2, r12
    swpmv   r9, r5, r9, r8, r2, #2, r12
    swpmv   r7, r8, r7, r6, r2, #2, r12
    swpmv   r11, r2, r11, r10, r2, #2, r12
    swpmv   r8, r4, r8, r4, r3, #4, r12
    swpmv   r10, r6, r7, r0, r3, #4, r12
    swpmv   r11, r7, r11, r9, r3, #4, r12
    swpmv   r9, r5, r2, r5, r3, #4, r12
    bx      lr

/******************************************************************************
* Unpacks the 256-bit internal state in two 128-bit blocs.
******************************************************************************/
.align 2
unpacking:
    movw    r3, #0x0f0f
    movt    r3, #0x0f0f                 // r3 <- 0x0f0f0f0f (mask for SWAPMOVE)
    swpmv   r2, r5, r9, r5, r3, #4, r12
    swpmv   r11, r9, r11, r7, r3, #4, r12
    swpmv   r7, r1, r10, r6, r3, #4, r12
    swpmv   r8, r4, r8, r4, r3, #4, r12
    eor     r3, r3, r3, lsl #2          // r3 <- 0x33333333 (mask for SWAPMOVE)
    swpmv   r11, r10,r11, r2, r3, #2, r12
    swpmv   r7, r6, r7, r8, r3, #2, r12
    swpmv   r9, r8, r9, r5, r3, #2, r12
    swpmv   r5, r4, r1, r4, r3, #2, r12
    eor     r1, r3, r3, lsl #1          // r1 <- 0x55555555 (mask for SWAPMOVE)
    swpmv   r8, r4, r8, r4, r1, #1, r12
    swpmv   r9, r5,r9, r5, r1, #1, r12
    swpmv   r10, r6, r10, r6, r1, #1, r12
    swpmv   r11, r7, r11, r7, r1, #1, r12
    bx      lr

/******************************************************************************
* Subroutine that computes the AddRoundKey and the S-box.
* Credits to https://github.com/Ko-/aes-armcortexm for the S-box implementation
******************************************************************************/
.align 2
ark_sbox:
    // add round key
    ldr.w   r1, [sp, #48]
    ldmia   r1!, {r0,r2,r3,r12}
    eor     r4, r0
    eor     r5, r2
    eor     r6, r3
    eor     r7, r12
    ldmia   r1!, {r0,r2,r3,r12}
    eor     r8, r0
    eor     r9, r2
    eor     r10, r3
    eor     r11, r12
    str.w   r1, [sp, #48]
    str     r14, [sp, #52]
    // sbox: credits to https://github.com/Ko-/aes-armcortexm
    eor     r1, r7, r9              //Exec y14 = U3 ^ U5; into r1
    eor     r3, r4, r10             //Exec y13 = U0 ^ U6; into r3
    eor     r2, r3, r1              //Exec y12 = y13 ^ y14; into r2
    eor     r0, r8, r2              //Exec t1 = U4 ^ y12; into r0
    eor     r14, r0, r9             //Exec y15 = t1 ^ U5; into r14
    and     r12, r2, r14            //Exec t2 = y12 & y15; into r12
    eor     r8, r14, r11            //Exec y6 = y15 ^ U7; into r8
    eor     r0, r0, r5              //Exec y20 = t1 ^ U1; into r0
    str.w   r2, [sp, #44]           //Store r2/y12 on stack
    eor     r2, r4, r7              //Exec y9 = U0 ^ U3; into r2
    str     r0, [sp, #40]           //Store r0/y20 on stack
    eor     r0, r0, r2              //Exec y11 = y20 ^ y9; into r0
    str     r2, [sp, #36]           //Store r2/y9 on stack
    and     r2, r2, r0              //Exec t12 = y9 & y11; into r2
    str     r8, [sp, #32]           //Store r8/y6 on stack
    eor     r8, r11, r0             //Exec y7 = U7 ^ y11; into r8
    eor     r9, r4, r9              //Exec y8 = U0 ^ U5; into r9
    eor     r6, r5, r6              //Exec t0 = U1 ^ U2; into r6
    eor     r5, r14, r6             //Exec y10 = y15 ^ t0; into r5
    str     r14, [sp, #28]          //Store r14/y15 on stack
    eor     r14, r5, r0             //Exec y17 = y10 ^ y11; into r14
    str.w   r1, [sp, #24]           //Store r1/y14 on stack
    and     r1, r1, r14             //Exec t13 = y14 & y17; into r1
    eor     r1, r1, r2              //Exec t14 = t13 ^ t12; into r1
    str     r14, [sp, #20]          //Store r14/y17 on stack
    eor     r14, r5, r9             //Exec y19 = y10 ^ y8; into r14
    str.w   r5, [sp, #16]           //Store r5/y10 on stack
    and     r5, r9, r5              //Exec t15 = y8 & y10; into r5
    eor     r2, r5, r2              //Exec t16 = t15 ^ t12; into r2
    eor     r5, r6, r0              //Exec y16 = t0 ^ y11; into r5
    str.w   r0, [sp, #12]           //Store r0/y11 on stack
    eor     r0, r3, r5              //Exec y21 = y13 ^ y16; into r0
    str     r3, [sp, #8]            //Store r3/y13 on stack
    and     r3, r3, r5              //Exec t7 = y13 & y16; into r3
    str     r5, [sp, #4]            //Store r5/y16 on stack
    str     r11, [sp, #0]           //Store r11/U7 on stack
    eor     r5, r4, r5              //Exec y18 = U0 ^ y16; into r5
    eor     r6, r6, r11             //Exec y1 = t0 ^ U7; into r6
    eor     r7, r6, r7              //Exec y4 = y1 ^ U3; into r7
    and     r11, r7, r11            //Exec t5 = y4 & U7; into r11
    eor     r11, r11, r12           //Exec t6 = t5 ^ t2; into r11
    eor     r11, r11, r2            //Exec t18 = t6 ^ t16; into r11
    eor     r14, r11, r14           //Exec t22 = t18 ^ y19; into r14
    eor     r4, r6, r4              //Exec y2 = y1 ^ U0; into r4
    and     r11, r4, r8             //Exec t10 = y2 & y7; into r11
    eor     r11, r11, r3            //Exec t11 = t10 ^ t7; into r11
    eor     r2, r11, r2             //Exec t20 = t11 ^ t16; into r2
    eor     r2, r2, r5              //Exec t24 = t20 ^ y18; into r2
    eor     r10, r6, r10            //Exec y5 = y1 ^ U6; into r10
    and     r11, r10, r6            //Exec t8 = y5 & y1; into r11
    eor     r3, r11, r3             //Exec t9 = t8 ^ t7; into r3
    eor     r3, r3, r1              //Exec t19 = t9 ^ t14; into r3
    eor     r3, r3, r0              //Exec t23 = t19 ^ y21; into r3
    eor     r0, r10, r9             //Exec y3 = y5 ^ y8; into r0
    ldr     r11, [sp, #32]          //Load y6 into r11
    and     r5, r0, r11             //Exec t3 = y3 & y6; into r5
    eor     r12, r5, r12            //Exec t4 = t3 ^ t2; into r12
    ldr     r5, [sp, #40]           //Load y20 into r5
    str     r7, [sp, #32]           //Store r7/y4 on stack
    eor     r12, r12, r5            //Exec t17 = t4 ^ y20; into r12
    eor     r1, r12, r1             //Exec t21 = t17 ^ t14; into r1
    and     r12, r1, r3             //Exec t26 = t21 & t23; into r12
    eor     r5, r2, r12             //Exec t27 = t24 ^ t26; into r5
    eor     r12, r14, r12           //Exec t31 = t22 ^ t26; into r12
    eor     r1, r1, r14             //Exec t25 = t21 ^ t22; into r1
    and     r7, r1, r5              //Exec t28 = t25 & t27; into r7
    eor     r14, r7, r14            //Exec t29 = t28 ^ t22; into r14
    and     r4, r14, r4             //Exec z14 = t29 & y2; into r4
    and     r8, r14, r8             //Exec z5 = t29 & y7; into r8
    eor     r7, r3, r2              //Exec t30 = t23 ^ t24; into r7
    and     r12, r12, r7            //Exec t32 = t31 & t30; into r12
    eor     r12, r12, r2            //Exec t33 = t32 ^ t24; into r12
    eor     r7, r5, r12             //Exec t35 = t27 ^ t33; into r7
    and     r2, r2, r7              //Exec t36 = t24 & t35; into r2
    eor     r5, r5, r2              //Exec t38 = t27 ^ t36; into r5
    and     r5, r14, r5             //Exec t39 = t29 & t38; into r5
    eor     r1, r1, r5              //Exec t40 = t25 ^ t39; into r1
    eor     r5, r14, r1             //Exec t43 = t29 ^ t40; into r5
    ldr.w   r7, [sp, #4]            //Load y16 into r7
    and     r7, r5, r7              //Exec z3 = t43 & y16; into r7
    eor     r8, r7, r8              //Exec tc12 = z3 ^ z5; into r8
    str     r8, [sp, #40]           //Store r8/tc12 on stack
    ldr     r8, [sp, #8]            //Load y13 into r8
    and     r8, r5, r8              //Exec z12 = t43 & y13; into r8
    and     r10, r1, r10            //Exec z13 = t40 & y5; into r10
    and     r6, r1, r6              //Exec z4 = t40 & y1; into r6
    eor     r6, r7, r6              //Exec tc6 = z3 ^ z4; into r6
    eor     r3, r3, r12             //Exec t34 = t23 ^ t33; into r3
    eor     r3, r2, r3              //Exec t37 = t36 ^ t34; into r3
    eor     r1, r1, r3              //Exec t41 = t40 ^ t37; into r1
    ldr.w   r5, [sp, #16]           //Load y10 into r5
    and     r2, r1, r5              //Exec z8 = t41 & y10; into r2
    and     r9, r1, r9              //Exec z17 = t41 & y8; into r9
    str     r9, [sp, #16]           //Store r9/z17 on stack
    eor     r5, r12, r3             //Exec t44 = t33 ^ t37; into r5
    ldr     r9, [sp, #28]           //Load y15 into r9
    ldr.w   r7, [sp, #44]           //Load y12 into r7
    and     r9, r5, r9              //Exec z0 = t44 & y15; into r9
    and     r7, r5, r7              //Exec z9 = t44 & y12; into r7
    and     r0, r3, r0              //Exec z10 = t37 & y3; into r0
    and     r3, r3, r11             //Exec z1 = t37 & y6; into r3
    eor     r3, r3, r9              //Exec tc5 = z1 ^ z0; into r3
    eor     r3, r6, r3              //Exec tc11 = tc6 ^ tc5; into r3
    ldr     r11, [sp, #32]          //Load y4 into r11
    ldr.w   r5, [sp, #20]           //Load y17 into r5
    and     r11, r12, r11           //Exec z11 = t33 & y4; into r11
    eor     r14, r14, r12           //Exec t42 = t29 ^ t33; into r14
    eor     r1, r14, r1             //Exec t45 = t42 ^ t41; into r1
    and     r5, r1, r5              //Exec z7 = t45 & y17; into r5
    eor     r6, r5, r6              //Exec tc8 = z7 ^ tc6; into r6
    ldr     r5, [sp, #24]           //Load y14 into r5
    str     r4, [sp, #32]           //Store r4/z14 on stack
    and     r1, r1, r5              //Exec z16 = t45 & y14; into r1
    ldr     r5, [sp, #12]           //Load y11 into r5
    ldr     r4, [sp, #36]           //Load y9 into r4
    and     r5, r14, r5             //Exec z6 = t42 & y11; into r5
    eor     r5, r5, r6              //Exec tc16 = z6 ^ tc8; into r5
    and     r4, r14, r4             //Exec z15 = t42 & y9; into r4
    eor     r14, r4, r5             //Exec tc20 = z15 ^ tc16; into r14
    eor     r4, r4, r1              //Exec tc1 = z15 ^ z16; into r4
    eor     r1, r0, r4              //Exec tc2 = z10 ^ tc1; into r1
    eor     r0, r1, r11             //Exec tc21 = tc2 ^ z11; into r0
    eor     r7, r7, r1              //Exec tc3 = z9 ^ tc2; into r7
    eor     r1, r7, r5              //Exec S0 = tc3 ^ tc16; into r1
    eor     r7, r7, r3              //Exec S3 = tc3 ^ tc11; into r7
    eor     r3, r7, r5              //Exec S1 = S3 ^ tc16 ^ 1; into r3
    eor     r11, r10, r4            //Exec tc13 = z13 ^ tc1; into r11
    ldr.w   r4, [sp, #0]            //Load U7 into r4
    and     r12, r12, r4            //Exec z2 = t33 & U7; into r12
    eor     r9, r9, r12             //Exec tc4 = z0 ^ z2; into r9
    eor     r12, r8, r9             //Exec tc7 = z12 ^ tc4; into r12
    eor     r2, r2, r12             //Exec tc9 = z8 ^ tc7; into r2
    eor     r2, r6, r2              //Exec tc10 = tc8 ^ tc9; into r2
    ldr.w   r4, [sp, #32]           //Load z14 into r4
    eor     r12, r4, r2             //Exec tc17 = z14 ^ tc10; into r12
    eor     r0, r0, r12             //Exec S5 = tc21 ^ tc17; into r0
    eor     r6, r12, r14            //Exec tc26 = tc17 ^ tc20; into r6
    ldr.w   r4, [sp, #16]           //Load z17 into r4
    ldr     r12, [sp, #40]          //Load tc12 into r12
    eor     r6, r6, r4              //Exec S2 = tc26 ^ z17 ^ 1; into r6
    eor     r12, r9, r12            //Exec tc14 = tc4 ^ tc12; into r12
    eor     r14, r11, r12           //Exec tc18 = tc13 ^ tc14; into r14
    eor     r2, r2, r14             //Exec S6 = tc10 ^ tc18 ^ 1; into r2
    eor     r11, r8, r14            //Exec S7 = z12 ^ tc18 ^ 1; into r11
    ldr     r14, [sp, #52]          // restore link register
    eor     r8, r12, r7             //Exec S4 = tc14 ^ S3; into r8
    bx      lr
    // [('r0', 'S5'), ('r1', 'S0'), ('r2', 'S6'), ('r3', 'S1'),
    // ('r6', 'S2'),('r7', 'S3'), ('r8', 'S4'), ('r11', 'S7')]

/******************************************************************************
* Computation of the MixColumns transformation in the fixsliced representation.
* For fully-fixsliced implementations, it is used for rounds i s.t. (i%4) == 0.
* For semi-fixsliced implementations, it is used for rounds i s.t. (i%2) == 0.
******************************************************************************/
.align 2
mixcolumns_0:
    str     r14, [sp, #52]          // store link register
    movw    r12, #0x0303
    movt    r12, #0x0303
    mc_0_2  r12, 6, 2, 26, 18
    ldr     r14, [sp, #52]          // restore link register
    bx      lr

/******************************************************************************
* Computation of the MixColumns transformation in the fixsliced representation.
* For fully-fixsliced implementations only, for round i s.t. (i%4) == 1.
******************************************************************************/
.align 2
mixcolumns_1:
    str     r14, [sp, #52]          // store link register
    movw    r14, #0x0f0f
    movt    r14, #0x0f0f            // r14<- 0x0f0f0f0f (mask for BYTE_ROR_4)
    and     r5, r14, r1, lsr #4     // r5 <- (S0 >> 4) & 0x0f0f0f0f
    and     r9, r14, r1             // r9 <- S0 & 0x0f0f0f0f
    orr     r5, r5, r9, lsl #4      // r5 <- BYTE_ROR_4(S0)
    eor     r4, r1, r5, ror #8      // r4 <- S0 ^ (BYTE_ROR_4(S0) >>> 8)
    mov.w   r1, r5, ror #8          // r1 <- (BYTE_ROR_4(S0) >>> 8)
    and     r5, r14, r11, lsr #4    // r5 <- (S7 >> 4) & 0x0f0f0f0f
    and     r9, r14, r11            // r9 <- S7 & 0x0f0f0f0f
    orr     r5, r5, r9, lsl #4      // r5 <- BYTE_ROR_4(S7)
    eor     r12, r11, r5, ror #8    // r12<- S7 ^ (BYTE_ROR_4(S7) >>> 8)
    eor     r10, r4, r12            // r10<- r4 ^ r12
    eor     r11, r10                // r11<- S7 ^ r4 ^ r12
    eor     r11, r11, r12, ror #16  // r11<- r11 ^ (r12 >>> 16)
    and     r5, r14, r2, lsr #4     // r5 <- (S6 >> 4) & 0x0f0f0f0f
    and     r9, r14, r2             // r9 <- S6 & 0x0f0f0f0f
    orr     r5, r5, r9, lsl #4      // r5 <- BYTE_ROR_4(S6)
    eor     r10, r10, r5, ror #8    // r10<- r10 ^ (BYTE_ROR_4(S6) >>> 8)
    eor     r12, r2, r5, ror #8     // r12<- S6 ^ (BYTE_ROR_4(S6) >>> 8)
    eor     r10, r10, r12, ror #16  // r10<- r10 ^ (r12 >>> 16)
    and     r5, r14, r0, lsr #4     // r5 <- (S5 >> 4) & 0x0f0f0f0f
    and     r9, r14, r0             // r9 <- S5 & 0x0f0f0f0f
    orr     r5, r5, r9, lsl #4      // r5 <- BYTE_ROR_4(S5)
    eor     r9, r12, r5, ror #8     // r9 <- r12 ^ (BYTE_ROR_4(S5) >>> 8)
    eor     r12, r0, r5, ror #8     // r12<- S5 ^ (BYTE_ROR_4(S5) >>> 8)
    eor     r9, r9, r12, ror #16    // r9 <- (r9 ^ r12 >>> 16)
    eor     r0, r4, r12             // r0 <- r12 ^ S0 ^ (BYTE_ROR_4(S0) >>> 8)
    and     r5, r14, r8, lsr #4     // r5 <- (S4 >> 4) & 0x0f0f0f0f
    and     r2, r14, r8             // r2 <- S4 & 0x0f0f0f0f
    orr     r2, r5, r2, lsl #4      // r2 <- BYTE_ROR_4(S4)
    eor     r0, r0, r2, ror #8      // r0 <- r0 ^ (BYTE_ROR_4(S4) >>> 8)
    eor     r2, r8, r2, ror #8      // r2 <- S4 ^ (BYTE_ROR_4(S4) >>> 8)
    eor     r8, r0, r2, ror #16     // r8 <- r0 ^ (r2 >>> 16)
    eor     r2, r4                  // r2 <- r2 ^ S0 ^ (BYTE_ROR_4(S0) >>> 8)
    and     r5, r14, r7, lsr #4     // r5 <- (S3 >> 4) & 0x0f0f0f0f
    and     r0, r14, r7             // r0 <- S3 & 0x0f0f0f0f
    orr     r0, r5, r0, lsl #4      // r0 <- BYTE_ROR_4(S3)
    eor     r2, r2, r0, ror #8      // r2 <- r2 ^ (BYTE_ROR_4(S3) >>> 8)
    eor     r0, r7, r0, ror #8      // r0 <- S3 ^ (BYTE_ROR_4(S3) >>> 8)
    eor     r7, r2, r0, ror #16     // r7 <- r2 ^ (r0 >>> 16)
    and     r5, r14, r6, lsr #4     // r5 <- (S2 >> 4) & 0x0f0f0f0f
    and     r2, r14, r6             // r2 <- S2 & 0x0f0f0f0f
    orr     r2, r5, r2, lsl #4      // r2 <- BYTE_ROR_4(S2)
    eor     r0, r0, r2, ror #8      // r0 <- r0 ^ (BYTE_ROR_4(S2) >>> 8)
    eor     r2, r6, r2, ror #8      // r2 <- S2 ^ (BYTE_ROR_4(S2) >>> 8)
    eor     r6, r0, r2, ror #16     // r6 <- r0 ^ (r2 >>> 16)
    and     r5, r14, r3, lsr #4     // r5 <- (S1 >> 4) & 0x0f0f0f0f
    and     r0, r14, r3             // r0 <- S1 & 0x0f0f0f0f
    orr     r0, r5, r0, lsl #4      // r0 <- BYTE_ROR_4(S1)
    ldr     r14, [sp, #52]          // restore link register
    eor     r2, r2, r0, ror #8      // r2 <- r2 ^ (BYTE_ROR_4(S1) >>> 8)
    eor     r0, r3, r0, ror #8      // r0 <- S1 ^ (BYTE_ROR_4(S1) >>> 8)
    eor     r5, r2, r0, ror #16     // r5 <- r2 <- (r0 >>> 16)
    eor     r1, r0, r1              // r1 <- r0 ^ BYTE_ROR_4(S0) >>> 8
    eor     r4, r1, r4, ror #16     // r4 <- r4 ^ (r0 >>> 16)
    bx      lr

/******************************************************************************
* Computation of the MixColumns transformation in the fixsliced representation.
* For fully-fixsliced implementations only, for rounds i s.t. (i%4) == 2.
******************************************************************************/
.align 2
mixcolumns_2:
    str     r14, [sp, #52]          // store link register
    movw    r12, #0x3f3f
    movt    r12, #0x3f3f
    mc_0_2  r12, 2, 6, 30, 22
    ldr     r14, [sp, #52]          // restore link register
    bx      lr

/******************************************************************************
* Computation of the MixColumns transformation in the fixsliced representation.
* For fully-fixsliced implementations, it is used for rounds i s.t. (i%4) == 3.
* For semi-fixsliced implementations, it is used for rounds i s.t. (i%2) == 1.
* Based on Käsper-Schwabe, similar to https://github.com/Ko-/aes-armcortexm.
******************************************************************************/
.align 2
mixcolumns_3:
    eor     r12, r11, r11, ror #8   // r12<- S7 ^ (S7 >>> 8)
    eor     r4, r1, r1, ror #8      // r4 <- S0 ^ (S0 >>> 8)
    eor     r11, r4, r11, ror #8    // r11<- S0 ^ (S0 >>> 8) ^ (S7 >>> 8)
    eor     r11, r11, r12, ror #16  // r11<- r11 ^ (S7 >>> 16) ^ (S7 >>> 24)
    eor     r10, r12, r2, ror #8    // r10<- S7 ^ (S7 >>> 8) ^ (S6 >>> 8)
    eor     r12, r2, r2, ror #8     // r12<- S6 ^ (S6 >>> 8)
    eor     r10, r10, r12, ror #16  // r10<- r10 ^ (S6 >>> 16) ^ (S6 >>> 24)
    eor     r10, r4                 // r10<- r10 ^ S0 ^ (S0 >>> 8)
    eor     r9, r12, r0, ror #8     // r9 <- S6 ^ (S6 >>> 8) ^ (S5 >>> 8)
    eor     r12, r0, r0, ror #8     // r12<- S5 ^ (S5 >>> 8)
    eor     r9, r9, r12, ror #16    // r9 <- r9 ^ (S5 >>> 16) ^ (S5 >>> 24)
    eor     r2, r8, r8, ror #8      // r2 <- S4 ^ (S4 >>> 8)
    eor     r8, r12, r8, ror #8     // r8 <- S5 ^ (S5 >>> 8) ^ (S4 >>> 8)
    eor     r8, r4                  // r8 <- r8 ^ S0 ^ (S0 >>> 8)
    eor     r8, r8, r2, ror #16     // r8 <- r8 ^ (S4 >>> 16) ^ (S4 >>> 24)
    eor     r12, r7, r7, ror #8     // r12<- S3 ^ (S3 >>> 8)
    eor     r7, r2, r7, ror #8      // r7 <- S4 ^ (S4 >>> 8) ^ (S3 >>> 8)
    eor     r7, r4                  // r7 <- r7 ^ S0 ^ (S0 >>> 8)
    eor     r7, r7, r12, ror #16    // r7 <- r7 ^ (S3 >>> 16) ^ (S3 >>> 24)
    eor     r2, r6, r6, ror #8      // r2 <- S2 ^ (S2 >>> 8)
    eor     r6, r12, r6, ror #8     // r6 <- S3 ^ (S3 >>> 8) ^ (S2 >>> 8)
    eor     r6, r6, r2, ror #16     // r6 <- r6 ^ (S2 >>> 16) ^ (S2 >>> 24)
    eor     r12, r3, r3, ror #8     // r12<- S1 ^ (S1 >>> 8)
    eor     r5, r2, r3, ror #8      // r5 <- S2 ^ (S2 >>> 8) ^ (S1 >>> 8)
    eor     r5, r5, r12, ror #16    // r5 <- r5 ^ (S1 >>> 16) ^ (S1 >>> 24)
    eor     r4, r12, r4, ror #16    // r4 <- S1 ^ (S1 >>> 8) ^ (r4 >>> 16)
    eor     r4, r4, r1, ror #8      // r4 <- r4 ^ (S0 >>> 8)
    bx      lr

/******************************************************************************
* Applies the ShiftRows transformation twice (i.e. SR^2) on the internal state.
******************************************************************************/
.align 2
double_shiftrows:
    movw    r10, #0x0f00
    movt    r10, #0x0f00            // r10<- 0x0f000f00 (mask)
    swpmv   r0, r0, r0, r0, r10, #4, r12
    swpmv   r1, r1, r1, r1, r10, #4, r12
    swpmv   r2, r2, r2, r2, r10, #4, r12
    swpmv   r3, r3, r3, r3, r10, #4, r12
    swpmv   r6, r6, r6, r6, r10, #4, r12
    swpmv   r7, r7, r7, r7, r10, #4, r12
    swpmv   r8, r8, r8, r8, r10, #4, r12
    swpmv   r11, r11, r11, r11, r10, #4, r12
    bx      lr

/******************************************************************************
* Fully-fixsliced implementation of AES-128.
*
* Two blocks are encrypted in parallel, without any operating mode.
*
* Note that additional 4 bytes are allocated on the stack as the function takes
* 5 arguments as input.
******************************************************************************/
@ void aes128_encrypt_ffs(u8* ctext, u8* ctext_bis, const u8* ptext,
@                   const u8* ptext_bis, const u32* rkey);
.global aes128_encrypt_ffs
.type   aes128_encrypt_ffs,%function
.align 2
aes128_encrypt_ffs:
    push    {r0-r12,r14}
    sub.w   sp, #56                 // allow space on the stack for tmp var
    ldr.w   r4, [r2]                // load the 1st 128-bit blocks in r4-r7
    ldr     r5, [r2, #4]
    ldr     r6, [r2, #8]
    ldr     r7, [r2, #12]
    ldr.w   r8, [r3]                // load the 2nd 128-bit blocks in r8-r11
    ldr     r9, [r3, #4]
    ldr     r10,[r3, #8]
    ldr     r11,[r3, #12]
    ldr.w   r1, [sp, #112]          // load 'rkey' argument from the stack
    str.w   r1, [sp, #48]           // store it there for 'add_round_key'
    bl      packing                 // pack the 2 input blocks
    bl      ark_sbox                // ark + sbox (round 0)
    bl      mixcolumns_0            // mixcolumns (round 0)
    bl      ark_sbox                // ark + sbox (round 1)
    bl      mixcolumns_1            // mixcolumns (round 1)
    bl      ark_sbox                // ark + sbox (round 2)
    bl      mixcolumns_2            // mixcolumns (round 2)
    bl      ark_sbox                // ark + sbox (round 3)
    bl      mixcolumns_3            // mixcolumns (round 3)
    bl      ark_sbox                // ark + sbox (round 4)
    bl      mixcolumns_0            // mixcolumns (round 4)
    bl      ark_sbox                // ark + sbox (round 5)
    bl      mixcolumns_1            // mixcolumns (round 5)
    bl      ark_sbox                // ark + sbox (round 6)
    bl      mixcolumns_2            // mixcolumns (round 6)
    bl      ark_sbox                // ark + sbox (round 7)
    bl      mixcolumns_3            // mixcolumns (round 7)
    bl      ark_sbox                // ark + sbox (round 8)
    bl      mixcolumns_0            // mixcolumns (round 8)
    bl      ark_sbox                // ark + sbox (round 9)
    bl      double_shiftrows        // to resynchronize with the classical rep
    ldr     r14, [sp, #48]          // ---------------------------------------
    ldmia   r14!, {r4,r5,r10,r12}   //
    eor     r4, r1                  //
    eor     r5, r3                  //
    eor     r6, r10                 //
    eor     r7, r12                 //  Last add_round_key
    ldmia   r14!, {r1,r3,r10,r12}   //
    eor     r8, r1                  //
    eor     r9, r0, r3              //
    eor     r10, r2                 //
    eor     r11, r12                // ---------------------------------------
    bl      unpacking               // unpack the internal state
    ldrd    r0, r1, [sp, #56]       // restore the addr to store the ciphertext
    add.w   sp, #64                 // restore the stack pointer
    str.w   r4, [r0]                // store the ciphertext
    str     r5, [r0, #4]
    str     r6, [r0, #8]
    str     r7, [r0, #12]
    str.w   r8, [r1]               // store the ciphertext
    str     r9, [r1, #4]
    str     r10,[r1, #8]
    str     r11,[r1, #12]
    pop     {r2-r12, r14}           // restore context
    bx      lr

/******************************************************************************
* Fully-fixsliced implementation of AES-256.
*
* Two blocks are encrypted in parallel, without any operating mode.
*
* Note that additional 4 bytes are allocated on the stack as the function takes
* 5 arguments as input.
******************************************************************************/
@ void aes256_encrypt_ffs(u8* ctext, u8* ctext_bis, const u8* ptext,
@                   const u8* ptext_bis, const u32* rkey);
.global aes256_encrypt_ffs
.type   aes256_encrypt_ffs,%function
.align 2
aes256_encrypt_ffs:
    push    {r0-r12,r14}
    sub.w   sp, #56                 // allow space on the stack for tmp var
    ldr.w   r4, [r2]                // load the 1st 128-bit blocks in r4-r7
    ldr     r5, [r2, #4]
    ldr     r6, [r2, #8]
    ldr     r7, [r2, #12]
    ldr.w   r8, [r3]                // load the 2nd 128-bit blocks in r8-r11
    ldr     r9, [r3, #4]
    ldr     r10,[r3, #8]
    ldr     r11,[r3, #12]
    ldr.w   r1, [sp, #112]          // load 'rkey' argument from the stack
    str.w   r1, [sp, #48]           // store it there for 'add_round_key'
    bl      packing                 // pack the 2 input blocks
    bl      ark_sbox                // ark + sbox (round 0)
    bl      mixcolumns_0            // mixcolumns (round 0)
    bl      ark_sbox                // ark + sbox (round 1)
    bl      mixcolumns_1            // mixcolumns (round 1)
    bl      ark_sbox                // ark + sbox (round 2)
    bl      mixcolumns_2            // mixcolumns (round 2)
    bl      ark_sbox                // ark + sbox (round 3)
    bl      mixcolumns_3            // mixcolumns (round 3)
    bl      ark_sbox                // ark + sbox (round 4)
    bl      mixcolumns_0            // mixcolumns (round 4)
    bl      ark_sbox                // ark + sbox (round 5)
    bl      mixcolumns_1            // mixcolumns (round 5)
    bl      ark_sbox                // ark + sbox (round 6)
    bl      mixcolumns_2            // mixcolumns (round 6)
    bl      ark_sbox                // ark + sbox (round 7)
    bl      mixcolumns_3            // mixcolumns (round 7)
    bl      ark_sbox                // ark + sbox (round 8)
    bl      mixcolumns_0            // mixcolumns (round 8)
    bl      ark_sbox                // ark + sbox (round 9)
    bl      mixcolumns_1            // mixcolumns (round 9)
    bl      ark_sbox                // ark + sbox (round 10)
    bl      mixcolumns_2            // mixcolumns (round 10)
    bl      ark_sbox                // ark + sbox (round 11)
    bl      mixcolumns_3            // mixcolumns (round 11)
    bl      ark_sbox                // ark + sbox (round 12)
    bl      mixcolumns_0            // mixcolumns (round 12)
    bl      ark_sbox                // ark + sbox (round 13)
    bl      double_shiftrows        // to resynchronize with the classical rep
    ldr     r14, [sp, #48]          // ---------------------------------------
    ldmia   r14!, {r4,r5,r10,r12}   //
    eor     r4, r1                  //
    eor     r5, r3                  //
    eor     r6, r10                 //
    eor     r7, r12                 //  Last add_round_key
    ldmia   r14!, {r1,r3,r10,r12}   //
    eor     r8, r1                  //
    eor     r9, r0, r3              //
    eor     r10, r2                 //
    eor     r11, r12                // ---------------------------------------
    bl      unpacking               // unpack the internal state
    ldrd    r0, r1, [sp, #56]       // restore the addr to store the ciphertext
    add.w   sp, #64                 // restore the stack pointer
    str.w   r4, [r0]                // store the ciphertext
    str     r5, [r0, #4]
    str     r6, [r0, #8]
    str     r7, [r0, #12]
    str.w   r8, [r1]                // store the ciphertext
    str     r9, [r1, #4]
    str     r10,[r1, #8]
    str     r11,[r1, #12]
    pop     {r2-r12, r14}           // restore context
    bx      lr
The diff you're trying to view is too large. Only the first 1000 changed files have been loaded.
Showing with 0 additions and 0 deletions (0 / 0 diffs computed)
swh spinner

Computing file changes ...

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Contact— JavaScript license information— Web API

back to top