1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
/******************************************************************************
* ARM assembly implementations of the AES-128 and AES-256 key schedules to
* match fixslicing.
* Note that those implementations are fully bitsliced and do not rely on any
* Look-Up Table (LUT).
*
* See the paper at https://eprint.iacr.org/2020/1123.pdf for more details.
*
* @author Alexandre Adomnicai, Nanyang Technological University, Singapore
* alexandre.adomnicai@ntu.edu.sg
*
* @date October 2020
******************************************************************************/
.syntax unified
.thumb
/******************************************************************************
* SWAPMOVE macro: exchanges the bits of 'in1' selected by the mask 'm' with the
* bits of 'in0' selected by 'm << n'. The results go to 'out0' and 'out1':
*
*       tmp  = (in1 ^ (in0 >> n)) & m
*       out1 = in1 ^ tmp
*       out0 = in0 ^ (tmp << n)
*
* 'm' and 'tmp' are registers and 'n' is a shift amount; 'tmp' is clobbered.
* 'out1' is written before 'out0' is computed from 'in0'; this write order is
* kept on purpose because callers pass overlapping registers.
******************************************************************************/
.macro swpmv out0, out1, in0, in1, m, n, tmp
    eor     \tmp, \in1, \in0, lsr \n
    and     \tmp, \tmp, \m
    eor     \out1, \in1, \tmp
    eor     \out0, \in0, \tmp, lsl \n
.endm
/******************************************************************************
* Packing routine: three layers of SWAPMOVE (shifts 1, 2 and 4) applied to the
* eight words held in r4-r11. The same routine is used by the encryption
* function, so some code size could be saved by merging the two files.
*
* Input  : r4-r11 (the caller loads one 128-bit key in r4-r7 and one in r8-r11)
* Output : r4-r11 hold the packed words
* Clobber: r0-r3 and r12. Leaf routine, the stack is not used.
******************************************************************************/
.align 2
packing:
    // SWAPMOVE masks
    mov.w   r3, #0x0f0f0f0f             // mask for the 4-bit layer
    mov.w   r2, #0x33333333             // mask for the 2-bit layer
    mov.w   r1, #0x55555555             // mask for the 1-bit layer

    // layer 1: shift by 1, mask 0x55555555
    swpmv   r8,  r4, r8,  r4,  r1, #1, r12
    swpmv   r9,  r5, r9,  r5,  r1, #1, r12
    swpmv   r10, r6, r10, r6,  r1, #1, r12
    swpmv   r11, r7, r11, r7,  r1, #1, r12

    // layer 2: shift by 2, mask 0x33333333
    // the last call overwrites r2, which is no longer needed as a mask
    swpmv   r0,  r4, r5,  r4,  r2, #2, r12
    swpmv   r9,  r5, r9,  r8,  r2, #2, r12
    swpmv   r7,  r8, r7,  r6,  r2, #2, r12
    swpmv   r11, r2, r11, r10, r2, #2, r12

    // layer 3: shift by 4, mask 0x0f0f0f0f
    swpmv   r8,  r4, r8,  r4,  r3, #4, r12
    swpmv   r10, r6, r7,  r0,  r3, #4, r12
    swpmv   r11, r7, r11, r9,  r3, #4, r12
    swpmv   r9,  r5, r2,  r5,  r3, #4, r12
    bx      lr
/******************************************************************************
* Subroutine that computes the S-box. Note that the same code is used in the
* encryption function, so some code size could be saved by merging the 2 files.
* Credits to https://github.com/Ko-/aes-armcortexm.
*
* Input  : r4-r11 hold the S-box inputs U0-U7
*          (r4 = U0, r5 = U1, r6 = U2, r7 = U3, r8 = U4, r9 = U5, r10 = U6,
*          r11 = U7).
* Output : r1 = S0, r3 = S1, r6 = S2, r7 = S3, r8 = S4, r0 = S5, r2 = S6,
*          r11 = S7. The '^ 1' that appears in the formulas of S1, S2, S6 and
*          S7 is NOT executed here: those four outputs are returned without the
*          complement (the xorcolumns subroutines of this file complement the
*          corresponding words with 'bic').
* Stack  : [sp, #0] to [sp, #44] are used as scratch slots for intermediate
*          values and [sp, #52] to save the link register, so the caller must
*          have reserved that area below its own data.
* Clobber: r4, r5, r9, r10 and r12 hold intermediate values on return; r14 is
*          used as a scratch register and reloaded from [sp, #52] before
*          returning.
******************************************************************************/
.align 2
sbox:
str r14, [sp, #52] // save link register (r14 is used as scratch below)
eor r1, r7, r9 //Exec y14 = U3 ^ U5; into r1
eor r3, r4, r10 //Exec y13 = U0 ^ U6; into r3
eor r2, r3, r1 //Exec y12 = y13 ^ y14; into r2
eor r0, r8, r2 //Exec t1 = U4 ^ y12; into r0
eor r14, r0, r9 //Exec y15 = t1 ^ U5; into r14
and r12, r2, r14 //Exec t2 = y12 & y15; into r12
eor r8, r14, r11 //Exec y6 = y15 ^ U7; into r8
eor r0, r0, r5 //Exec y20 = t1 ^ U1; into r0
str.w r2, [sp, #44] //Store r2/y12 on stack
eor r2, r4, r7 //Exec y9 = U0 ^ U3; into r2
str r0, [sp, #40] //Store r0/y20 on stack
eor r0, r0, r2 //Exec y11 = y20 ^ y9; into r0
str r2, [sp, #36] //Store r2/y9 on stack
and r2, r2, r0 //Exec t12 = y9 & y11; into r2
str r8, [sp, #32] //Store r8/y6 on stack
eor r8, r11, r0 //Exec y7 = U7 ^ y11; into r8
eor r9, r4, r9 //Exec y8 = U0 ^ U5; into r9
eor r6, r5, r6 //Exec t0 = U1 ^ U2; into r6
eor r5, r14, r6 //Exec y10 = y15 ^ t0; into r5
str r14, [sp, #28] //Store r14/y15 on stack
eor r14, r5, r0 //Exec y17 = y10 ^ y11; into r14
str.w r1, [sp, #24] //Store r1/y14 on stack
and r1, r1, r14 //Exec t13 = y14 & y17; into r1
eor r1, r1, r2 //Exec t14 = t13 ^ t12; into r1
str r14, [sp, #20] //Store r14/y17 on stack
eor r14, r5, r9 //Exec y19 = y10 ^ y8; into r14
str.w r5, [sp, #16] //Store r5/y10 on stack
and r5, r9, r5 //Exec t15 = y8 & y10; into r5
eor r2, r5, r2 //Exec t16 = t15 ^ t12; into r2
eor r5, r6, r0 //Exec y16 = t0 ^ y11; into r5
str.w r0, [sp, #12] //Store r0/y11 on stack
eor r0, r3, r5 //Exec y21 = y13 ^ y16; into r0
str r3, [sp, #8] //Store r3/y13 on stack
and r3, r3, r5 //Exec t7 = y13 & y16; into r3
str r5, [sp, #4] //Store r5/y16 on stack
str r11, [sp, #0] //Store r11/U7 on stack
eor r5, r4, r5 //Exec y18 = U0 ^ y16; into r5
eor r6, r6, r11 //Exec y1 = t0 ^ U7; into r6
eor r7, r6, r7 //Exec y4 = y1 ^ U3; into r7
and r11, r7, r11 //Exec t5 = y4 & U7; into r11
eor r11, r11, r12 //Exec t6 = t5 ^ t2; into r11
eor r11, r11, r2 //Exec t18 = t6 ^ t16; into r11
eor r14, r11, r14 //Exec t22 = t18 ^ y19; into r14
eor r4, r6, r4 //Exec y2 = y1 ^ U0; into r4
and r11, r4, r8 //Exec t10 = y2 & y7; into r11
eor r11, r11, r3 //Exec t11 = t10 ^ t7; into r11
eor r2, r11, r2 //Exec t20 = t11 ^ t16; into r2
eor r2, r2, r5 //Exec t24 = t20 ^ y18; into r2
eor r10, r6, r10 //Exec y5 = y1 ^ U6; into r10
and r11, r10, r6 //Exec t8 = y5 & y1; into r11
eor r3, r11, r3 //Exec t9 = t8 ^ t7; into r3
eor r3, r3, r1 //Exec t19 = t9 ^ t14; into r3
eor r3, r3, r0 //Exec t23 = t19 ^ y21; into r3
eor r0, r10, r9 //Exec y3 = y5 ^ y8; into r0
ldr r11, [sp, #32] //Load y6 into r11
and r5, r0, r11 //Exec t3 = y3 & y6; into r5
eor r12, r5, r12 //Exec t4 = t3 ^ t2; into r12
ldr r5, [sp, #40] //Load y20 into r5
str r7, [sp, #32] //Store r7/y4 on stack (reuses the y6 slot)
eor r12, r12, r5 //Exec t17 = t4 ^ y20; into r12
eor r1, r12, r1 //Exec t21 = t17 ^ t14; into r1
and r12, r1, r3 //Exec t26 = t21 & t23; into r12
eor r5, r2, r12 //Exec t27 = t24 ^ t26; into r5
eor r12, r14, r12 //Exec t31 = t22 ^ t26; into r12
eor r1, r1, r14 //Exec t25 = t21 ^ t22; into r1
and r7, r1, r5 //Exec t28 = t25 & t27; into r7
eor r14, r7, r14 //Exec t29 = t28 ^ t22; into r14
and r4, r14, r4 //Exec z14 = t29 & y2; into r4
and r8, r14, r8 //Exec z5 = t29 & y7; into r8
eor r7, r3, r2 //Exec t30 = t23 ^ t24; into r7
and r12, r12, r7 //Exec t32 = t31 & t30; into r12
eor r12, r12, r2 //Exec t33 = t32 ^ t24; into r12
eor r7, r5, r12 //Exec t35 = t27 ^ t33; into r7
and r2, r2, r7 //Exec t36 = t24 & t35; into r2
eor r5, r5, r2 //Exec t38 = t27 ^ t36; into r5
and r5, r14, r5 //Exec t39 = t29 & t38; into r5
eor r1, r1, r5 //Exec t40 = t25 ^ t39; into r1
eor r5, r14, r1 //Exec t43 = t29 ^ t40; into r5
ldr.w r7, [sp, #4] //Load y16 into r7
and r7, r5, r7 //Exec z3 = t43 & y16; into r7
eor r8, r7, r8 //Exec tc12 = z3 ^ z5; into r8
str r8, [sp, #40] //Store r8/tc12 on stack (reuses the y20 slot)
ldr r8, [sp, #8] //Load y13 into r8
and r8, r5, r8 //Exec z12 = t43 & y13; into r8
and r10, r1, r10 //Exec z13 = t40 & y5; into r10
and r6, r1, r6 //Exec z4 = t40 & y1; into r6
eor r6, r7, r6 //Exec tc6 = z3 ^ z4; into r6
eor r3, r3, r12 //Exec t34 = t23 ^ t33; into r3
eor r3, r2, r3 //Exec t37 = t36 ^ t34; into r3
eor r1, r1, r3 //Exec t41 = t40 ^ t37; into r1
ldr.w r5, [sp, #16] //Load y10 into r5
and r2, r1, r5 //Exec z8 = t41 & y10; into r2
and r9, r1, r9 //Exec z17 = t41 & y8; into r9
str r9, [sp, #16] //Store r9/z17 on stack (reuses the y10 slot)
eor r5, r12, r3 //Exec t44 = t33 ^ t37; into r5
ldr r9, [sp, #28] //Load y15 into r9
ldr.w r7, [sp, #44] //Load y12 into r7
and r9, r5, r9 //Exec z0 = t44 & y15; into r9
and r7, r5, r7 //Exec z9 = t44 & y12; into r7
and r0, r3, r0 //Exec z10 = t37 & y3; into r0
and r3, r3, r11 //Exec z1 = t37 & y6; into r3
eor r3, r3, r9 //Exec tc5 = z1 ^ z0; into r3
eor r3, r6, r3 //Exec tc11 = tc6 ^ tc5; into r3
ldr r11, [sp, #32] //Load y4 into r11
ldr.w r5, [sp, #20] //Load y17 into r5
and r11, r12, r11 //Exec z11 = t33 & y4; into r11
eor r14, r14, r12 //Exec t42 = t29 ^ t33; into r14
eor r1, r14, r1 //Exec t45 = t42 ^ t41; into r1
and r5, r1, r5 //Exec z7 = t45 & y17; into r5
eor r6, r5, r6 //Exec tc8 = z7 ^ tc6; into r6
ldr r5, [sp, #24] //Load y14 into r5
str r4, [sp, #32] //Store r4/z14 on stack (reuses the y4 slot)
and r1, r1, r5 //Exec z16 = t45 & y14; into r1
ldr r5, [sp, #12] //Load y11 into r5
ldr r4, [sp, #36] //Load y9 into r4
and r5, r14, r5 //Exec z6 = t42 & y11; into r5
eor r5, r5, r6 //Exec tc16 = z6 ^ tc8; into r5
and r4, r14, r4 //Exec z15 = t42 & y9; into r4
eor r14, r4, r5 //Exec tc20 = z15 ^ tc16; into r14
eor r4, r4, r1 //Exec tc1 = z15 ^ z16; into r4
eor r1, r0, r4 //Exec tc2 = z10 ^ tc1; into r1
eor r0, r1, r11 //Exec tc21 = tc2 ^ z11; into r0
eor r7, r7, r1 //Exec tc3 = z9 ^ tc2; into r7
eor r1, r7, r5 //Exec S0 = tc3 ^ tc16; into r1
eor r7, r7, r3 //Exec S3 = tc3 ^ tc11; into r7
eor r3, r7, r5 //Exec S1 = S3 ^ tc16 ^ 1; into r3 (the '^ 1' is not applied here)
eor r11, r10, r4 //Exec tc13 = z13 ^ tc1; into r11
ldr.w r4, [sp, #0] //Load U7 into r4
and r12, r12, r4 //Exec z2 = t33 & U7; into r12
eor r9, r9, r12 //Exec tc4 = z0 ^ z2; into r9
eor r12, r8, r9 //Exec tc7 = z12 ^ tc4; into r12
eor r2, r2, r12 //Exec tc9 = z8 ^ tc7; into r2
eor r2, r6, r2 //Exec tc10 = tc8 ^ tc9; into r2
ldr.w r4, [sp, #32] //Load z14 into r4
eor r12, r4, r2 //Exec tc17 = z14 ^ tc10; into r12
eor r0, r0, r12 //Exec S5 = tc21 ^ tc17; into r0
eor r6, r12, r14 //Exec tc26 = tc17 ^ tc20; into r6
ldr.w r4, [sp, #16] //Load z17 into r4
ldr r12, [sp, #40] //Load tc12 into r12
eor r6, r6, r4 //Exec S2 = tc26 ^ z17 ^ 1; into r6 (the '^ 1' is not applied here)
eor r12, r9, r12 //Exec tc14 = tc4 ^ tc12; into r12
eor r14, r11, r12 //Exec tc18 = tc13 ^ tc14; into r14
eor r2, r2, r14 //Exec S6 = tc10 ^ tc18 ^ 1; into r2 (the '^ 1' is not applied here)
eor r11, r8, r14 //Exec S7 = z12 ^ tc18 ^ 1; into r11 (the '^ 1' is not applied here)
ldr r14, [sp, #52] // restore link register
eor r8, r12, r7 //Exec S4 = tc14 ^ S3; into r8
bx lr
// Output register allocation:
// [('r0', 'S5'), ('r1', 'S0'), ('r2', 'S6'), ('r3', 'S1'),
// ('r6', 'S2'),('r7', 'S3'), ('r8', 'S4'), ('r11', 'S7')]
/******************************************************************************
* Subroutine that XORs the columns after the S-box during the AES-128 key
* schedule round function, for rounds i such that (i % 4) == 0.
* Note that the code size could be reduced at the cost of some instructions
* since some redundant code is applied on different registers.
*
* Input  : r1, r3, r6, r7, r8, r0, r2, r11 = S-box outputs S0, ..., S7 (same
*          register allocation as on return from 'sbox').
*          [sp, #56] = address of the previous round key (8 words).
* Output : the 8 words of the new round key are stored right after the previous
*          one (byte offsets 32 to 60) and are also left in r4-r11
*          (r4 = word 0, ..., r11 = word 7).
*          r12 and [sp, #56] hold the address of the new round key.
*          Words 1, 2, 6 and 7 of the previous round key are written back
*          bitwise complemented.
* Clobber: r0, r2, r3 (r1 is only read).
*
* Each new word is built from the previous round key word 'prev' and the S-box
* output 'S' as follows ('>>>' is a rotation to the right):
*       new  = (prev ^ (S   >>> 2)) & 0xc0c0c0c0
*       new |= (prev ^ (new >>> 2)) & 0x30303030
*       new |= (prev ^ (new >>> 2)) & 0x0c0c0c0c
*       new |= (prev ^ (new >>> 2)) & 0x03030303
* For S1, S2, S6 and S7 the first step uses 'bic' instead of 'and', i.e. the
* value is complemented before being masked (NOT omitted in sbox).
******************************************************************************/
.align 2
aes128_xorcolumns_rotword:
ldr r12, [sp, #56] // restore 'rkeys' address
// ---- new rkey word 7 (from S7 in r11) ----
ldr.w r5, [r12, #28] // load rkey word of rkey from prev round
movw r4, #0xc0c0
movt r4, #0xc0c0 // r4 <- 0xc0c0c0c0
eor r11, r5, r11, ror #2 // r11<- r5 ^ (r11 >>> 2)
bic r11, r4, r11 // r11<- ~r11 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r9, r5, r11, ror #2 // r9 <- r5 ^ (r11 >>> 2)
and r9, r9, r4, ror #2 // r9 <- r9 & 0x30303030
orr r11, r11, r9 // r11<- r11 | r9
eor r9, r5, r11, ror #2 // r9 <- r5 ^ (r11 >>> 2)
and r9, r9, r4, ror #4 // r9 <- r9 & 0x0c0c0c0c
orr r11, r11, r9 // r11<- r11 | r9
eor r9, r5, r11, ror #2 // r9 <- r5 ^ (r11 >>> 2)
and r9, r9, r4, ror #6 // r9 <- r9 & 0x03030303
orr r11, r11, r9 // r11<- r11 | r9
mvn r9, r5 // NOT omitted in sbox
// ---- new rkey word 6 (from S6 in r2) ----
ldr.w r5, [r12, #24] // load rkey word of rkey from prev round
str r9, [r12, #28] // store new rkey word after NOT
str r11, [r12, #60] // store new rkey word in 'rkeys'
eor r10, r5, r2, ror #2 // r10<- r5 ^ (r2 >>> 2)
bic r10, r4, r10 // r10<- ~r10 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r9, r5, r10, ror #2 // r9 <- r5 ^ (r10 >>> 2)
and r9, r9, r4, ror #2 // r9 <- r9 & 0x30303030
orr r10, r10, r9 // r10<- r10 | r9
eor r9, r5, r10, ror #2 // r9 <- r5 ^ (r10 >>> 2)
and r9, r9, r4, ror #4 // r9 <- r9 & 0x0c0c0c0c
orr r10, r10, r9 // r10<- r10 | r9
eor r9, r5, r10, ror #2 // r9 <- r5 ^ (r10 >>> 2)
and r9, r9, r4, ror #6 // r9 <- r9 & 0x03030303
orr r10, r10, r9 // r10<- r10 | r9
mvn r9, r5 // NOT omitted in sbox
// ---- new rkey word 5 (from S5 in r0) ----
ldr.w r2, [r12, #20] // load rkey word of rkey from prev round
str r9, [r12, #24] // store new rkey word after NOT
str r10, [r12, #56] // store new rkey word in 'rkeys'
eor r9, r2, r0, ror #2 // r9 <- r2 ^ (r0 >>> 2)
and r9, r4, r9 // r9 <- r9 & 0xc0c0c0c0
eor r0, r2, r9, ror #2 // r0 <- r2 ^ (r9 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r9, r9, r0 // r9 <- r9 | r0
eor r0, r2, r9, ror #2 // r0 <- r2 ^ (r9 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r9, r9, r0 // r9 <- r9 | r0
eor r0, r2, r9, ror #2 // r0 <- r2 ^ (r9 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r9, r9, r0 // r9 <- r9 | r0
// ---- new rkey word 4 (from S4 in r8) ----
ldr.w r2, [r12, #16] // load rkey word of rkey from prev round
str.w r9, [r12, #52] // store new rkey word in 'rkeys'
eor r8, r2, r8, ror #2 // r8 <- r2 ^ (r8 >>> 2)
and r8, r4, r8 // r8 <- r8 & 0xc0c0c0c0
eor r0, r2, r8, ror #2 // r0 <- r2 ^ (r8 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r8, r8, r0 // r8 <- r8 | r0
eor r0, r2, r8, ror #2 // r0 <- r2 ^ (r8 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r8, r8, r0 // r8 <- r8 | r0
eor r0, r2, r8, ror #2 // r0 <- r2 ^ (r8 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r8, r8, r0 // r8 <- r8 | r0
// ---- new rkey word 3 (from S3 in r7) ----
ldr.w r2, [r12, #12] // load rkey word of rkey from prev round
str.w r8, [r12, #48] // store new rkey word in 'rkeys'
eor r7, r2, r7, ror #2 // r7 <- r2 ^ (r7 >>> 2)
and r7, r4, r7 // r7 <- r7 & 0xc0c0c0c0
eor r0, r2, r7, ror #2 // r0 <- r2 ^ (r7 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r7, r7, r0 // r7 <- r7 | r0
eor r0, r2, r7, ror #2 // r0 <- r2 ^ (r7 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r7, r7, r0 // r7 <- r7 | r0
eor r0, r2, r7, ror #2 // r0 <- r2 ^ (r7 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r7, r7, r0 // r7 <- r7 | r0
// ---- new rkey word 2 (from S2 in r6) ----
ldr.w r2, [r12, #8] // load rkey word of rkey from prev round
str.w r7, [r12, #44] // store new rkey word in 'rkeys'
eor r6, r2, r6, ror #2 // r6 <- r2 ^ (r6 >>> 2)
bic r6, r4, r6 // r6 <- ~r6 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r0, r2, r6, ror #2 // r0 <- r2 ^ (r6 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r6, r6, r0 // r6 <- r6 | r0
eor r0, r2, r6, ror #2 // r0 <- r2 ^ (r6 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r6, r6, r0 // r6 <- r6 | r0
eor r0, r2, r6, ror #2 // r0 <- r2 ^ (r6 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r6, r6, r0 // r6 <- r6 | r0
mvn r0, r2 // NOT omitted in sbox
// ---- new rkey word 1 (from S1 in r3) ----
ldr.w r2, [r12, #4] // load rkey word of rkey from prev round
str.w r0, [r12, #8] // store new rkey word after NOT
str.w r6, [r12, #40] // store new rkey word in 'rkeys'
eor r5, r2, r3, ror #2 // r5 <- r2 ^ (r3 >>> 2)
bic r5, r4, r5 // r5 <- ~r5 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r0, r2, r5, ror #2 // r0 <- r2 ^ (r5 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r5, r5, r0 // r5 <- r5 | r0
eor r0, r2, r5, ror #2 // r0 <- r2 ^ (r5 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r5, r5, r0 // r5 <- r5 | r0
eor r0, r2, r5, ror #2 // r0 <- r2 ^ (r5 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r5, r5, r0 // r5 <- r5 | r0
mvn r0, r2 // NOT omitted in sbox
// ---- new rkey word 0 (from S0 in r1); r12 now moves to the new round key ----
ldr.w r2, [r12], #32 // load rkey word of rkey from prev round, then r12 <- r12 + 32
str.w r0, [r12, #-28] // store new rkey word after NOT
str.w r5, [r12, #4] // store new rkey word in 'rkeys'
eor r3, r2, r1, ror #2 // r3 <- r2 ^ (r1 >>> 2)
and r3, r4, r3 // r3 <- r3 & 0xc0c0c0c0
eor r0, r2, r3, ror #2 // r0 <- r2 ^ (r3 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r3, r3, r0 // r3 <- r3 | r0
eor r0, r2, r3, ror #2 // r0 <- r2 ^ (r3 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r3, r3, r0 // r3 <- r3 | r0
eor r0, r2, r3, ror #2 // r0 <- r2 ^ (r3 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r4, r3, r0 // r4 <- r3 | r0 (the mask in r4 is no longer needed)
str.w r4, [r12] // store new rkey word in 'rkeys'
str.w r12, [sp, #56] // store the new rkeys address on the stack
bx lr
/******************************************************************************
* Subroutine that XORs the columns after the S-box during the AES-256 key
* schedule round function, for rounds i such that (i % 4) == 0.
* Differs from 'aes128_xorcolumns_rotword' by the rkeys' indexes to be involved
* in XORs.
*
* Input  : r1, r3, r6, r7, r8, r0, r2, r11 = S-box outputs S0, ..., S7 (same
*          register allocation as on return from 'sbox').
*          [sp, #56] = address P of the 8-word round key that is XORed in.
* Output : the 8 new words are stored at byte offsets 64 to 92 from P and are
*          also left in r4-r11 (r4 = word 0, ..., r11 = word 7).
*          r12 and [sp, #56] hold P + 32.
*          Words 1, 2, 6 and 7 of the round key at P are written back bitwise
*          complemented.
* Clobber: r0, r2, r3 (r1 is only read).
*
* Each new word is built from the loaded round key word 'prev' and the S-box
* output 'S' as follows ('>>>' is a rotation to the right):
*       new  = (prev ^ (S   >>> 2)) & 0xc0c0c0c0
*       new |= (prev ^ (new >>> 2)) & 0x30303030
*       new |= (prev ^ (new >>> 2)) & 0x0c0c0c0c
*       new |= (prev ^ (new >>> 2)) & 0x03030303
* For S1, S2, S6 and S7 the first step uses 'bic' instead of 'and', i.e. the
* value is complemented before being masked (NOT omitted in sbox).
******************************************************************************/
.align 2
aes256_xorcolumns_rotword:
ldr r12, [sp, #56] // restore 'rkeys' address
// ---- new rkey word 7 (from S7 in r11) ----
ldr.w r5, [r12, #28] // load rkey word of rkey from prev round
movw r4, #0xc0c0
movt r4, #0xc0c0 // r4 <- 0xc0c0c0c0
eor r11, r5, r11, ror #2 // r11<- r5 ^ (r11 >>> 2)
bic r11, r4, r11 // r11<- ~r11 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r9, r5, r11, ror #2 // r9 <- r5 ^ (r11 >>> 2)
and r9, r9, r4, ror #2 // r9 <- r9 & 0x30303030
orr r11, r11, r9 // r11<- r11 | r9
eor r9, r5, r11, ror #2 // r9 <- r5 ^ (r11 >>> 2)
and r9, r9, r4, ror #4 // r9 <- r9 & 0x0c0c0c0c
orr r11, r11, r9 // r11<- r11 | r9
eor r9, r5, r11, ror #2 // r9 <- r5 ^ (r11 >>> 2)
and r9, r9, r4, ror #6 // r9 <- r9 & 0x03030303
orr r11, r11, r9 // r11<- r11 | r9
mvn r9, r5 // NOT omitted in sbox
// ---- new rkey word 6 (from S6 in r2) ----
ldr.w r5, [r12, #24] // load rkey word of rkey from prev round
str r9, [r12, #28] // store new rkey word after NOT
str r11, [r12, #92] // store new rkey word in 'rkeys'
eor r10, r5, r2, ror #2 // r10<- r5 ^ (r2 >>> 2)
bic r10, r4, r10 // r10<- ~r10 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r9, r5, r10, ror #2 // r9 <- r5 ^ (r10 >>> 2)
and r9, r9, r4, ror #2 // r9 <- r9 & 0x30303030
orr r10, r10, r9 // r10<- r10 | r9
eor r9, r5, r10, ror #2 // r9 <- r5 ^ (r10 >>> 2)
and r9, r9, r4, ror #4 // r9 <- r9 & 0x0c0c0c0c
orr r10, r10, r9 // r10<- r10 | r9
eor r9, r5, r10, ror #2 // r9 <- r5 ^ (r10 >>> 2)
and r9, r9, r4, ror #6 // r9 <- r9 & 0x03030303
orr r10, r10, r9 // r10<- r10 | r9
mvn r9, r5 // NOT omitted in sbox
// ---- new rkey word 5 (from S5 in r0) ----
ldr.w r2, [r12, #20] // load rkey word of rkey from prev round
str r9, [r12, #24] // store new rkey word after NOT
str r10, [r12, #88] // store new rkey word in 'rkeys'
eor r9, r2, r0, ror #2 // r9 <- r2 ^ (r0 >>> 2)
and r9, r4, r9 // r9 <- r9 & 0xc0c0c0c0
eor r0, r2, r9, ror #2 // r0 <- r2 ^ (r9 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r9, r9, r0 // r9 <- r9 | r0
eor r0, r2, r9, ror #2 // r0 <- r2 ^ (r9 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r9, r9, r0 // r9 <- r9 | r0
eor r0, r2, r9, ror #2 // r0 <- r2 ^ (r9 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r9, r9, r0 // r9 <- r9 | r0
// ---- new rkey word 4 (from S4 in r8) ----
ldr.w r2, [r12, #16] // load rkey word of rkey from prev round
str.w r9, [r12, #84] // store new rkey word in 'rkeys'
eor r8, r2, r8, ror #2 // r8 <- r2 ^ (r8 >>> 2)
and r8, r4, r8 // r8 <- r8 & 0xc0c0c0c0
eor r0, r2, r8, ror #2 // r0 <- r2 ^ (r8 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r8, r8, r0 // r8 <- r8 | r0
eor r0, r2, r8, ror #2 // r0 <- r2 ^ (r8 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r8, r8, r0 // r8 <- r8 | r0
eor r0, r2, r8, ror #2 // r0 <- r2 ^ (r8 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r8, r8, r0 // r8 <- r8 | r0
// ---- new rkey word 3 (from S3 in r7) ----
ldr.w r2, [r12, #12] // load rkey word of rkey from prev round
str.w r8, [r12, #80] // store new rkey word in 'rkeys'
eor r7, r2, r7, ror #2 // r7 <- r2 ^ (r7 >>> 2)
and r7, r4, r7 // r7 <- r7 & 0xc0c0c0c0
eor r0, r2, r7, ror #2 // r0 <- r2 ^ (r7 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r7, r7, r0 // r7 <- r7 | r0
eor r0, r2, r7, ror #2 // r0 <- r2 ^ (r7 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r7, r7, r0 // r7 <- r7 | r0
eor r0, r2, r7, ror #2 // r0 <- r2 ^ (r7 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r7, r7, r0 // r7 <- r7 | r0
// ---- new rkey word 2 (from S2 in r6) ----
ldr.w r2, [r12, #8] // load rkey word of rkey from prev round
str.w r7, [r12, #76] // store new rkey word in 'rkeys'
eor r6, r2, r6, ror #2 // r6 <- r2 ^ (r6 >>> 2)
bic r6, r4, r6 // r6 <- ~r6 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r0, r2, r6, ror #2 // r0 <- r2 ^ (r6 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r6, r6, r0 // r6 <- r6 | r0
eor r0, r2, r6, ror #2 // r0 <- r2 ^ (r6 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r6, r6, r0 // r6 <- r6 | r0
eor r0, r2, r6, ror #2 // r0 <- r2 ^ (r6 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r6, r6, r0 // r6 <- r6 | r0
mvn r0, r2 // NOT omitted in sbox
// ---- new rkey word 1 (from S1 in r3) ----
ldr.w r2, [r12, #4] // load rkey word of rkey from prev round
str.w r0, [r12, #8] // store new rkey word after NOT
str.w r6, [r12, #72] // store new rkey word in 'rkeys'
eor r5, r2, r3, ror #2 // r5 <- r2 ^ (r3 >>> 2)
bic r5, r4, r5 // r5 <- ~r5 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r0, r2, r5, ror #2 // r0 <- r2 ^ (r5 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r5, r5, r0 // r5 <- r5 | r0
eor r0, r2, r5, ror #2 // r0 <- r2 ^ (r5 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r5, r5, r0 // r5 <- r5 | r0
eor r0, r2, r5, ror #2 // r0 <- r2 ^ (r5 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r5, r5, r0 // r5 <- r5 | r0
mvn r0, r2 // NOT omitted in sbox
// ---- new rkey word 0 (from S0 in r1); r12 is advanced by 32 bytes here ----
ldr.w r2, [r12], #32 // load rkey word of rkey from prev round, then r12 <- r12 + 32
str.w r0, [r12, #-28] // store new rkey word after NOT
str.w r5, [r12, #36] // store new rkey word in 'rkeys'
eor r3, r2, r1, ror #2 // r3 <- r2 ^ (r1 >>> 2)
and r3, r4, r3 // r3 <- r3 & 0xc0c0c0c0
eor r0, r2, r3, ror #2 // r0 <- r2 ^ (r3 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r3, r3, r0 // r3 <- r3 | r0
eor r0, r2, r3, ror #2 // r0 <- r2 ^ (r3 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r3, r3, r0 // r3 <- r3 | r0
eor r0, r2, r3, ror #2 // r0 <- r2 ^ (r3 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r4, r3, r0 // r4 <- r3 | r0 (the mask in r4 is no longer needed)
str.w r4, [r12, #32] // store new rkey word in 'rkeys'
str.w r12, [sp, #56] // store the new rkeys address on the stack
bx lr
/******************************************************************************
* Subroutine that XORs the columns after the S-box during the AES-256 key
* schedule round function, for rounds i such that (i % 4) == 0.
* It differs from 'aes256_xorcolumns_rotword' by the omission of the rotword
* operation (i.e. 'ror #26' instead of 'ror #2').
* NOTE(review): the round condition above is worded exactly as for
* 'aes256_xorcolumns_rotword'; the AES-256 caller is not part of this section,
* so confirm which rounds actually use this variant.
*
* Input  : r1, r3, r6, r7, r8, r0, r2, r11 = S-box outputs S0, ..., S7 (same
*          register allocation as on return from 'sbox').
*          [sp, #56] = address P of the 8-word round key that is XORed in.
* Output : the 8 new words are stored at byte offsets 64 to 92 from P and are
*          also left in r4-r11 (r4 = word 0, ..., r11 = word 7).
*          r12 and [sp, #56] hold P + 32.
*          Words 1, 2, 6 and 7 of the round key at P are written back bitwise
*          complemented.
* Clobber: r0, r2, r3 (r1 is only read).
*
* Each new word is built from the loaded round key word 'prev' and the S-box
* output 'S' as follows ('>>>' is a rotation to the right):
*       new  = (prev ^ (S   >>> 26)) & 0xc0c0c0c0
*       new |= (prev ^ (new >>> 2))  & 0x30303030
*       new |= (prev ^ (new >>> 2))  & 0x0c0c0c0c
*       new |= (prev ^ (new >>> 2))  & 0x03030303
* For S1, S2, S6 and S7 the first step uses 'bic' instead of 'and', i.e. the
* value is complemented before being masked (NOT omitted in sbox).
******************************************************************************/
.align 2
aes256_xorcolumns:
ldr r12, [sp, #56] // restore 'rkeys' address
// ---- new rkey word 7 (from S7 in r11) ----
ldr.w r5, [r12, #28] // load rkey word of rkey from prev round
movw r4, #0xc0c0
movt r4, #0xc0c0 // r4 <- 0xc0c0c0c0
eor r11, r5, r11, ror #26 // r11<- r5 ^ (r11 >>> 26)
bic r11, r4, r11 // r11<- ~r11 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r9, r5, r11, ror #2 // r9 <- r5 ^ (r11 >>> 2)
and r9, r9, r4, ror #2 // r9 <- r9 & 0x30303030
orr r11, r11, r9 // r11<- r11 | r9
eor r9, r5, r11, ror #2 // r9 <- r5 ^ (r11 >>> 2)
and r9, r9, r4, ror #4 // r9 <- r9 & 0x0c0c0c0c
orr r11, r11, r9 // r11<- r11 | r9
eor r9, r5, r11, ror #2 // r9 <- r5 ^ (r11 >>> 2)
and r9, r9, r4, ror #6 // r9 <- r9 & 0x03030303
orr r11, r11, r9 // r11<- r11 | r9
mvn r9, r5 // NOT omitted in sbox
// ---- new rkey word 6 (from S6 in r2) ----
ldr.w r5, [r12, #24] // load rkey word of rkey from prev round
str r9, [r12, #28] // store new rkey word after NOT
str r11, [r12, #92] // store new rkey word in 'rkeys'
eor r10, r5, r2, ror #26 // r10<- r5 ^ (r2 >>> 26)
bic r10, r4, r10 // r10<- ~r10 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r9, r5, r10, ror #2 // r9 <- r5 ^ (r10 >>> 2)
and r9, r9, r4, ror #2 // r9 <- r9 & 0x30303030
orr r10, r10, r9 // r10<- r10 | r9
eor r9, r5, r10, ror #2 // r9 <- r5 ^ (r10 >>> 2)
and r9, r9, r4, ror #4 // r9 <- r9 & 0x0c0c0c0c
orr r10, r10, r9 // r10<- r10 | r9
eor r9, r5, r10, ror #2 // r9 <- r5 ^ (r10 >>> 2)
and r9, r9, r4, ror #6 // r9 <- r9 & 0x03030303
orr r10, r10, r9 // r10<- r10 | r9
mvn r9, r5 // NOT omitted in sbox
// ---- new rkey word 5 (from S5 in r0) ----
ldr.w r2, [r12, #20] // load rkey word of rkey from prev round
str r9, [r12, #24] // store new rkey word after NOT
str r10, [r12, #88] // store new rkey word in 'rkeys'
eor r9, r2, r0, ror #26 // r9 <- r2 ^ (r0 >>> 26)
and r9, r4, r9 // r9 <- r9 & 0xc0c0c0c0
eor r0, r2, r9, ror #2 // r0 <- r2 ^ (r9 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r9, r9, r0 // r9 <- r9 | r0
eor r0, r2, r9, ror #2 // r0 <- r2 ^ (r9 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r9, r9, r0 // r9 <- r9 | r0
eor r0, r2, r9, ror #2 // r0 <- r2 ^ (r9 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r9, r9, r0 // r9 <- r9 | r0
// ---- new rkey word 4 (from S4 in r8) ----
ldr.w r2, [r12, #16] // load rkey word of rkey from prev round
str.w r9, [r12, #84] // store new rkey word in 'rkeys'
eor r8, r2, r8, ror #26 // r8 <- r2 ^ (r8 >>> 26)
and r8, r4, r8 // r8 <- r8 & 0xc0c0c0c0
eor r0, r2, r8, ror #2 // r0 <- r2 ^ (r8 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r8, r8, r0 // r8 <- r8 | r0
eor r0, r2, r8, ror #2 // r0 <- r2 ^ (r8 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r8, r8, r0 // r8 <- r8 | r0
eor r0, r2, r8, ror #2 // r0 <- r2 ^ (r8 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r8, r8, r0 // r8 <- r8 | r0
// ---- new rkey word 3 (from S3 in r7) ----
ldr.w r2, [r12, #12] // load rkey word of rkey from prev round
str.w r8, [r12, #80] // store new rkey word in 'rkeys'
eor r7, r2, r7, ror #26 // r7 <- r2 ^ (r7 >>> 26)
and r7, r4, r7 // r7 <- r7 & 0xc0c0c0c0
eor r0, r2, r7, ror #2 // r0 <- r2 ^ (r7 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r7, r7, r0 // r7 <- r7 | r0
eor r0, r2, r7, ror #2 // r0 <- r2 ^ (r7 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r7, r7, r0 // r7 <- r7 | r0
eor r0, r2, r7, ror #2 // r0 <- r2 ^ (r7 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r7, r7, r0 // r7 <- r7 | r0
// ---- new rkey word 2 (from S2 in r6) ----
ldr.w r2, [r12, #8] // load rkey word of rkey from prev round
str.w r7, [r12, #76] // store new rkey word in 'rkeys'
eor r6, r2, r6, ror #26 // r6 <- r2 ^ (r6 >>> 26)
bic r6, r4, r6 // r6 <- ~r6 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r0, r2, r6, ror #2 // r0 <- r2 ^ (r6 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r6, r6, r0 // r6 <- r6 | r0
eor r0, r2, r6, ror #2 // r0 <- r2 ^ (r6 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r6, r6, r0 // r6 <- r6 | r0
eor r0, r2, r6, ror #2 // r0 <- r2 ^ (r6 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r6, r6, r0 // r6 <- r6 | r0
mvn r0, r2 // NOT omitted in sbox
// ---- new rkey word 1 (from S1 in r3) ----
ldr.w r2, [r12, #4] // load rkey word of rkey from prev round
str.w r0, [r12, #8] // store new rkey word after NOT
str.w r6, [r12, #72] // store new rkey word in 'rkeys'
eor r5, r2, r3, ror #26 // r5 <- r2 ^ (r3 >>> 26)
bic r5, r4, r5 // r5 <- ~r5 & 0xc0c0c0c0 (NOT omitted in sbox)
eor r0, r2, r5, ror #2 // r0 <- r2 ^ (r5 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r5, r5, r0 // r5 <- r5 | r0
eor r0, r2, r5, ror #2 // r0 <- r2 ^ (r5 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r5, r5, r0 // r5 <- r5 | r0
eor r0, r2, r5, ror #2 // r0 <- r2 ^ (r5 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r5, r5, r0 // r5 <- r5 | r0
mvn r0, r2 // NOT omitted in sbox
// ---- new rkey word 0 (from S0 in r1); r12 is advanced by 32 bytes here ----
ldr.w r2, [r12], #32 // load rkey word of rkey from prev round, then r12 <- r12 + 32
str.w r0, [r12, #-28] // store new rkey word after NOT
str.w r5, [r12, #36] // store new rkey word in 'rkeys'
eor r3, r2, r1, ror #26 // r3 <- r2 ^ (r1 >>> 26)
and r3, r4, r3 // r3 <- r3 & 0xc0c0c0c0
eor r0, r2, r3, ror #2 // r0 <- r2 ^ (r3 >>> 2)
and r0, r0, r4, ror #2 // r0 <- r0 & 0x30303030
orr r3, r3, r0 // r3 <- r3 | r0
eor r0, r2, r3, ror #2 // r0 <- r2 ^ (r3 >>> 2)
and r0, r0, r4, ror #4 // r0 <- r0 & 0x0c0c0c0c
orr r3, r3, r0 // r3 <- r3 | r0
eor r0, r2, r3, ror #2 // r0 <- r2 ^ (r3 >>> 2)
and r0, r0, r4, ror #6 // r0 <- r0 & 0x03030303
orr r4, r3, r0 // r4 <- r3 | r0 (the mask in r4 is no longer needed)
str.w r4, [r12, #32] // store new rkey word in 'rkeys'
str.w r12, [sp, #56] // store the new rkeys address on the stack
bx lr
/******************************************************************************
* Applies ShiftRows^(-1) on a round key to match the fixsliced representation.
*
* Input  : r12 points 32 bytes past the round key to process.
* Output : the 8 words of that round key are transformed in place; r12 is back
*          to its entry value on return.
* Stack  : [sp, #52] of the caller's frame is used to save the link register,
*          because r14 serves as a mask register inside the loop.
* Clobber: r0-r3 and the flags. The last iteration also loads the word located
*          at the entry value of r12 (left in r2).
******************************************************************************/
.align 2
inv_shiftrows_1:
    str     r14, [sp, #52]              // save link register
    ldr.w   r2, [r12, #-32]!            // r12 <- r12 - 32, r2 <- first word
    movw    r14, #0x0300
    movt    r14, #0x0c0f                // r14 <- 0x0c0f0300 for ShiftRows^[-1]
    mov.w   r1, #8                      // r1  <- number of words to process
loop_inv_sr_1:
    // SWAPMOVE of r2 with itself: mask 0x0c0f0300, shift 4
    eor     r0, r2, r2, lsr #4
    and     r0, r0, r14
    eor     r2, r2, r0
    eor     r2, r2, r0, lsl #4
    // SWAPMOVE of r2 with itself: mask 0x33003300, shift 2 (result in r3)
    mov.w   r3, #0x33003300             // reloaded: r3 also receives the result
    eor     r0, r2, r2, lsr #2
    and     r0, r0, r3
    eor     r2, r2, r0
    eor     r3, r2, r0, lsl #2
    ldr.w   r2, [r12, #4]!              // fetch the next word, r12 <- r12 + 4
    str.w   r3, [r12, #-4]              // write the processed word back
    subs    r1, r1, #1
    bne     loop_inv_sr_1
    ldr     r14, [sp, #52]              // restore link register
    bx      lr
/******************************************************************************
* Applies ShiftRows^(-2) on a round key to match the fixsliced representation.
* Only needed for the fully-fixsliced (ffs) representation.
*
* Input  : r12 points 32 bytes past the round key to process.
* Output : the 8 words of that round key are transformed in place; r12 is back
*          to its entry value on return.
* Stack  : [sp, #52] of the caller's frame is used to save the link register,
*          because r14 serves as a mask register inside the loop.
* Clobber: r0-r3 and the flags. The last iteration also loads the word located
*          at the entry value of r12 (left in r2).
******************************************************************************/
.align 2
inv_shiftrows_2:
    str     r14, [sp, #52]              // save link register
    ldr.w   r2, [r12, #-32]!            // r12 <- r12 - 32, r2 <- first word
    mov.w   r14, #0x0f000f00            // r14 <- 0x0f000f00 for ShiftRows^[-2]
    mov.w   r1, #8                      // r1  <- number of words to process
loop_inv_sr_2:
    // SWAPMOVE of r2 with itself: mask 0x0f000f00, shift 4 (result in r3)
    eor     r0, r2, r2, lsr #4
    and     r0, r0, r14
    eor     r2, r2, r0
    eor     r3, r2, r0, lsl #4
    ldr.w   r2, [r12, #4]!              // fetch the next word, r12 <- r12 + 4
    str.w   r3, [r12, #-4]              // write the processed word back
    subs    r1, r1, #1
    bne     loop_inv_sr_2
    ldr     r14, [sp, #52]              // restore link register
    bx      lr
/******************************************************************************
* Applies ShiftRows^(-3) on a round key to match the fixsliced representation.
* Only needed for the fully-fixsliced (ffs) representation.
*
* Input  : r12 points 32 bytes past the round key to process.
* Output : the 8 words of that round key are transformed in place; r12 is back
*          to its entry value on return.
* Stack  : [sp, #52] of the caller's frame is used to save the link register,
*          because r14 serves as a mask register inside the loop.
* Clobber: r0-r3 and the flags. The last iteration also loads the word located
*          at the entry value of r12 (left in r2).
******************************************************************************/
.align 2
inv_shiftrows_3:
    str     r14, [sp, #52]              // save link register
    ldr.w   r2, [r12, #-32]!            // r12 <- r12 - 32, r2 <- first word
    movw    r14, #0x0c00
    movt    r14, #0x030f                // r14 <- 0x030f0c00 for ShiftRows^[-3]
    mov.w   r1, #8                      // r1  <- number of words to process
loop_inv_sr_3:
    // SWAPMOVE of r2 with itself: mask 0x030f0c00, shift 4
    eor     r0, r2, r2, lsr #4
    and     r0, r0, r14
    eor     r2, r2, r0
    eor     r2, r2, r0, lsl #4
    // SWAPMOVE of r2 with itself: mask 0x33003300, shift 2 (result in r3)
    mov.w   r3, #0x33003300             // reloaded: r3 also receives the result
    eor     r0, r2, r2, lsr #2
    and     r0, r0, r3
    eor     r2, r2, r0
    eor     r3, r2, r0, lsl #2
    ldr.w   r2, [r12, #4]!              // fetch the next word, r12 <- r12 + 4
    str.w   r3, [r12, #-4]              // write the processed word back
    subs    r1, r1, #1
    bne     loop_inv_sr_3
    ldr     r14, [sp, #52]              // restore link register
    bx      lr
/******************************************************************************
* Fully bitsliced AES-128 key schedule to match the fully-fixsliced (ffs)
* representation. Note that it is possible to pass two different keys as input
* parameters if one wants to encrypt 2 blocks with two different keys.
******************************************************************************/
@ void aes128_keyschedule_ffs(u32* rkeys, const u8* key);
.global aes128_keyschedule_ffs
.type aes128_keyschedule_ffs,%function
.align 2
aes128_keyschedule_ffs:
push {r0-r12,r14}
sub.w sp, #56 // allow space on the stack for tmp var
ldr.w r4, [r1] // load the 128-bit key in r4-r7
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
ldr.w r8, [r1] // load the 128-bit key in r8-r11
ldr r9, [r1, #4]
ldr r10,[r1, #8]
ldr r11,[r1, #12]
bl packing // pack the master key
ldr.w r0, [sp, #56] // restore 'rkeys' address
stm r0, {r4-r11} // store the packed master key in 'rkeys'
bl sbox // apply the sbox to the master key
eor r11, r11, #0x00000300 // add the 1st rconst
bl aes128_xorcolumns_rotword
bl sbox // apply the sbox to the master key
eor r2, r2, #0x00000300 // add the 2nd rconst
bl aes128_xorcolumns_rotword
bl inv_shiftrows_1
bl sbox // apply the sbox to the master key
eor r0, r0, #0x00000300 // add the 3rd rconst
bl aes128_xorcolumns_rotword
bl inv_shiftrows_2
bl sbox // apply the sbox to the master key
eor r8, r8, #0x00000300 // add the 4th rconst
bl aes128_xorcolumns_rotword
bl inv_shiftrows_3
bl sbox // apply the sbox to the master key
eor r7, r7, #0x00000300 // add the 5th rconst
bl aes128_xorcolumns_rotword
bl sbox // apply the sbox to the master key
eor r6, r6, #0x00000300 // add the 6th rconst
bl aes128_xorcolumns_rotword
bl inv_shiftrows_1
bl sbox // apply the sbox to the master key
eor r3, r3, #0x00000300 // add the 7th rconst
bl aes128_xorcolumns_rotword
bl inv_shiftrows_2
bl sbox // apply the sbox to the master key
eor r1, r1, #0x00000300 // add the 8th rconst
bl aes128_xorcolumns_rotword
bl inv_shiftrows_3
bl sbox // apply the sbox to the master key
eor r11, r11, #0x00000300 // add the 9th rconst
eor r2, r2, #0x00000300 // add the 9th rconst
eor r8, r8, #0x00000300 // add the 9th rconst
eor r7, r7, #0x00000300 // add the 9th rconst
bl aes128_xorcolumns_rotword
bl sbox // apply the sbox to the master key
eor r2, r2, #0x00000300 // add the 10th rconst
eor r0, r0, #0x00000300 // add the 10th rconst
eor r7, r7, #0x00000300 // add the 10th rconst
eor r6, r6, #0x00000300 // add the 10th rconst
bl aes128_xorcolumns_rotword
bl inv_shiftrows_1
mvn r5, r5 // add the NOT for the last rkey
mvn r6, r6 // add the NOT for the last rkey
mvn r10, r10 // add the NOT for the last rkey
mvn r11, r11 // add the NOT for the last rkey
strd r5, r6, [r12, #4]
strd r10, r11, [r12, #24]
ldrd r0, r1, [r12, #-316]
ldrd r2, r3, [r12, #-296]
mvn r0, r0 // remove the NOT for the key whitening
mvn r1, r1 // remove the NOT for the key whitening
mvn r2, r2 // remove the NOT for the key whitening
mvn r3, r3 // remove the NOT for the key whitening
strd r0, r1, [r12, #-316]
strd r2, r3, [r12, #-296]
add.w sp, #56 // restore stack
pop {r0-r12, r14} // restore context
bx lr
/******************************************************************************
* Fully bitsliced AES-256 key schedule to match the fully-fixsliced (ffs)
* representation.
*
* This entry point takes a single 'key' pointer (see the prototype below): each
* 16-byte half of the key is loaded into both r4-r7 and r8-r11 before
* 'packing' is called, so two different keys cannot be supplied through this
* interface.
*
* Arguments: r0 = rkeys (output buffer), r1 = key (32 input bytes).
* Stack frame: r0-r12 and r14 are pushed (56 bytes) and 56 more bytes are
* reserved, so [sp, #56] / [sp, #60] hold the saved 'rkeys' / 'key' pointers
* and [sp, #52] is the slot in which the inv_shiftrows_* helpers save the link
* register. All pushed registers are popped again before returning.
*
* NOTE(review): the epilogue offsets (#-444/#-424 versus #4/#24 from r12) span
* fourteen 32-byte round keys, so 'rkeys' is presumably 15 * 32 = 480 bytes
* long -- confirm against the callers.
* NOTE(review): inv_shiftrows_2/3 rewrite the 8 words just below r12;
* inv_shiftrows_1 presumably does the same (only its tail is visible here).
******************************************************************************/
@ void aes256_keyschedule_ffs(u32* rkeys, const u8* key);
.global aes256_keyschedule_ffs
.type aes256_keyschedule_ffs,%function
.align 2
aes256_keyschedule_ffs:
push {r0-r12,r14} // save context; saved r0/r1 end up at [sp, #56]/[sp, #60] below
sub.w sp, #56 // allow space on the stack for tmp var
ldr.w r4, [r1] // load the 128 first key bits in r4-r7
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
ldr.w r8, [r1] // load the 128 first key bits again in r8-r11
ldr r9, [r1, #4]
ldr r10,[r1, #8]
ldr r11,[r1, #12]
bl packing // pack the first half of the master key
ldrd r0,r1, [sp, #56] // restore 'rkeys' and 'key' addresses
stm r0, {r4-r11} // store the packed first key half at 'rkeys'
add.w r1, #16 // points to the 128 last bits of the key
ldr.w r4, [r1] // load the 128 last key bits in r4-r7
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
ldr.w r8, [r1] // load the 128 last key bits again in r8-r11
ldr r9, [r1, #4]
ldr r10,[r1, #8]
ldr r11,[r1, #12]
bl packing // pack the second half of the master key
ldr.w r0, [sp, #56] // restore 'rkeys' address
add.w r0, #32 // points to the second 32-byte block of 'rkeys'
stm r0, {r4-r11} // store the packed second key half at 'rkeys' + 32
bl sbox // sbox step for the next round key
eor r11, r11, #0x00000300 // add the 1st rconst
bl aes256_xorcolumns_rotword
bl sbox // sbox step for the next round key (no rconst, no rotword)
bl aes256_xorcolumns
bl inv_shiftrows_1
bl sbox // sbox step for the next round key
eor r2, r2, #0x00000300 // add the 2nd rconst
bl aes256_xorcolumns_rotword
bl inv_shiftrows_2
bl sbox // sbox step for the next round key (no rconst, no rotword)
bl aes256_xorcolumns
bl inv_shiftrows_3
bl sbox // sbox step for the next round key
eor r0, r0, #0x00000300 // add the 3rd rconst
bl aes256_xorcolumns_rotword
bl sbox // sbox step for the next round key (no rconst, no rotword)
bl aes256_xorcolumns
bl inv_shiftrows_1
bl sbox // sbox step for the next round key
eor r8, r8, #0x00000300 // add the 4th rconst
bl aes256_xorcolumns_rotword
bl inv_shiftrows_2
bl sbox // sbox step for the next round key (no rconst, no rotword)
bl aes256_xorcolumns
bl inv_shiftrows_3
bl sbox // sbox step for the next round key
eor r7, r7, #0x00000300 // add the 5th rconst
bl aes256_xorcolumns_rotword
bl sbox // sbox step for the next round key (no rconst, no rotword)
bl aes256_xorcolumns
bl inv_shiftrows_1
bl sbox // sbox step for the next round key
eor r6, r6, #0x00000300 // add the 6th rconst
bl aes256_xorcolumns_rotword
bl inv_shiftrows_2
bl sbox // sbox step for the next round key (no rconst, no rotword)
bl aes256_xorcolumns
bl inv_shiftrows_3
bl sbox // sbox step for the last round key
eor r3, r3, #0x00000300 // add the 7th rconst
bl aes256_xorcolumns_rotword
// NOTE(review): r12 is advanced by one 32-byte round key here, presumably so
// that it ends on the last round key -- confirm against aes256_xorcolumns_rotword
add r12, #32
bl inv_shiftrows_1
mvn r5, r5 // add the NOT for the last rkey
mvn r6, r6 // add the NOT for the last rkey
mvn r10, r10 // add the NOT for the last rkey
mvn r11, r11 // add the NOT for the last rkey
ldrd r0, r1, [r12, #-28] // words 1-2 of the 32-byte block just below r12
ldrd r2, r3, [r12, #-8] // words 6-7 of the 32-byte block just below r12
strd r5, r6, [r12, #4] // overwrite words 1-2 of the 32-byte block at r12
strd r10, r11, [r12, #24] // overwrite words 6-7 of the 32-byte block at r12
mvn r0, r0 // add the NOT for the penultimate rkey
mvn r1, r1 // add the NOT for the penultimate rkey
mvn r2, r2 // add the NOT for the penultimate rkey
mvn r3, r3 // add the NOT for the penultimate rkey
ldrd r5, r6, [r12, #-444] // words 1-2 of the block 448 bytes below r12
ldrd r10, r11, [r12, #-424] // words 6-7 of the block 448 bytes below r12
strd r0, r1, [r12, #-28]
strd r2, r3, [r12, #-8]
mvn r5, r5 // remove the NOT for the key whitening
mvn r6, r6 // remove the NOT for the key whitening
mvn r10, r10 // remove the NOT for the key whitening
mvn r11, r11 // remove the NOT for the key whitening
strd r5, r6, [r12, #-444]
strd r10, r11, [r12, #-424]
add.w sp, #56 // restore stack
pop {r0-r12, r14} // restore context
bx lr
|