// Implemented by Ming-Shing Chen, Tung Chou and Markus Krausz.
// public domain

.syntax unified
.cpu cortex-m4
.fpu fpv4-sp-d16

@ -----------------------------------------------------------------------
@ gft_mul_v17
@
@ Branch-free XOR network over 32 words of 32 bits each.
@
@ In:   r0 = destination pointer (copied to r14; 32 words are stored at
@            byte offsets 0..124)
@       r1 = source pointer (copied to r12; words are loaded from byte
@            offsets 0..124)
@ Out:  every stored word is an XOR of words loaded from the source.
@
@ Register use:
@   r0-r9     ten of the running values ("g0".."g9" in the comments)
@   r10, r11  scratch used to pass S-register values through eor
@   s0-s31    the other running values ("f<k>" in the comments is s<k>)
@
@ r0-r12, r14 and d8-d15 (= s16-s31) are pushed on entry and popped before
@ "bx lr".  s0-s15 are overwritten and not restored.
@ All loads from the source are issued before the first store to the
@ destination.
@ -----------------------------------------------------------------------
.global gft_mul_v17
.type gft_mul_v17, %function
.align 2
gft_mul_v17:
    push { r0-r12, r14 }
    vpush {d8-d15}

    @ Generator output.  Entry k of "final state" lists the source word
    @ indices whose XOR ends up in destination word k (consistent with the
    @ entries the XOR phase never rewrites, e.g. 18, 19 and 26).
    @ final state: [{0, 1, 3, 4, 7, 8, 12, 15, 24, 25, 27, 28, 29, 31}, {0, 2, 3, 5, 6, 7, 9, 13, 14, 15, 24, 26, 27, 28, 30, 31}, {0, 3, 4, 10, 12, 24, 27, 28, 31}, {1, 2, 3, 5, 11, 13, 25, 26, 27, 29, 30, 31}, {0, 2, 3, 5, 6, 8, 10, 11, 14, 15, 25, 27, 28}, {1, 2, 4, 5, 7, 9, 10, 14, 24, 25, 26, 27, 29}, {1, 2, 3, 4, 5, 6, 9, 10, 11, 13, 15, 24, 26, 27, 30}, {0, 1, 2, 4, 7, 8, 9, 10, 12, 13, 14, 15, 25, 26, 31}, {0, 3, 5, 6, 7, 9, 12, 13, 14, 21, 23, 29, 31}, {1, 2, 3, 4, 5, 6, 8, 9, 12, 15, 20, 21, 22, 23, 28, 29, 30, 31}, {0, 5, 11, 12, 13, 20, 22, 23, 28, 30, 31}, {1, 4, 5, 10, 11, 12, 21, 22, 29, 30}, {1, 2, 4, 5, 6, 7, 8, 9, 11, 12, 15, 18, 22, 26, 30}, {0, 1, 3, 4, 6, 8, 10, 11, 13, 14, 15, 19, 23, 27, 31}, {0, 1, 2, 5, 6, 8, 11, 12, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}, {0, 3, 4, 5, 7, 9, 10, 11, 13, 16, 18, 20, 22, 24, 26, 28, 30}, {1, 16, 19, 20, 23, 24, 28, 31}, {0, 1, 17, 18, 19, 21, 22, 23, 25, 29, 30, 31}, {3, 16, 20, 26, 28}, {2, 3, 17, 21, 27, 29}, {5, 16, 18, 19, 22, 24, 26, 27, 30, 31}, {4, 5, 17, 18, 23, 25, 26, 30}, {7, 17, 18, 19, 20, 21, 22, 23, 25, 26, 27, 29, 31}, {6, 7, 16, 17, 18, 20, 22, 24, 25, 26, 28, 29, 30, 31}, {9, 16, 19, 21, 22, 23, 28, 29, 30}, {8, 9, 17, 18, 19, 20, 21, 22, 28, 31}, {11, 16, 21, 28, 29}, {10, 11, 17, 20, 21, 28}, {13, 17, 18, 20, 21, 22, 23, 24, 25, 27, 28, 29, 31}, {12, 13, 16, 17, 19, 20, 22, 24, 26, 27, 28, 30, 31}, {15, 16, 17, 18, 21, 22, 24, 27, 28, 31}, {14, 15, 16, 19, 20, 21, 23, 25, 26, 27, 29, 30, 31}]
    @ Log of the circuit search that produced this sequence:
    @ i = 0 @ better circuit found: #XORs = 238 @ better circuit found: #XORs = 225 @ better circuit found: #XORs = 223 @ better circuit found: #XORs = 215 @ i = 50 @ i = 100 @ i = 150 @ i = 200 @ i = 250 @ i = 300 @ i = 350 @ i = 400 @ better circuit found: #XORs = 214 @ i = 450 @ i = 500 @ better circuit found: #XORs = 211 @ i = 550 @ i = 600 @ i = 650 @ i = 700 @ i = 750 @ i = 800 @ i = 850 @ i = 900 @ i = 950
    @ Entry k of "initial state" lists the source word indices that the
    @ load phase below XORs together into s<k>.
    @ initial state: [{1, 7, 8, 20, 25}, {0, 10, 11, 14, 17, 21}, {3, 4, 14, 15, 25, 27, 31}, {2, 3, 5, 14}, {0, 10, 12, 14, 15, 24, 25, 28}, {1, 20, 21, 24, 31}, {1, 21, 23, 25}, {2, 4, 12, 13, 14, 16, 27, 29}, {2, 7}, {7, 18, 23, 28, 31}, {0, 9, 17, 18, 19, 31}, {4, 5, 10, 12, 19, 22, 25, 30}, {2, 9, 14, 21, 26}, {4, 6, 9, 23}, {2, 11, 12, 15, 30}, {0, 9, 16, 26, 27, 31}, {16, 23, 25, 10, 13}, {16, 1, 19, 25, 28}, {3, 16, 20, 26, 28}, {2, 3, 17, 21, 27, 29}, {5, 17, 22, 24, 30, 31}, {4, 17, 18, 25}, {7, 13, 19, 24, 26, 28}, {1, 18, 6, 25, 14}, {9, 11, 19, 22, 23, 30}, {0, 8, 20, 21, 22, 28}, {11, 16, 21, 28, 29}, {15, 16, 17}, {20, 10, 29, 15}, {5, 12, 13, 17, 18, 20, 28}, {5, 23, 26, 30}, {2, 9, 13, 19, 27, 30, 31}]
    @ Register mapping: the first 22 indices stay in their S register, the
    @ last 10 (5, 6, 9, 12, 13, 14, 17, 23, 28, 30) are moved to r0..r9.
    @[0, 1, 2, 3, 4, 7, 8, 10, 11, 15, 16, 18, 19, 20, 21, 22, 24, 25, 26, 27, 29, 31, 5, 6, 9, 12, 13, 14, 17, 23, 28, 30]
    @instructions with recommended mapping to GP and FP registers:
    @(cycle count without considering memory stores: 565)

    @ Free r0/r1 for use as working registers.
    mov r14, r0
    mov r12, r1

    @ ---- Load phase: s<k> = XOR of the source words in initial state[k] ----
    ldr r0, [r12, #4]
    ldr r1, [r12, #28]
    ldr r2, [r12, #32]
    ldr r3, [r12, #80]
    ldr r4, [r12, #100]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s0, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #40]
    ldr r2, [r12, #44]
    ldr r3, [r12, #56]
    ldr r4, [r12, #68]
    ldr r5, [r12, #84]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s1, r0

    ldr r0, [r12, #12]
    ldr r1, [r12, #16]
    ldr r2, [r12, #56]
    ldr r3, [r12, #60]
    ldr r4, [r12, #100]
    ldr r5, [r12, #108]
    ldr r6, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s2, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #12]
    ldr r2, [r12, #20]
    ldr r3, [r12, #56]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s3, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #40]
    ldr r2, [r12, #48]
    ldr r3, [r12, #56]
    ldr r4, [r12, #60]
    ldr r5, [r12, #96]
    ldr r6, [r12, #100]
    ldr r7, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    vmov s4, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #80]
    ldr r2, [r12, #84]
    ldr r3, [r12, #96]
    ldr r4, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s5, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #84]
    ldr r2, [r12, #92]
    ldr r3, [r12, #100]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s6, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #16]
    ldr r2, [r12, #48]
    ldr r3, [r12, #52]
    ldr r4, [r12, #56]
    ldr r5, [r12, #64]
    ldr r6, [r12, #108]
    ldr r7, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    vmov s7, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #28]
    eor r0, r1
    vmov s8, r0

    ldr r0, [r12, #28]
    ldr r1, [r12, #72]
    ldr r2, [r12, #92]
    ldr r3, [r12, #112]
    ldr r4, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s9, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #36]
    ldr r2, [r12, #68]
    ldr r3, [r12, #72]
    ldr r4, [r12, #76]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s10, r0

    ldr r0, [r12, #16]
    ldr r1, [r12, #20]
    ldr r2, [r12, #40]
    ldr r3, [r12, #48]
    ldr r4, [r12, #76]
    ldr r5, [r12, #88]
    ldr r6, [r12, #100]
    ldr r7, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    vmov s11, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #36]
    ldr r2, [r12, #56]
    ldr r3, [r12, #84]
    ldr r4, [r12, #104]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s12, r0

    ldr r0, [r12, #16]
    ldr r1, [r12, #24]
    ldr r2, [r12, #36]
    ldr r3, [r12, #92]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s13, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #44]
    ldr r2, [r12, #48]
    ldr r3, [r12, #60]
    ldr r4, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s14, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #36]
    ldr r2, [r12, #64]
    ldr r3, [r12, #104]
    ldr r4, [r12, #108]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s15, r0

    ldr r0, [r12, #64]
    ldr r1, [r12, #92]
    ldr r2, [r12, #100]
    ldr r3, [r12, #40]
    ldr r4, [r12, #52]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s16, r0

    ldr r0, [r12, #64]
    ldr r1, [r12, #4]
    ldr r2, [r12, #76]
    ldr r3, [r12, #100]
    ldr r4, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s17, r0

    ldr r0, [r12, #12]
    ldr r1, [r12, #64]
    ldr r2, [r12, #80]
    ldr r3, [r12, #104]
    ldr r4, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s18, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #12]
    ldr r2, [r12, #68]
    ldr r3, [r12, #84]
    ldr r4, [r12, #108]
    ldr r5, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s19, r0

    ldr r0, [r12, #20]
    ldr r1, [r12, #68]
    ldr r2, [r12, #88]
    ldr r3, [r12, #96]
    ldr r4, [r12, #120]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s20, r0

    ldr r0, [r12, #16]
    ldr r1, [r12, #68]
    ldr r2, [r12, #72]
    ldr r3, [r12, #100]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s21, r0

    ldr r0, [r12, #28]
    ldr r1, [r12, #52]
    ldr r2, [r12, #76]
    ldr r3, [r12, #96]
    ldr r4, [r12, #104]
    ldr r5, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s22, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #72]
    ldr r2, [r12, #24]
    ldr r3, [r12, #100]
    ldr r4, [r12, #56]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s23, r0

    ldr r0, [r12, #36]
    ldr r1, [r12, #44]
    ldr r2, [r12, #76]
    ldr r3, [r12, #88]
    ldr r4, [r12, #92]
    ldr r5, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s24, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #32]
    ldr r2, [r12, #80]
    ldr r3, [r12, #84]
    ldr r4, [r12, #88]
    ldr r5, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s25, r0

    ldr r0, [r12, #44]
    ldr r1, [r12, #64]
    ldr r2, [r12, #84]
    ldr r3, [r12, #112]
    ldr r4, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s26, r0

    ldr r0, [r12, #60]
    ldr r1, [r12, #64]
    ldr r2, [r12, #68]
    eor r0, r1
    eor r0, r2
    vmov s27, r0

    ldr r0, [r12, #80]
    ldr r1, [r12, #40]
    ldr r2, [r12, #116]
    ldr r3, [r12, #60]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s28, r0

    ldr r0, [r12, #20]
    ldr r1, [r12, #48]
    ldr r2, [r12, #52]
    ldr r3, [r12, #68]
    ldr r4, [r12, #72]
    ldr r5, [r12, #80]
    ldr r6, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s29, r0

    ldr r0, [r12, #20]
    ldr r1, [r12, #92]
    ldr r2, [r12, #104]
    ldr r3, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s30, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #36]
    ldr r2, [r12, #52]
    ldr r3, [r12, #76]
    ldr r4, [r12, #108]
    ldr r5, [r12, #120]
    ldr r6, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s31, r0

    @ ---- Move ten values into core registers: g0..g9 = r0..r9 ----
    @ From here on s5, s6, s9, s12, s13, s14, s17, s23, s28, s30 are stale;
    @ the live copies are r0..r9 and are what gets stored at the end.
    vmov r0, s5;
    vmov r1, s6;
    vmov r2, s9;
    vmov r3, s12;
    vmov r4, s13;
    vmov r5, s14;
    vmov r6, s17;
    vmov r7, s23;
    vmov r8, s28;
    vmov r9, s30;

    @ ---- XOR phase ----
    @ Each "@// dst ^= src" comment names the update done by the
    @ instructions that follow it.  An S-register source is first copied
    @ to r11; an S-register destination is copied to r10, updated there
    @ and written back.  The order of the steps matters: later steps read
    @ values produced by earlier ones.
    @// g5 ^= g9
    eor r5, r9
    @// g9 ^= g1
    eor r9, r1
    @// f21 ^= g1
    vmov r10, s21
    eor r10, r1
    vmov s21, r10
    @// g1 ^= f16
    vmov r11, s16
    eor r1, r11
    @// g1 ^= g0
    eor r1, r0
    @// f1 ^= f21
    vmov r10, s1
    vmov r11, s21
    eor r10, r11
    vmov s1, r10
    @// f21 ^= g9
    vmov r10, s21
    eor r10, r9
    vmov s21, r10
    @// g9 ^= g6
    eor r9, r6
    @// f31 ^= g3
    vmov r10, s31
    eor r10, r3
    vmov s31, r10
    @// g0 ^= f0
    vmov r11, s0
    eor r0, r11
    @// g9 ^= f27
    vmov r11, s27
    eor r9, r11
    @// f1 ^= g2
    vmov r10, s1
    eor r10, r2
    vmov s1, r10
    @// f8 ^= g3
    vmov r10, s8
    eor r10, r3
    vmov s8, r10
    @// g3 ^= g7
    eor r3, r7
    @// g7 ^= g6
    eor r7, r6
    @// f0 ^= g8
    vmov r10, s0
    eor r10, r8
    vmov s0, r10
    @// f27 ^= g8
    vmov r10, s27
    eor r10, r8
    vmov s27, r10
    @// g8 ^= f16
    vmov r11, s16
    eor r8, r11
    @// f16 ^= g6
    vmov r10, s16
    eor r10, r6
    vmov s16, r10
    @// g0 ^= f8
    vmov r11, s8
    eor r0, r11
    @// g0 ^= f2
    vmov r11, s2
    eor r0, r11
    @// g7 ^= g2
    eor r7, r2
    @// g7 ^= f31
    vmov r11, s31
    eor r7, r11
    @// g7 ^= g8
    eor r7, r8
    @// g6 ^= f26
    vmov r11, s26
    eor r6, r11
    @// f27 ^= f26
    vmov r10, s27
    vmov r11, s26
    eor r10, r11
    vmov s27, r10
    @// f16 ^= g1
    vmov r10, s16
    eor r10, r1
    vmov s16, r10
    @// g1 ^= g8
    eor r1, r8
    @// f8 ^= g4
    vmov r10, s8
    eor r10, r4
    vmov s8, r10
    @// g1 ^= g4
    eor r1, r4
    @// f8 ^= f3
    vmov r10, s8
    vmov r11, s3
    eor r10, r11
    vmov s8, r10
    @// g4 ^= g0
    eor r4, r0
    @// g0 ^= f3
    vmov r11, s3
    eor r0, r11
    @// g5 ^= g4
    eor r5, r4
    @// f20 ^= f10
    vmov r10, s20
    vmov r11, s10
    eor r10, r11
    vmov s20, r10
    @// f20 ^= f15
    vmov r10, s20
    vmov r11, s15
    eor r10, r11
    vmov s20, r10
    @// g9 ^= f20
    vmov r11, s20
    eor r9, r11
    @// f3 ^= f31
    vmov r10, s3
    vmov r11, s31
    eor r10, r11
    vmov s3, r10
    @// f3 ^= g6
    vmov r10, s3
    eor r10, r6
    vmov s3, r10
    @// f2 ^= f4
    vmov r10, s2
    vmov r11, s4
    eor r10, r11
    vmov s2, r10
    @// g4 ^= f22
    vmov r11, s22
    eor r4, r11
    @// f11 ^= g6
    vmov r10, s11
    eor r10, r6
    vmov s11, r10
    @// f25 ^= f10
    vmov r10, s25
    vmov r11, s10
    eor r10, r11
    vmov s25, r10
    @// f10 ^= f24
    vmov r10, s10
    vmov r11, s24
    eor r10, r11
    vmov s10, r10
    @// f7 ^= f15
    vmov r10, s7
    vmov r11, s15
    eor r10, r11
    vmov s7, r10
    @// f15 ^= g2
    vmov r10, s15
    eor r10, r2
    vmov s15, r10
    @// g2 ^= f26
    vmov r11, s26
    eor r2, r11
    @// g3 ^= f0
    vmov r11, s0
    eor r3, r11
    @// f15 ^= f2
    vmov r10, s15
    vmov r11, s2
    eor r10, r11
    vmov s15, r10
    @// f4 ^= g5
    vmov r10, s4
    eor r10, r5
    vmov s4, r10
    @// g5 ^= f18
    vmov r11, s18
    eor r5, r11
    @// g6 ^= f10
    vmov r11, s10
    eor r6, r11
    @// f10 ^= f29
    vmov r10, s10
    vmov r11, s29
    eor r10, r11
    vmov s10, r10
    @// g0 ^= f0
    vmov r11, s0
    eor r0, r11
    @// g2 ^= f18
    vmov r11, s18
    eor r2, r11
    @// f24 ^= f26
    vmov r10, s24
    vmov r11, s26
    eor r10, r11
    vmov s24, r10
    @// g3 ^= f11
    vmov r11, s11
    eor r3, r11
    @// f8 ^= f7
    vmov r10, s8
    vmov r11, s7
    eor r10, r11
    vmov s8, r10
    @// g7 ^= g9
    eor r7, r9
    @// f15 ^= f10
    vmov r10, s15
    vmov r11, s10
    eor r10, r11
    vmov s15, r10
    @// g4 ^= f1
    vmov r11, s1
    eor r4, r11
    @// f31 ^= g8
    vmov r10, s31
    eor r10, r8
    vmov s31, r10
    @// f7 ^= f0
    vmov r10, s7
    vmov r11, s0
    eor r10, r11
    vmov s7, r10
    @// g1 ^= f3
    vmov r11, s3
    eor r1, r11
    @// f29 ^= f20
    vmov r10, s29
    vmov r11, s20
    eor r10, r11
    vmov s29, r10
    @// g8 ^= g9
    eor r8, r9
    @// f0 ^= f2
    vmov r10, s0
    vmov r11, s2
    eor r10, r11
    vmov s0, r10
    @// f1 ^= g1
    vmov r10, s1
    eor r10, r1
    vmov s1, r10
    @// g2 ^= g3
    eor r2, r3
    @// f22 ^= g8
    vmov r10, s22
    eor r10, r8
    vmov s22, r10
    @// g5 ^= g6
    eor r5, r6

    @ ---- Store phase: destination word k at byte offset 4*k ----
    @ Words 5, 6, 9, 12, 13, 14, 17, 23, 28, 30 come from r0..r9, the rest
    @ from s<k>.
    vstr.32 s0, [r14, #0]
    vstr.32 s1, [r14, #4]
    vstr.32 s2, [r14, #8]
    vstr.32 s3, [r14, #12]
    vstr.32 s4, [r14, #16]
    str r0, [r14, #20]
    str r1, [r14, #24]
    vstr.32 s7, [r14, #28]
    vstr.32 s8, [r14, #32]
    str r2, [r14, #36]
    vstr.32 s10, [r14, #40]
    vstr.32 s11, [r14, #44]
    str r3, [r14, #48]
    str r4, [r14, #52]
    str r5, [r14, #56]
    vstr.32 s15, [r14, #60]
    vstr.32 s16, [r14, #64]
    str r6, [r14, #68]
    vstr.32 s18, [r14, #72]
    vstr.32 s19, [r14, #76]
    vstr.32 s20, [r14, #80]
    vstr.32 s21, [r14, #84]
    vstr.32 s22, [r14, #88]
    str r7, [r14, #92]
    vstr.32 s24, [r14, #96]
    vstr.32 s25, [r14, #100]
    vstr.32 s26, [r14, #104]
    vstr.32 s27, [r14, #108]
    str r8, [r14, #112]
    vstr.32 s29, [r14, #116]
    str r9, [r14, #120]
    vstr.32 s31, [r14, #124]

    @ Restore the registers saved on entry (this also reloads lr) and return.
    vpop { d8-d15 }
    pop { r0-r12, r14 }
    bx lr
.size gft_mul_v17, .-gft_mul_v17

.syntax unified
.cpu cortex-m4
.fpu fpv4-sp-d16
.global gft_mul_v18
.type gft_mul_v18, %function
.align 2
gft_mul_v18:
    push { r0-r12, r14 }
    vpush {d8-d15}
    @ final state: [{0, 1, 3, 4, 5, 9, 11, 24, 25, 26, 27, 28, 29, 30, 31}, {0, 2, 3, 4, 8, 9, 10, 11, 24, 26, 28, 30}, {0, 3, 6, 7, 8, 10, 11, 25, 26, 29, 30}, {1, 2, 3, 6, 9, 10, 24, 25, 27, 28, 29, 31}, {0, 3, 5, 13, 15, 27, 28, 29, 30}, {1, 2, 3, 4, 5, 12, 13, 14, 15, 26, 27, 28, 31}, {0, 7, 12, 14, 15, 24, 26, 28, 29}, {1, 6, 7, 13, 14, 25, 27, 28}, {1, 6, 8, 11, 12, 13, 14, 23, 31}, {0, 1, 7, 9, 10, 11, 12, 15, 22, 23, 30, 31}, {3, 4, 5, 6, 7, 8, 12, 13, 20, 22, 28, 30}, {2, 3, 4, 6, 9, 12, 21, 23, 29, 31}, {1, 8, 9, 11, 13, 16, 17, 20, 21, 24, 25, 28, 29}, {0, 1, 8, 10, 11, 12, 13, 16, 20, 24, 28}, {3, 8, 11, 15, 18, 19, 22, 23, 26, 27, 30, 31}, {2, 3, 9, 10, 11, 14, 15, 18, 22, 26, 30}, {0, 3, 17, 20, 21, 25, 27}, {1, 2, 3, 16, 17, 20, 24, 25, 26, 27}, {0, 19, 22, 23, 24, 26, 27}, {1, 18, 19, 22, 25, 26}, {4, 7, 16, 19, 20, 21, 23, 29, 31}, {5, 6, 7, 17, 18, 19, 20, 22, 23, 28, 29, 30, 31}, {4, 16, 20, 23, 28, 30, 31}, {5, 17, 21, 22, 23, 29, 30}, {8, 11, 17, 22, 28, 29, 30}, {9, 10, 11, 16, 17, 23, 28, 31}, {8, 19, 20, 21, 22, 23, 28, 29}, {9, 18, 19, 20, 22, 28}, {12, 15, 17, 24, 25, 27, 28, 29, 31}, {13, 14, 15, 16, 17, 24, 26, 27, 28, 30, 31}, {12, 19, 24, 27, 28, 31}, {13, 18, 19, 25, 26, 27, 29,
    @ 30, 31}]
    @ (The line above closes the "final state" list of gft_mul_v18; the
    @ function's label, push and vpush come immediately before that list.)
    @
    @ -------------------------------------------------------------------
    @ gft_mul_v18
    @
    @ Branch-free XOR network over 32 words of 32 bits each.
    @
    @ In:   r0 = destination pointer (copied to r14; 32 words are stored
    @            at byte offsets 0..124)
    @       r1 = source pointer (copied to r12; words are loaded from byte
    @            offsets 0..124)
    @ Out:  every stored word is an XOR of words loaded from the source.
    @
    @ Register use:
    @   r0-r9     ten of the running values ("g0".."g9" in the comments)
    @   r10, r11  scratch used to pass S-register values through eor
    @   s0-s31    the other running values ("f<k>" in the comments is s<k>)
    @
    @ The vpop/pop at the end restore d8-d15 (= s16-s31), r0-r12 and r14.
    @ s0-s15 are overwritten and not restored.
    @ All loads from the source are issued before the first store to the
    @ destination.
    @ -------------------------------------------------------------------
    @ Log of the circuit search that produced this sequence:
    @ i = 0 @ better circuit found: #XORs = 227 @ better circuit found: #XORs = 226 @ better circuit found: #XORs = 215 @ i = 50 @ i = 100 @ better circuit found: #XORs = 212 @ i = 150 @ i = 200 @ i = 250 @ i = 300 @ i = 350 @ i = 400 @ better circuit found: #XORs = 208 @ i = 450 @ i = 500 @ i = 550 @ i = 600 @ i = 650 @ i = 700 @ i = 750 @ i = 800 @ i = 850 @ i = 900 @ i = 950
    @ Entry k of "initial state" lists the source word indices that the
    @ load phase below XORs together into s<k>.
    @ initial state: [{2, 19, 20, 5, 24}, {3, 5, 6, 8, 9, 24, 28}, {5, 8, 17, 22, 23, 30}, {2, 3, 10}, {0, 3, 5, 13, 27, 29}, {1, 2, 4, 7, 24, 31}, {4, 6, 7, 10, 12, 26, 29}, {1, 6, 7, 13, 14, 25, 27, 28}, {1, 6, 8, 11, 12, 13, 14, 23, 31}, {13, 14, 16, 17, 26, 31}, {24, 27}, {7, 18, 21, 26, 31}, {19, 5, 9, 12, 29}, {0, 4, 6, 10, 14, 24, 30}, {17, 13, 3, 23, 25}, {19, 20, 26, 11, 14}, {0, 13, 20, 21, 23, 27}, {0, 1, 2, 16, 21, 24, 26}, {0, 20, 26, 28}, {0, 1, 9, 25}, {7, 8, 15, 19}, {6, 18, 20, 29, 30, 31}, {4, 15, 16, 20, 23, 31}, {21, 8, 29}, {11, 15, 17, 21, 22}, {4, 9, 10, 11, 17, 20, 30}, {9, 18, 23}, {19, 20, 22, 23, 28}, {17, 19, 25, 15, 29}, {15, 28, 30}, {19, 12, 28, 31}, {1, 13, 22, 27, 29, 30, 31}]
    @ Register mapping: the first 22 indices stay in their S register, the
    @ last 10 (0, 2, 3, 9, 10, 11, 12, 14, 15, 29) are moved to r0..r9.
    @[1, 4, 5, 6, 7, 8, 13, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 30, 31, 0, 2, 3, 9, 10, 11, 12, 14, 15, 29]
    @instructions with recommended mapping to GP and FP registers:
    @(cycle count without considering memory stores: 556)

    @ Free r0/r1 for use as working registers.
    mov r14, r0
    mov r12, r1

    @ ---- Load phase: s<k> = XOR of the source words in initial state[k] ----
    ldr r0, [r12, #8]
    ldr r1, [r12, #76]
    ldr r2, [r12, #80]
    ldr r3, [r12, #20]
    ldr r4, [r12, #96]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s0, r0

    ldr r0, [r12, #12]
    ldr r1, [r12, #20]
    ldr r2, [r12, #24]
    ldr r3, [r12, #32]
    ldr r4, [r12, #36]
    ldr r5, [r12, #96]
    ldr r6, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s1, r0

    ldr r0, [r12, #20]
    ldr r1, [r12, #32]
    ldr r2, [r12, #68]
    ldr r3, [r12, #88]
    ldr r4, [r12, #92]
    ldr r5, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s2, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #12]
    ldr r2, [r12, #40]
    eor r0, r1
    eor r0, r2
    vmov s3, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #12]
    ldr r2, [r12, #20]
    ldr r3, [r12, #52]
    ldr r4, [r12, #108]
    ldr r5, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s4, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #8]
    ldr r2, [r12, #16]
    ldr r3, [r12, #28]
    ldr r4, [r12, #96]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s5, r0

    ldr r0, [r12, #16]
    ldr r1, [r12, #24]
    ldr r2, [r12, #28]
    ldr r3, [r12, #40]
    ldr r4, [r12, #48]
    ldr r5, [r12, #104]
    ldr r6, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s6, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #24]
    ldr r2, [r12, #28]
    ldr r3, [r12, #52]
    ldr r4, [r12, #56]
    ldr r5, [r12, #100]
    ldr r6, [r12, #108]
    ldr r7, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    vmov s7, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #24]
    ldr r2, [r12, #32]
    ldr r3, [r12, #44]
    ldr r4, [r12, #48]
    ldr r5, [r12, #52]
    ldr r6, [r12, #56]
    ldr r7, [r12, #92]
    ldr r8, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    eor r0, r8
    vmov s8, r0

    ldr r0, [r12, #52]
    ldr r1, [r12, #56]
    ldr r2, [r12, #64]
    ldr r3, [r12, #68]
    ldr r4, [r12, #104]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s9, r0

    ldr r0, [r12, #96]
    ldr r1, [r12, #108]
    eor r0, r1
    vmov s10, r0

    ldr r0, [r12, #28]
    ldr r1, [r12, #72]
    ldr r2, [r12, #84]
    ldr r3, [r12, #104]
    ldr r4, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s11, r0

    ldr r0, [r12, #76]
    ldr r1, [r12, #20]
    ldr r2, [r12, #36]
    ldr r3, [r12, #48]
    ldr r4, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s12, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #16]
    ldr r2, [r12, #24]
    ldr r3, [r12, #40]
    ldr r4, [r12, #56]
    ldr r5, [r12, #96]
    ldr r6, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s13, r0

    ldr r0, [r12, #68]
    ldr r1, [r12, #52]
    ldr r2, [r12, #12]
    ldr r3, [r12, #92]
    ldr r4, [r12, #100]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s14, r0

    ldr r0, [r12, #76]
    ldr r1, [r12, #80]
    ldr r2, [r12, #104]
    ldr r3, [r12, #44]
    ldr r4, [r12, #56]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s15, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #52]
    ldr r2, [r12, #80]
    ldr r3, [r12, #84]
    ldr r4, [r12, #92]
    ldr r5, [r12, #108]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s16, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #4]
    ldr r2, [r12, #8]
    ldr r3, [r12, #64]
    ldr r4, [r12, #84]
    ldr r5, [r12, #96]
    ldr r6, [r12, #104]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s17, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #80]
    ldr r2, [r12, #104]
    ldr r3, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s18, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #4]
    ldr r2, [r12, #36]
    ldr r3, [r12, #100]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s19, r0

    ldr r0, [r12, #28]
    ldr r1, [r12, #32]
    ldr r2, [r12, #60]
    ldr r3, [r12, #76]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s20, r0

    ldr r0, [r12, #24]
    ldr r1, [r12, #72]
    ldr r2, [r12, #80]
    ldr r3, [r12, #116]
    ldr r4, [r12, #120]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s21, r0

    ldr r0, [r12, #16]
    ldr r1, [r12, #60]
    ldr r2, [r12, #64]
    ldr r3, [r12, #80]
    ldr r4, [r12, #92]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s22, r0

    ldr r0, [r12, #84]
    ldr r1, [r12, #32]
    ldr r2, [r12, #116]
    eor r0, r1
    eor r0, r2
    vmov s23, r0

    ldr r0, [r12, #44]
    ldr r1, [r12, #60]
    ldr r2, [r12, #68]
    ldr r3, [r12, #84]
    ldr r4, [r12, #88]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s24, r0

    ldr r0, [r12, #16]
    ldr r1, [r12, #36]
    ldr r2, [r12, #40]
    ldr r3, [r12, #44]
    ldr r4, [r12, #68]
    ldr r5, [r12, #80]
    ldr r6, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s25, r0

    ldr r0, [r12, #36]
    ldr r1, [r12, #72]
    ldr r2, [r12, #92]
    eor r0, r1
    eor r0, r2
    vmov s26, r0

    ldr r0, [r12, #76]
    ldr r1, [r12, #80]
    ldr r2, [r12, #88]
    ldr r3, [r12, #92]
    ldr r4, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s27, r0

    ldr r0, [r12, #68]
    ldr r1, [r12, #76]
    ldr r2, [r12, #100]
    ldr r3, [r12, #60]
    ldr r4, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s28, r0

    ldr r0, [r12, #60]
    ldr r1, [r12, #112]
    ldr r2, [r12, #120]
    eor r0, r1
    eor r0, r2
    vmov s29, r0

    ldr r0, [r12, #76]
    ldr r1, [r12, #48]
    ldr r2, [r12, #112]
    ldr r3, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s30, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #52]
    ldr r2, [r12, #88]
    ldr r3, [r12, #108]
    ldr r4, [r12, #116]
    ldr r5, [r12, #120]
    ldr r6, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s31, r0

    @ ---- Move ten values into core registers: g0..g9 = r0..r9 ----
    @ From here on s0, s2, s3, s9, s10, s11, s12, s14, s15, s29 are stale;
    @ the live copies are r0..r9 and are what gets stored at the end.
    vmov r0, s0;
    vmov r1, s2;
    vmov r2, s3;
    vmov r3, s9;
    vmov r4, s10;
    vmov r5, s11;
    vmov r6, s12;
    vmov r7, s14;
    vmov r8, s15;
    vmov r9, s29;

    @ ---- XOR phase ----
    @ Each "@// dst ^= src" comment names the update done by the
    @ instructions that follow it.  An S-register source is first copied
    @ to r11; an S-register destination is copied to r10, updated there
    @ and written back.  The order of the steps matters: later steps read
    @ values produced by earlier ones.
    @// g5 ^= f23
    vmov r11, s23
    eor r5, r11
    @// f19 ^= f26
    vmov r10, s19
    vmov r11, s26
    eor r10, r11
    vmov s19, r10
    @// f18 ^= f27
    vmov r10, s18
    vmov r11, s27
    eor r10, r11
    vmov s18, r10
    @// f27 ^= f26
    vmov r10, s27
    vmov r11, s26
    eor r10, r11
    vmov s27, r10
    @// f24 ^= f23
    vmov r10, s24
    vmov r11, s23
    eor r10, r11
    vmov s24, r10
    @// f20 ^= f23
    vmov r10, s20
    vmov r11, s23
    eor r10, r11
    vmov s20, r10
    @// g5 ^= f6
    vmov r11, s6
    eor r5, r11
    @// g6 ^= g0
    eor r6, r0
    @// f26 ^= f23
    vmov r10, s26
    vmov r11, s23
    eor r10, r11
    vmov s26, r10
    @// f23 ^= g1
    vmov r10, s23
    eor r10, r1
    vmov s23, r10
    @// g1 ^= g7
    eor r1, r7
    @// g5 ^= g2
    eor r5, r2
    @// f20 ^= g9
    vmov r10, s20
    eor r10, r9
    vmov s20, r10
    @// g0 ^= g8
    eor r0, r8
    @// f22 ^= g9
    vmov r10, s22
    eor r10, r9
    vmov s22, r10
    @// f16 ^= g7
    vmov r10, s16
    eor r10, r7
    vmov s16, r10
    @// g3 ^= g8
    eor r3, r8
    @// g8 ^= f18
    vmov r11, s18
    eor r8, r11
    @// f19 ^= f18
    vmov r10, s19
    vmov r11, s18
    eor r10, r11
    vmov s19, r10
    @// f18 ^= g4
    vmov r10, s18
    eor r10, r4
    vmov s18, r10
    @// f30 ^= g4
    vmov r10, s30
    eor r10, r4
    vmov s30, r10
    @// g2 ^= g4
    eor r2, r4
    @// g4 ^= f5
    vmov r11, s5
    eor r4, r11
    @// g4 ^= f31
    vmov r11, s31
    eor r4, r11
    @// f6 ^= f13
    vmov r10, s6
    vmov r11, s13
    eor r10, r11
    vmov s6, r10
    @// g1 ^= g4
    eor r1, r4
    @// g4 ^= g6
    eor r4, r6
    @// g6 ^= g2
    eor r6, r2
    @// g0 ^= f13
    vmov r11, s13
    eor r0, r11
    @// f13 ^= f22
    vmov r10, s13
    vmov r11, s22
    eor r10, r11
    vmov s13, r10
    @// f13 ^= f8
    vmov r10, s13
    vmov r11, s8
    eor r10, r11
    vmov s13, r10
    @// g7 ^= f28
    vmov r11, s28
    eor r7, r11
    @// f25 ^= f22
    vmov r10, s25
    vmov r11, s22
    eor r10, r11
    vmov s25, r10
    @// g8 ^= f18
    vmov r11, s18
    eor r8, r11
    @// g3 ^= g8
    eor r3, r8
    @// g6 ^= f16
    vmov r11, s16
    eor r6, r11
    @// g8 ^= g2
    eor r8, r2
    @// g2 ^= f5
    vmov r11, s5
    eor r2, r11
    @// f5 ^= f6
    vmov r10, s5
    vmov r11, s6
    eor r10, r11
    vmov s5, r10
    @// f6 ^= g9
    vmov r10, s6
    eor r10, r9
    vmov s6, r10
    @// g7 ^= f24
    vmov r11, s24
    eor r7, r11
    @// f24 ^= g9
    vmov r10, s24
    eor r10, r9
    vmov s24, r10
    @// g8 ^= f27
    vmov r11, s27
    eor r8, r11
    @// g8 ^= g9
    eor r8, r9
    @// g7 ^= f28
    vmov r11, s28
    eor r7, r11
    @// f4 ^= g9
    vmov r10, s4
    eor r10, r9
    vmov s4, r10
    @// g9 ^= f30
    vmov r11, s30
    eor r9, r11
    @// g9 ^= g3
    eor r9, r3
    @// g3 ^= f31
    vmov r11, s31
    eor r3, r11
    @// g3 ^= f6
    vmov r11, s6
    eor r3, r11
    @// g3 ^= f25
    vmov r11, s25
    eor r3, r11
    @// g5 ^= f26
    vmov r11, s26
    eor r5, r11
    @// g2 ^= g1
    eor r2, r1
    @// f26 ^= f27
    vmov r10, s26
    vmov r11, s27
    eor r10, r11
    vmov s26, r10
    @// f21 ^= f20
    vmov r10, s21
    vmov r11, s20
    eor r10, r11
    vmov s21, r10
    @// g9 ^= f30
    vmov r11, s30
    eor r9, r11
    @// f31 ^= f19
    vmov r10, s31
    vmov r11, s19
    eor r10, r11
    vmov s31, r10
    @// g7 ^= f31
    vmov r11, s31
    eor r7, r11
    @// f20 ^= f22
    vmov r10, s20
    vmov r11, s22
    eor r10, r11
    vmov s20, r10
    @// g2 ^= f1
    vmov r11, s1
    eor r2, r11
    @// g4 ^= f1
    vmov r11, s1
    eor r4, r11
    @// f17 ^= f16
    vmov r10, s17
    vmov r11, s16
    eor r10, r11
    vmov s17, r10
    @// f5 ^= f4
    vmov r10, s5
    vmov r11, s4
    eor r10, r11
    vmov s5, r10
    @// g1 ^= g0
    eor r1, r0
    @// f28 ^= f30
    vmov r10, s28
    vmov r11, s30
    eor r10, r11
    vmov s28, r10
    @// f1 ^= g0
    vmov r10, s1
    eor r10, r0
    vmov s1, r10
    @// f21 ^= f23
    vmov r10, s21
    vmov r11, s23
    eor r10, r11
    vmov s21, r10
    @// g6 ^= f13
    vmov r11, s13
    eor r6, r11
    @// g0 ^= g2
    eor r0, r2

    @ ---- Store phase: destination word k at byte offset 4*k ----
    @ Words 0, 2, 3, 9, 10, 11, 12, 14, 15, 29 come from r0..r9, the rest
    @ from s<k>.
    str r0, [r14, #0]
    vstr.32 s1, [r14, #4]
    str r1, [r14, #8]
    str r2, [r14, #12]
    vstr.32 s4, [r14, #16]
    vstr.32 s5, [r14, #20]
    vstr.32 s6, [r14, #24]
    vstr.32 s7, [r14, #28]
    vstr.32 s8, [r14, #32]
    str r3, [r14, #36]
    str r4, [r14, #40]
    str r5, [r14, #44]
    str r6, [r14, #48]
    vstr.32 s13, [r14, #52]
    str r7, [r14, #56]
    str r8, [r14, #60]
    vstr.32 s16, [r14, #64]
    vstr.32 s17, [r14, #68]
    vstr.32 s18, [r14, #72]
    vstr.32 s19, [r14, #76]
    vstr.32 s20, [r14, #80]
    vstr.32 s21, [r14, #84]
    vstr.32 s22, [r14, #88]
    vstr.32 s23, [r14, #92]
    vstr.32 s24, [r14, #96]
    vstr.32 s25, [r14, #100]
    vstr.32 s26, [r14, #104]
    vstr.32 s27, [r14, #108]
    vstr.32 s28, [r14, #112]
    str r9, [r14, #116]
    vstr.32 s30, [r14, #120]
    vstr.32 s31, [r14, #124]

    @ Restore the registers saved on entry (this also reloads lr) and return.
    vpop { d8-d15 }
    pop { r0-r12, r14 }
    bx lr
.size gft_mul_v18, .-gft_mul_v18

.syntax unified
.cpu cortex-m4
.fpu fpv4-sp-d16
.global gft_mul_v19
.type gft_mul_v19, %function
.align 2
gft_mul_v19:
    push { r0-r12, r14 }
    vpush {d8-d15}
    @ final state: [{4, 5, 6, 8, 11, 14, 15, 27, 31}, {4, 7, 9, 10, 11, 14, 26, 27, 30, 31}, {4, 5, 8, 13, 15, 24, 26, 28, 30}, {4, 9, 12, 13, 14, 15, 25, 27, 29, 31}, {0, 1, 3, 4, 5, 7, 8, 15, 24, 25, 28, 29, 31}, {0, 2, 3, 4, 6, 7, 9, 14, 15, 24, 28, 30, 31}, {0, 3, 4, 7, 10, 12, 14, 26, 27, 28, 31}, {1, 2, 3, 5, 6, 7, 11, 13, 15, 26, 29, 30, 31}, {0, 4, 6, 7, 8, 13, 15, 20, 21, 28, 29}, {1, 5, 6, 9, 12, 13, 14, 15, 20, 28}, {2, 5, 6, 7, 10, 12, 14, 15, 22, 23, 30, 31}, {3, 4, 5, 6, 11, 13, 14, 22, 30}, {0, 1, 2, 3, 5, 6, 7, 10, 12, 14, 16, 19, 20, 23, 24, 27, 28, 31}, {0, 2, 4, 5, 6, 11, 13, 15, 17, 18, 19, 21, 22, 23, 25, 26, 27, 29, 30, 31}, {1, 2, 5, 8, 9, 10, 11, 12, 13, 15, 16, 20, 24, 28}, {0, 1, 3, 4, 5, 8, 10, 12, 14, 15, 17, 21, 25, 29}, {0, 2, 3, 16, 18, 19, 20, 21, 22, 24, 27, 30, 31}, {1, 2, 17, 18, 20, 23, 25, 26, 27, 30}, {1, 2, 3, 17, 18, 19, 20, 21, 24, 29, 31}, {0, 1, 2, 16, 17, 18, 20, 25, 28, 29, 30, 31}, {4, 6, 7, 16, 17, 19, 21, 22, 24, 31}, {5, 6, 16, 18, 19, 20, 21, 23, 25, 30, 31}, {5, 6, 7, 16, 19, 20, 21, 22, 26, 28, 30}, {4, 5, 6, 17, 18, 19, 20, 23, 27, 29, 31}, {8, 10, 11, 16, 20, 22,
23, 26, 27, 29, 31}, {9, 10, 17, 21, 22, 26, 28, 29, 30, 31}, {9, 10, 11, 18, 21, 22, 23, 25, 27, 28, 30, 31}, {8, 9, 10, 19, 20, 21, 22, 24, 25, 26, 27, 29, 30}, {12, 14, 15, 16, 17, 18, 19, 21, 22, 23, 26, 31}, {13, 14, 16, 18, 20, 21, 22, 27, 30, 31}, {13, 14, 15, 17, 18, 21, 24, 25, 26, 27, 28, 30}, {12, 13, 14, 16, 17, 19, 20, 21, 24, 26, 29, 31}] @ i = 0 @ better circuit found: #XORs = 252 @ better circuit found: #XORs = 247 @ better circuit found: #XORs = 244 @ better circuit found: #XORs = 235 @ i = 50 @ better circuit found: #XORs = 234 @ i = 100 @ better circuit found: #XORs = 233 @ i = 150 @ i = 200 @ i = 250 @ better circuit found: #XORs = 231 @ better circuit found: #XORs = 230 @ i = 300 @ i = 350 @ i = 400 @ i = 450 @ i = 500 @ i = 550 @ i = 600 @ i = 650 @ i = 700 @ i = 750 @ i = 800 @ better circuit found: #XORs = 228 @ i = 850 @ i = 900 @ i = 950 @ initial state: [{2, 20, 25, 26, 27, 15}, {18, 5, 22, 8, 27}, {4, 18, 19, 22, 25, 26, 30}, {12, 13, 14, 20, 29}, {6, 8, 9, 10, 20, 22, 25}, {18, 2, 7, 8, 30}, {3, 10, 11, 12, 28}, {3, 5, 6, 8, 21}, {3, 4, 6, 10, 13, 30}, {1, 5, 6, 9, 15, 28, 29}, {5, 10, 11, 14, 22}, {23, 4, 14, 24, 30}, {1, 2, 8, 11, 15, 25}, {1, 13, 15, 18, 21, 22, 23}, {2, 6, 16, 18, 24, 27}, {2, 19, 14, 8}, {29, 2, 19, 12, 30}, {1, 19, 20, 11, 12}, {7, 16, 19, 24, 31}, {16, 17, 21, 22, 23, 27}, {0, 7, 22, 28, 29, 31}, {0, 18, 23, 25, 26, 29}, {8, 13, 15, 17, 20, 31}, {2, 4, 9, 26, 31}, {0, 3, 20, 24, 30}, {0, 9, 10, 30}, {0, 11, 17, 27}, {13, 15, 19, 24, 25, 27, 28}, {8, 12, 22, 24, 30}, {19, 23, 7, 8, 15}, {17, 21, 7, 26}, {3, 11, 16, 18, 26, 30}] @[0, 2, 3, 4, 6, 7, 9, 10, 11, 12, 13, 14, 16, 19, 20, 21, 22, 25, 26, 27, 28, 30, 1, 5, 8, 15, 17, 18, 23, 24, 29, 31] @instuctions with recommended mapping to GP and FP registers: @(cycle count without considering memory stores: 627) mov r14, r0 mov r12, r1 ldr r0, [r12, #8] ldr r1, [r12, #80] ldr r2, [r12, #100] ldr r3, [r12, #104] ldr r4, [r12, #108] ldr r5, [r12, #60] eor r0, r1 eor 
r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s0, r0 ldr r0, [r12, #72] ldr r1, [r12, #20] ldr r2, [r12, #88] ldr r3, [r12, #32] ldr r4, [r12, #108] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s1, r0 ldr r0, [r12, #16] ldr r1, [r12, #72] ldr r2, [r12, #76] ldr r3, [r12, #88] ldr r4, [r12, #100] ldr r5, [r12, #104] ldr r6, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s2, r0 ldr r0, [r12, #48] ldr r1, [r12, #52] ldr r2, [r12, #56] ldr r3, [r12, #80] ldr r4, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s3, r0 ldr r0, [r12, #24] ldr r1, [r12, #32] ldr r2, [r12, #36] ldr r3, [r12, #40] ldr r4, [r12, #80] ldr r5, [r12, #88] ldr r6, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s4, r0 ldr r0, [r12, #72] ldr r1, [r12, #8] ldr r2, [r12, #28] ldr r3, [r12, #32] ldr r4, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s5, r0 ldr r0, [r12, #12] ldr r1, [r12, #40] ldr r2, [r12, #44] ldr r3, [r12, #48] ldr r4, [r12, #112] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s6, r0 ldr r0, [r12, #12] ldr r1, [r12, #20] ldr r2, [r12, #24] ldr r3, [r12, #32] ldr r4, [r12, #84] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s7, r0 ldr r0, [r12, #12] ldr r1, [r12, #16] ldr r2, [r12, #24] ldr r3, [r12, #40] ldr r4, [r12, #52] ldr r5, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s8, r0 ldr r0, [r12, #4] ldr r1, [r12, #20] ldr r2, [r12, #24] ldr r3, [r12, #36] ldr r4, [r12, #60] ldr r5, [r12, #112] ldr r6, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s9, r0 ldr r0, [r12, #20] ldr r1, [r12, #40] ldr r2, [r12, #44] ldr r3, [r12, #56] ldr r4, [r12, #88] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s10, r0 ldr r0, [r12, #92] ldr r1, [r12, #16] ldr r2, [r12, #56] ldr r3, [r12, #96] ldr r4, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s11, r0 ldr r0, [r12, #4] ldr r1, [r12, #8] ldr r2, [r12, #32] ldr r3, 
[r12, #44] ldr r4, [r12, #60] ldr r5, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s12, r0 ldr r0, [r12, #4] ldr r1, [r12, #52] ldr r2, [r12, #60] ldr r3, [r12, #72] ldr r4, [r12, #84] ldr r5, [r12, #88] ldr r6, [r12, #92] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s13, r0 ldr r0, [r12, #8] ldr r1, [r12, #24] ldr r2, [r12, #64] ldr r3, [r12, #72] ldr r4, [r12, #96] ldr r5, [r12, #108] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s14, r0 ldr r0, [r12, #8] ldr r1, [r12, #76] ldr r2, [r12, #56] ldr r3, [r12, #32] eor r0, r1 eor r0, r2 eor r0, r3 vmov s15, r0 ldr r0, [r12, #116] ldr r1, [r12, #8] ldr r2, [r12, #76] ldr r3, [r12, #48] ldr r4, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s16, r0 ldr r0, [r12, #4] ldr r1, [r12, #76] ldr r2, [r12, #80] ldr r3, [r12, #44] ldr r4, [r12, #48] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s17, r0 ldr r0, [r12, #28] ldr r1, [r12, #64] ldr r2, [r12, #76] ldr r3, [r12, #96] ldr r4, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s18, r0 ldr r0, [r12, #64] ldr r1, [r12, #68] ldr r2, [r12, #84] ldr r3, [r12, #88] ldr r4, [r12, #92] ldr r5, [r12, #108] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s19, r0 ldr r0, [r12, #0] ldr r1, [r12, #28] ldr r2, [r12, #88] ldr r3, [r12, #112] ldr r4, [r12, #116] ldr r5, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s20, r0 ldr r0, [r12, #0] ldr r1, [r12, #72] ldr r2, [r12, #92] ldr r3, [r12, #100] ldr r4, [r12, #104] ldr r5, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s21, r0 ldr r0, [r12, #32] ldr r1, [r12, #52] ldr r2, [r12, #60] ldr r3, [r12, #68] ldr r4, [r12, #80] ldr r5, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s22, r0 ldr r0, [r12, #8] ldr r1, [r12, #16] ldr r2, [r12, #36] ldr r3, [r12, #104] ldr r4, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s23, r0 ldr r0, [r12, #0] ldr 
r1, [r12, #12] ldr r2, [r12, #80] ldr r3, [r12, #96] ldr r4, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s24, r0 ldr r0, [r12, #0] ldr r1, [r12, #36] ldr r2, [r12, #40] ldr r3, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 vmov s25, r0 ldr r0, [r12, #0] ldr r1, [r12, #44] ldr r2, [r12, #68] ldr r3, [r12, #108] eor r0, r1 eor r0, r2 eor r0, r3 vmov s26, r0 ldr r0, [r12, #52] ldr r1, [r12, #60] ldr r2, [r12, #76] ldr r3, [r12, #96] ldr r4, [r12, #100] ldr r5, [r12, #108] ldr r6, [r12, #112] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s27, r0 ldr r0, [r12, #32] ldr r1, [r12, #48] ldr r2, [r12, #88] ldr r3, [r12, #96] ldr r4, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s28, r0 ldr r0, [r12, #76] ldr r1, [r12, #92] ldr r2, [r12, #28] ldr r3, [r12, #32] ldr r4, [r12, #60] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s29, r0 ldr r0, [r12, #68] ldr r1, [r12, #84] ldr r2, [r12, #28] ldr r3, [r12, #104] eor r0, r1 eor r0, r2 eor r0, r3 vmov s30, r0 ldr r0, [r12, #12] ldr r1, [r12, #44] ldr r2, [r12, #64] ldr r3, [r12, #72] ldr r4, [r12, #104] ldr r5, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s31, r0 vmov r0, s1; vmov r1, s5; vmov r2, s8; vmov r3, s15; vmov r4, s17; vmov r5, s18; vmov r6, s23; vmov r7, s24; vmov r8, s29; vmov r9, s31; @// g4 ^= f16 vmov r11, s16 eor r4, r11 @// f0 ^= g6 vmov r10, s0 eor r10, r6 vmov s0, r10 @// f13 ^= f21 vmov r10, s13 vmov r11, s21 eor r10, r11 vmov s13, r10 @// f26 ^= f21 vmov r10, s26 vmov r11, s21 eor r10, r11 vmov s26, r10 @// f21 ^= f20 vmov r10, s21 vmov r11, s20 eor r10, r11 vmov s21, r10 @// f20 ^= g5 vmov r10, s20 eor r10, r5 vmov s20, r10 @// f2 ^= g0 vmov r10, s2 eor r10, r0 vmov s2, r10 @// g6 ^= g3 eor r6, r3 @// g3 ^= g1 eor r3, r1 @// g1 ^= g0 eor r1, r0 @// g0 ^= f10 vmov r11, s10 eor r0, r11 @// f12 ^= g8 vmov r10, s12 eor r10, r8 vmov s12, r10 @// f12 ^= g5 vmov r10, s12 eor r10, r5 vmov s12, r10 @// f4 ^= f10 vmov r10, s4 vmov 
r11, s10 eor r10, r11 vmov s4, r10 @// g1 ^= f28 vmov r11, s28 eor r1, r11 @// f16 ^= f3 vmov r10, s16 vmov r11, s3 eor r10, r11 vmov s16, r10 @// f10 ^= f11 vmov r10, s10 vmov r11, s11 eor r10, r11 vmov s10, r10 @// f11 ^= g7 vmov r10, s11 eor r10, r7 vmov s11, r10 @// f16 ^= g7 vmov r10, s16 eor r10, r7 vmov s16, r10 @// f14 ^= g0 vmov r10, s14 eor r10, r0 vmov s14, r10 @// g0 ^= g3 eor r0, r3 @// g8 ^= g3 eor r8, r3 @// f7 ^= f30 vmov r10, s7 vmov r11, s30 eor r10, r11 vmov s7, r10 @// g5 ^= f30 vmov r11, s30 eor r5, r11 @// f30 ^= g3 vmov r10, s30 eor r10, r3 vmov s30, r10 @// f30 ^= f2 vmov r10, s30 vmov r11, s2 eor r10, r11 vmov s30, r10 @// f2 ^= f27 vmov r10, s2 vmov r11, s27 eor r10, r11 vmov s2, r10 @// g3 ^= g9 eor r3, r9 @// g3 ^= f6 vmov r11, s6 eor r3, r11 @// g2 ^= g7 eor r2, r7 @// g7 ^= f28 vmov r11, s28 eor r7, r11 @// f28 ^= g8 vmov r10, s28 eor r10, r8 vmov s28, r10 @// f28 ^= f3 vmov r10, s28 vmov r11, s3 eor r10, r11 vmov s28, r10 @// f11 ^= g2 vmov r10, s11 eor r10, r2 vmov s11, r10 @// g7 ^= f6 vmov r11, s6 eor r7, r11 @// g2 ^= f3 vmov r11, s3 eor r2, r11 @// g8 ^= f22 vmov r11, s22 eor r8, r11 @// g8 ^= f19 vmov r11, s19 eor r8, r11 @// f19 ^= g5 vmov r10, s19 eor r10, r5 vmov s19, r10 @// g3 ^= g5 eor r3, r5 @// f6 ^= f25 vmov r10, s6 vmov r11, s25 eor r10, r11 vmov s6, r10 @// f25 ^= f20 vmov r10, s25 vmov r11, s20 eor r10, r11 vmov s25, r10 @// f25 ^= g5 vmov r10, s25 eor r10, r5 vmov s25, r10 @// f19 ^= f20 vmov r10, s19 vmov r11, s20 eor r10, r11 vmov s19, r10 @// g2 ^= g3 eor r2, r3 @// g5 ^= g9 eor r5, r9 @// g9 ^= g4 eor r9, r4 @// g9 ^= f3 vmov r11, s3 eor r9, r11 @// f20 ^= g2 vmov r10, s20 eor r10, r2 vmov s20, r10 @// g2 ^= f22 vmov r11, s22 eor r2, r11 @// f27 ^= f22 vmov r10, s27 vmov r11, s22 eor r10, r11 vmov s27, r10 @// g0 ^= g6 eor r0, r6 @// g6 ^= f4 vmov r11, s4 eor r6, r11 @// f12 ^= g6 vmov r10, s12 eor r10, r6 vmov s12, r10 @// g6 ^= f26 vmov r11, s26 eor r6, r11 @// g5 ^= g4 eor r5, r4 @// f13 ^= g4 vmov r10, s13 
eor r10, r4 vmov s13, r10 @// f9 ^= f3 vmov r10, s9 vmov r11, s3 eor r10, r11 vmov s9, r10 @// f3 ^= f0 vmov r10, s3 vmov r11, s0 eor r10, r11 vmov s3, r10 @// f0 ^= f4 vmov r10, s0 vmov r11, s4 eor r10, r11 vmov s0, r10 @// f4 ^= g4 vmov r10, s4 eor r10, r4 vmov s4, r10 @// f11 ^= f10 vmov r10, s11 vmov r11, s10 eor r10, r11 vmov s11, r10 @// f7 ^= f22 vmov r10, s7 vmov r11, s22 eor r10, r11 vmov s7, r10 @// f22 ^= f2 vmov r10, s22 vmov r11, s2 eor r10, r11 vmov s22, r10 @// f22 ^= f20 vmov r10, s22 vmov r11, s20 eor r10, r11 vmov s22, r10 @// g1 ^= f0 vmov r11, s0 eor r1, r11 @// f30 ^= f2 vmov r10, s30 vmov r11, s2 eor r10, r11 vmov s30, r10 @// g9 ^= g5 eor r9, r5 @// f21 ^= f22 vmov r10, s21 vmov r11, s22 eor r10, r11 vmov s21, r10 @// f10 ^= g1 vmov r10, s10 eor r10, r1 vmov s10, r10 @// g1 ^= f6 vmov r11, s6 eor r1, r11 @// f4 ^= g1 vmov r10, s4 eor r10, r1 vmov s4, r10 @// f7 ^= g4 vmov r10, s7 eor r10, r4 vmov s7, r10 @// g4 ^= f26 vmov r11, s26 eor r4, r11 @// f26 ^= f25 vmov r10, s26 vmov r11, s25 eor r10, r11 vmov s26, r10 @// f6 ^= g0 vmov r10, s6 eor r10, r0 vmov s6, r10 @// g7 ^= f19 vmov r11, s19 eor r7, r11 @// f27 ^= f25 vmov r10, s27 vmov r11, s25 eor r10, r11 vmov s27, r10 @// f28 ^= g9 vmov r10, s28 eor r10, r9 vmov s28, r10 @// f19 ^= g4 vmov r10, s19 eor r10, r4 vmov s19, r10 @// g3 ^= f4 vmov r11, s4 eor r3, r11 @// f14 ^= f9 vmov r10, s14 vmov r11, s9 eor r10, r11 vmov s14, r10 @// f16 ^= g8 vmov r10, s16 eor r10, r8 vmov s16, r10 @// f12 ^= f6 vmov r10, s12 vmov r11, s6 eor r10, r11 vmov s12, r10 @// f13 ^= g6 vmov r10, s13 eor r10, r6 vmov s13, r10 vstr.32 s0, [r14, #0] str r0, [r14, #4] vstr.32 s2, [r14, #8] vstr.32 s3, [r14, #12] vstr.32 s4, [r14, #16] str r1, [r14, #20] vstr.32 s6, [r14, #24] vstr.32 s7, [r14, #28] str r2, [r14, #32] vstr.32 s9, [r14, #36] vstr.32 s10, [r14, #40] vstr.32 s11, [r14, #44] vstr.32 s12, [r14, #48] vstr.32 s13, [r14, #52] vstr.32 s14, [r14, #56] str r3, [r14, #60] vstr.32 s16, [r14, #64] str r4, [r14, #68] 
@ ---- gft_mul_v19 (begins earlier in the file): remaining stores and epilogue ----
    str r5, [r14, #72]
    vstr.32 s19, [r14, #76]
    vstr.32 s20, [r14, #80]
    vstr.32 s21, [r14, #84]
    vstr.32 s22, [r14, #88]
    str r6, [r14, #92]
    str r7, [r14, #96]
    vstr.32 s25, [r14, #100]
    vstr.32 s26, [r14, #104]
    vstr.32 s27, [r14, #108]
    vstr.32 s28, [r14, #112]
    str r8, [r14, #116]
    vstr.32 s30, [r14, #120]
    str r9, [r14, #124]
    vpop { d8-d15 }
    pop { r0-r12, r14 }
    bx lr
.size gft_mul_v19, .-gft_mul_v19

@ -----------------------------------------------------------------------------
@ gft_mul_v20
@
@ Applies a fixed GF(2)-linear map to 32 words of 32 bits each: every output
@ word is the XOR of a fixed subset of the input words.
@ NOTE(review): the name and the generator output suggest this is
@ multiplication by a fixed field element on bitsliced data - confirm against
@ the caller / generator.
@
@ In:    r0 = destination, 32 words, written at byte offsets 0..124
@        r1 = source,      32 words, read    at byte offsets 0..124
@ Out:   nothing in registers; result is in memory at r0
@ Saved: r0-r12, r14 and d8-d15 are pushed on entry and popped before return
@        (56 + 64 bytes of stack). s0-s15 are overwritten.
@ Every source word is loaded before the first store is issued.
@
@ Structure (straight-line, no branches):
@   1. gather  - s0..s31 each receive the XOR of 3..8 source words
@                (the generator calls this the "initial state")
@   2. network - 90 in-place XOR steps; ten rows live in r0-r9 for this phase,
@                the other 22 stay in FP registers and go through r10/r11
@   3. store   - row i is written to destination word i
@
@ Generator log (kept for reference):
@   search ran for i = 0..950; better circuits were reported at #XORs = 253
@   and 239 (before i = 50) and 224 (between i = 100 and i = 150).
@   Cycle count without considering memory stores: 619.
@   Register mapping, 22 FP-resident rows followed by the 10 rows moved to
@   core registers:
@   [0, 1, 2, 4, 6, 8, 9, 11, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 28, 29, 30, 3, 5, 7, 10, 12, 13, 14, 17, 27, 31]
@
@ Final state reported by the generator: row i is the set of source word
@ indices whose XOR ends up in destination word i.
@   [ 0] {0, 1, 2, 6, 11, 13, 15, 24, 25, 27, 28, 30}
@   [ 1] {0, 3, 7, 10, 11, 12, 13, 14, 15, 24, 26, 27, 29, 31}
@   [ 2] {0, 1, 4, 5, 6, 7, 8, 10, 12, 14, 15, 24, 27, 28, 29, 31}
@   [ 3] {0, 4, 6, 9, 11, 13, 14, 25, 26, 27, 28, 30, 31}
@   [ 4] {1, 4, 6, 10, 14, 15, 26, 27, 28, 29, 30}
@   [ 5] {0, 1, 5, 7, 11, 14, 26, 28, 31}
@   [ 6] {3, 4, 5, 7, 8, 9, 10, 11, 13, 15, 25, 27, 28, 29}
@   [ 7] {2, 3, 4, 6, 7, 8, 10, 12, 13, 14, 15, 24, 25, 26, 27, 28}
@   [ 8] {0, 1, 3, 4, 5, 10, 11, 12, 13, 14, 16, 18, 21, 23, 24, 26, 29, 31}
@   [ 9] {0, 2, 3, 4, 10, 12, 15, 17, 19, 20, 21, 22, 23, 25, 27, 28, 29, 30, 31}
@   [10] {0, 3, 6, 7, 9, 11, 12, 13, 16, 17, 19, 20, 22, 23, 24, 25, 27, 28, 30, 31}
@   [11] {1, 2, 3, 6, 8, 9, 10, 11, 12, 16, 18, 19, 21, 22, 24, 26, 27, 29, 30}
@   [12] {0, 3, 5, 8, 9, 11, 12, 13, 14, 18, 20, 26, 28}
@   [13] {1, 2, 3, 4, 5, 8, 10, 11, 12, 15, 19, 21, 27, 29}
@   [14] {0, 7, 8, 11, 12, 13, 16, 17, 18, 19, 22, 24, 25, 26, 27, 30}
@   [15] {1, 6, 7, 9, 10, 11, 12, 16, 18, 23, 24, 26, 31}
@   [16] {0, 1, 6, 7, 18, 23, 27, 29, 31}
@   [17] {0, 6, 19, 22, 23, 26, 27, 28, 29, 30, 31}
@   [18] {2, 3, 5, 7, 16, 17, 18, 19, 20, 22, 24, 26, 28, 30, 31}
@   [19] {2, 4, 5, 6, 7, 16, 18, 21, 23, 25, 27, 29, 30}
@   [20] {0, 5, 16, 17, 20, 21, 22, 26, 30, 31}
@   [21] {1, 4, 5, 16, 20, 23, 27, 30}
@   [22] {2, 7, 18, 19, 20, 21, 24, 25, 26, 27, 29, 31}
@   [23] {3, 6, 7, 18, 20, 24, 26, 28, 29, 30, 31}
@   [24] {8, 9, 14, 15, 16, 17, 19, 20, 21, 24, 25, 26, 27, 28, 29, 31}
@   [25] {8, 14, 16, 18, 19, 20, 24, 26, 28, 30, 31}
@   [26] {10, 11, 13, 15, 16, 19, 22, 23, 25, 26, 28, 31}
@   [27] {10, 12, 13, 14, 15, 17, 18, 19, 22, 24, 25, 27, 29, 30, 31}
@   [28] {8, 13, 16, 19, 21, 25, 27, 28, 30}
@   [29] {9, 12, 13, 17, 18, 19, 20, 21, 24, 25, 26, 27, 29, 31}
@   [30] {10, 15, 16, 23, 24, 26, 27, 28, 29, 31}
@   [31] {11, 14, 15, 17, 22, 23, 25, 26, 28, 30, 31}
@ -----------------------------------------------------------------------------

@ Helper macros, private to gft_mul_v20.

@ acc = XOR of the source words with the given word indices (3 to 8 of them).
@ Source base is r12. All loads are issued first (into r0, r1, ...), then
@ folded into r0. Clobbers r0 up to r7.
.macro v20_gather acc, w0, w1, w2, w3, w4, w5, w6, w7
    ldr r0, [r12, #(4*\w0)]
    ldr r1, [r12, #(4*\w1)]
    ldr r2, [r12, #(4*\w2)]
    .ifnb \w3
    ldr r3, [r12, #(4*\w3)]
    .endif
    .ifnb \w4
    ldr r4, [r12, #(4*\w4)]
    .endif
    .ifnb \w5
    ldr r5, [r12, #(4*\w5)]
    .endif
    .ifnb \w6
    ldr r6, [r12, #(4*\w6)]
    .endif
    .ifnb \w7
    ldr r7, [r12, #(4*\w7)]
    .endif
    eor r0, r1
    eor r0, r2
    .ifnb \w3
    eor r0, r3
    .endif
    .ifnb \w4
    eor r0, r4
    .endif
    .ifnb \w5
    eor r0, r5
    .endif
    .ifnb \w6
    eor r0, r6
    .endif
    .ifnb \w7
    eor r0, r7
    .endif
    vmov \acc, r0
.endm

@ sd ^= ss, both rows FP-resident. Clobbers r10, r11.
.macro v20_xor_ss sd, ss
    vmov r10, \sd
    vmov r11, \ss
    eor r10, r11
    vmov \sd, r10
.endm

@ sd ^= rs, FP-resident row updated from a core-register row. Clobbers r10.
.macro v20_xor_sr sd, rs
    vmov r10, \sd
    eor r10, \rs
    vmov \sd, r10
.endm

@ rd ^= ss, core-register row updated from an FP-resident row. Clobbers r11.
.macro v20_xor_rs rd, ss
    vmov r11, \ss
    eor \rd, r11
.endm

.syntax unified
.cpu cortex-m4
.fpu fpv4-sp-d16
.global gft_mul_v20
.type gft_mul_v20, %function
.align 2
gft_mul_v20:
    push { r0-r12, r14 }
    vpush {d8-d15}

    mov r14, r0                     @ r14 = destination base
    mov r12, r1                     @ r12 = source base

    @ ---- phase 1: gather (arguments are source word indices) ----
    v20_gather s0, 0, 11, 18, 24, 28
    v20_gather s1, 3, 8, 10, 12, 21, 27
    v20_gather s2, 12, 19, 29, 31
    v20_gather s3, 26, 13, 31
    v20_gather s4, 13, 16, 25, 27, 28
    v20_gather s5, 1, 23, 7, 8, 9
    v20_gather s6, 1, 17, 19, 21, 25, 28
    v20_gather s7, 2, 4, 5, 11
    v20_gather s8, 7, 15, 18, 20, 21, 30
    v20_gather s9, 3, 11, 20, 23, 24, 31
    v20_gather s10, 0, 6, 8, 9, 26
    v20_gather s11, 16, 4, 24, 28, 15
    v20_gather s12, 3, 11, 14, 15
    v20_gather s13, 2, 4, 10, 14, 16, 31
    v20_gather s14, 4, 5, 8, 10, 30
    v20_gather s15, 9, 10, 12, 16, 28, 31
    v20_gather s16, 13, 18, 27, 29
    v20_gather s17, 4, 22, 24, 10, 11
    v20_gather s18, 2, 3, 5, 7, 8, 14, 17, 22
    v20_gather s19, 2, 6, 15, 24, 25
    v20_gather s20, 14, 17, 19, 21, 24, 30
    v20_gather s21, 14, 16, 19, 20, 23
    v20_gather s22, 6, 12, 26, 27, 30
    v20_gather s23, 1, 24, 29, 30
    v20_gather s24, 8, 11, 14, 19, 28, 31
    v20_gather s25, 9, 19, 22, 24, 31
    v20_gather s26, 10, 13, 16, 21, 24
    v20_gather s27, 9, 18, 22, 23, 27, 28
    v20_gather s28, 1, 8, 10, 14, 19, 27
    v20_gather s29, 2, 7, 9, 12, 13, 17
    v20_gather s30, 5, 6, 8, 19, 23
    v20_gather s31, 11, 15, 16, 27

    @ ---- rows 3, 5, 7, 10, 12, 13, 14, 17, 27, 31 move to r0..r9 ----
    vmov r0, s3
    vmov r1, s5
    vmov r2, s7
    vmov r3, s10
    vmov r4, s12
    vmov r5, s13
    vmov r6, s14
    vmov r7, s17
    vmov r8, s27
    vmov r9, s31

    @ ---- phase 2: XOR network (order is significant) ----
    v20_xor_rs r8, s4
    v20_xor_ss s1, s26
    v20_xor_sr s4, r0
    v20_xor_ss s9, s0
    v20_xor_sr s4, r5
    v20_xor_rs r5, s26
    v20_xor_ss s26, s20
    v20_xor_ss s20, s25
    eor r7, r6
    v20_xor_rs r7, s0
    v20_xor_ss s0, s16
    v20_xor_rs r9, s16
    v20_xor_sr s16, r0
    v20_xor_sr s15, r0
    eor r0, r2
    eor r0, r6
    eor r2, r4
    v20_xor_rs r2, s11
    v20_xor_sr s20, r4
    v20_xor_ss s25, s21
    v20_xor_rs r4, s21
    v20_xor_ss s21, s28
    v20_xor_sr s21, r6
    v20_xor_rs r2, s1
    v20_xor_sr s25, r1
    v20_xor_rs r4, s30
    v20_xor_rs r9, s2
    v20_xor_ss s2, s23
    v20_xor_ss s19, s23
    v20_xor_rs r6, s23
    v20_xor_sr s23, r1
    eor r1, r3
    eor r4, r3
    v20_xor_rs r6, s11
    v20_xor_rs r2, s0
    v20_xor_sr s20, r4
    v20_xor_sr s11, r3
    v20_xor_sr s11, r7
    eor r2, r3
    v20_xor_sr s16, r1
    v20_xor_rs r1, s30
    v20_xor_rs r8, s15
    eor r0, r3
    eor r7, r1
    v20_xor_ss s28, s26
    v20_xor_rs r5, s24
    v20_xor_ss s22, s2
    v20_xor_rs r3, s9
    v20_xor_ss s9, s24
    v20_xor_sr s2, r1
    v20_xor_rs r1, s24
    v20_xor_sr s24, r9
    v20_xor_rs r9, s26
    v20_xor_sr s9, r5
    v20_xor_rs r5, s1
    v20_xor_ss s30, s22
    v20_xor_sr s30, r6
    v20_xor_ss s28, s6
    eor r9, r8
    v20_xor_sr s25, r7
    v20_xor_rs r8, s30
    v20_xor_ss s22, s19
    v20_xor_ss s22, s8
    v20_xor_ss s15, s0
    eor r5, r6
    eor r6, r1
    v20_xor_rs r0, s4
    v20_xor_ss s4, s19
    v20_xor_ss s15, s16
    v20_xor_rs r7, s16
    v20_xor_sr s23, r3
    v20_xor_rs r4, s24
    v20_xor_ss s8, s21
    v20_xor_ss s6, s29
    v20_xor_sr s6, r5
    v20_xor_rs r8, s26
    v20_xor_sr s26, r9
    v20_xor_sr s1, r6
    eor r6, r8
    v20_xor_ss s0, s19
    v20_xor_ss s19, s8
    v20_xor_rs r2, s6
    v20_xor_ss s2, s4
    v20_xor_ss s18, s25
    v20_xor_sr s9, r8
    v20_xor_sr s11, r5
    v20_xor_ss s29, s22
    v20_xor_ss s8, s1
    v20_xor_ss s24, s29
    eor r3, r6

    @ ---- phase 3: store row i to destination word i ----
    vstr.32 s0, [r14, #0]
    vstr.32 s1, [r14, #4]
    vstr.32 s2, [r14, #8]
    str r0, [r14, #12]
    vstr.32 s4, [r14, #16]
    str r1, [r14, #20]
    vstr.32 s6, [r14, #24]
    str r2, [r14, #28]
    vstr.32 s8, [r14, #32]
    vstr.32 s9, [r14, #36]
    str r3, [r14, #40]
    vstr.32 s11, [r14, #44]
    str r4, [r14, #48]
    str r5, [r14, #52]
    str r6, [r14, #56]
    vstr.32 s15, [r14, #60]
    vstr.32 s16, [r14, #64]
    str r7, [r14, #68]
    vstr.32 s18, [r14, #72]
    vstr.32 s19, [r14, #76]
    vstr.32 s20, [r14, #80]
    vstr.32 s21, [r14, #84]
    vstr.32 s22, [r14, #88]
    vstr.32 s23, [r14, #92]
    vstr.32 s24, [r14, #96]
    vstr.32 s25, [r14, #100]
    vstr.32 s26, [r14, #104]
    str r8, [r14, #108]
    vstr.32 s28, [r14, #112]
    vstr.32 s29, [r14, #116]
    vstr.32 s30, [r14, #120]
    str r9, [r14, #124]

    vpop { d8-d15 }
    pop { r0-r12, r14 }
    bx lr
.size gft_mul_v20, .-gft_mul_v20

.syntax unified
.cpu cortex-m4
.fpu fpv4-sp-d16
.global gft_mul_v21
.type gft_mul_v21, %function .align 2 gft_mul_v21: push { r0-r12, r14 } vpush {d8-d15} @ final state: [{3, 6, 9, 14, 15, 26, 27, 28, 29, 31}, {2, 3, 7, 8, 9, 14, 26, 28, 30, 31}, {0, 2, 4, 5, 6, 7, 11, 13, 15, 25, 27, 28, 31}, {1, 3, 4, 6, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 29, 30, 31}, {1, 5, 7, 8, 12, 13, 25, 27, 29, 30}, {0, 1, 4, 5, 6, 7, 9, 12, 24, 25, 26, 27, 28, 29, 31}, {3, 4, 6, 7, 10, 14, 15, 24, 26, 27, 28, 29, 30}, {2, 3, 5, 6, 11, 14, 25, 26, 28, 31}, {0, 2, 4, 5, 6, 8, 10, 11, 12, 13, 17, 19, 20, 25, 27, 28}, {1, 3, 4, 7, 9, 10, 12, 16, 17, 18, 19, 21, 24, 25, 26, 27, 29}, {0, 1, 3, 4, 5, 9, 10, 11, 14, 15, 16, 18, 19, 22, 24, 26, 27, 30}, {0, 2, 3, 4, 8, 9, 10, 14, 17, 18, 23, 25, 26, 31}, {0, 1, 3, 5, 6, 7, 8, 11, 14, 17, 18, 19, 22, 25, 26, 27, 30}, {0, 2, 3, 4, 5, 6, 9, 10, 11, 15, 16, 17, 18, 23, 24, 25, 26, 31}, {0, 3, 5, 8, 12, 13, 14, 15, 17, 20, 21, 22, 23, 25, 28, 29, 30, 31}, {1, 2, 3, 4, 5, 9, 12, 14, 16, 17, 20, 22, 24, 25, 28, 30}, {2, 6, 18, 19, 25, 30, 31}, {3, 7, 18, 24, 25, 30}, {0, 1, 2, 3, 4, 5, 6, 7, 17, 19, 27, 29, 31}, {0, 2, 4, 6, 16, 17, 18, 19, 26, 27, 28, 29, 30, 31}, {1, 5, 6, 22, 23, 24, 28, 29}, {0, 1, 4, 5, 7, 22, 25, 28}, {3, 4, 5, 6, 21, 23, 26, 30, 31}, {2, 3, 4, 7, 20, 21, 22, 23, 27, 30}, {10, 14, 16, 18, 20, 21, 22, 24, 27, 28, 29, 30}, {11, 15, 17, 19, 20, 23, 25, 26, 27, 28, 31}, {8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20, 21, 24, 28, 29}, {8, 10, 12, 14, 16, 18, 19, 20, 25, 28}, {9, 13, 14, 16, 17, 19, 21, 22, 23, 24, 25, 27, 29}, {8, 9, 12, 13, 15, 16, 18, 19, 20, 21, 22, 24, 26, 27, 28, 29}, {11, 12, 13, 14, 16, 19, 21, 24, 27, 31}, {10, 11, 12, 15, 17, 18, 19, 20, 21, 25, 26, 27, 30, 31}] @ i = 0 @ better circuit found: #XORs = 263 @ better circuit found: #XORs = 241 @ better circuit found: #XORs = 237 @ better circuit found: #XORs = 232 @ i = 50 @ i = 100 @ i = 150 @ i = 200 @ i = 250 @ i = 300 @ i = 350 @ i = 400 @ better circuit found: #XORs = 231 @ i = 450 @ i = 500 @ i = 550 @ i = 600 @ i = 650 @ 
i = 700 @ better circuit found: #XORs = 230 @ i = 750 @ i = 800 @ i = 850 @ i = 900 @ i = 950 @ initial state: [{8, 11, 15, 24, 31}, {0, 7, 8, 11, 28, 29}, {5, 11, 13, 23}, {1, 7, 10, 12, 25}, {0, 5, 7, 27, 30, 31}, {5, 6, 24, 28, 29}, {0, 30, 24, 25, 9}, {22, 8, 9, 28, 31}, {19, 21, 22, 8, 26}, {7, 25, 26, 27, 9}, {2, 8, 15, 16, 17, 18, 19}, {10, 12, 17, 21, 31}, {3, 4, 6, 10, 21, 26, 28, 30}, {0, 7, 14, 15, 16}, {27, 12, 13, 14, 31}, {0, 2, 8, 12, 26}, {2, 6, 18, 19, 25, 30, 31}, {17, 18, 3, 23}, {16, 4, 23, 28, 29}, {0, 17, 23, 7, 9}, {1, 22, 23}, {0, 1, 4, 12, 31}, {5, 10, 23, 28, 31}, {2, 5, 6, 15, 27, 31}, {1, 13, 16, 20, 24, 29, 31}, {7, 15, 20, 22, 26}, {17, 10, 25, 29, 9}, {2, 3, 9, 20, 22, 24, 29}, {14, 18, 22, 25, 29, 31}, {3, 9, 11, 12, 14}, {8, 13, 16, 18, 21, 24}, {17, 18, 5, 10, 30}] @[0, 1, 2, 4, 5, 6, 10, 12, 13, 15, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27, 28, 30, 3, 7, 8, 9, 11, 14, 21, 26, 29, 31] @instuctions with recommended mapping to GP and FP registers: @(cycle count without considering memory stores: 633) mov r14, r0 mov r12, r1 ldr r0, [r12, #32] ldr r1, [r12, #44] ldr r2, [r12, #60] ldr r3, [r12, #96] ldr r4, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s0, r0 ldr r0, [r12, #0] ldr r1, [r12, #28] ldr r2, [r12, #32] ldr r3, [r12, #44] ldr r4, [r12, #112] ldr r5, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s1, r0 ldr r0, [r12, #20] ldr r1, [r12, #44] ldr r2, [r12, #52] ldr r3, [r12, #92] eor r0, r1 eor r0, r2 eor r0, r3 vmov s2, r0 ldr r0, [r12, #4] ldr r1, [r12, #28] ldr r2, [r12, #40] ldr r3, [r12, #48] ldr r4, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s3, r0 ldr r0, [r12, #0] ldr r1, [r12, #20] ldr r2, [r12, #28] ldr r3, [r12, #108] ldr r4, [r12, #120] ldr r5, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s4, r0 ldr r0, [r12, #20] ldr r1, [r12, #24] ldr r2, [r12, #96] ldr r3, [r12, #112] ldr r4, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 
vmov s5, r0 ldr r0, [r12, #0] ldr r1, [r12, #120] ldr r2, [r12, #96] ldr r3, [r12, #100] ldr r4, [r12, #36] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s6, r0 ldr r0, [r12, #88] ldr r1, [r12, #32] ldr r2, [r12, #36] ldr r3, [r12, #112] ldr r4, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s7, r0 ldr r0, [r12, #76] ldr r1, [r12, #84] ldr r2, [r12, #88] ldr r3, [r12, #32] ldr r4, [r12, #104] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s8, r0 ldr r0, [r12, #28] ldr r1, [r12, #100] ldr r2, [r12, #104] ldr r3, [r12, #108] ldr r4, [r12, #36] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s9, r0 ldr r0, [r12, #8] ldr r1, [r12, #32] ldr r2, [r12, #60] ldr r3, [r12, #64] ldr r4, [r12, #68] ldr r5, [r12, #72] ldr r6, [r12, #76] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s10, r0 ldr r0, [r12, #40] ldr r1, [r12, #48] ldr r2, [r12, #68] ldr r3, [r12, #84] ldr r4, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s11, r0 ldr r0, [r12, #12] ldr r1, [r12, #16] ldr r2, [r12, #24] ldr r3, [r12, #40] ldr r4, [r12, #84] ldr r5, [r12, #104] ldr r6, [r12, #112] ldr r7, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 eor r0, r7 vmov s12, r0 ldr r0, [r12, #0] ldr r1, [r12, #28] ldr r2, [r12, #56] ldr r3, [r12, #60] ldr r4, [r12, #64] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s13, r0 ldr r0, [r12, #108] ldr r1, [r12, #48] ldr r2, [r12, #52] ldr r3, [r12, #56] ldr r4, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s14, r0 ldr r0, [r12, #0] ldr r1, [r12, #8] ldr r2, [r12, #32] ldr r3, [r12, #48] ldr r4, [r12, #104] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s15, r0 ldr r0, [r12, #8] ldr r1, [r12, #24] ldr r2, [r12, #72] ldr r3, [r12, #76] ldr r4, [r12, #100] ldr r5, [r12, #120] ldr r6, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s16, r0 ldr r0, [r12, #68] ldr r1, [r12, #72] ldr r2, [r12, #12] ldr r3, [r12, #92] eor r0, r1 eor r0, r2 
eor r0, r3 vmov s17, r0 ldr r0, [r12, #64] ldr r1, [r12, #16] ldr r2, [r12, #92] ldr r3, [r12, #112] ldr r4, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s18, r0 ldr r0, [r12, #0] ldr r1, [r12, #68] ldr r2, [r12, #92] ldr r3, [r12, #28] ldr r4, [r12, #36] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s19, r0 ldr r0, [r12, #4] ldr r1, [r12, #88] ldr r2, [r12, #92] eor r0, r1 eor r0, r2 vmov s20, r0 ldr r0, [r12, #0] ldr r1, [r12, #4] ldr r2, [r12, #16] ldr r3, [r12, #48] ldr r4, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s21, r0 ldr r0, [r12, #20] ldr r1, [r12, #40] ldr r2, [r12, #92] ldr r3, [r12, #112] ldr r4, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s22, r0 ldr r0, [r12, #8] ldr r1, [r12, #20] ldr r2, [r12, #24] ldr r3, [r12, #60] ldr r4, [r12, #108] ldr r5, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s23, r0 ldr r0, [r12, #4] ldr r1, [r12, #52] ldr r2, [r12, #64] ldr r3, [r12, #80] ldr r4, [r12, #96] ldr r5, [r12, #116] ldr r6, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s24, r0 ldr r0, [r12, #28] ldr r1, [r12, #60] ldr r2, [r12, #80] ldr r3, [r12, #88] ldr r4, [r12, #104] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s25, r0 ldr r0, [r12, #68] ldr r1, [r12, #40] ldr r2, [r12, #100] ldr r3, [r12, #116] ldr r4, [r12, #36] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s26, r0 ldr r0, [r12, #8] ldr r1, [r12, #12] ldr r2, [r12, #36] ldr r3, [r12, #80] ldr r4, [r12, #88] ldr r5, [r12, #96] ldr r6, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s27, r0 ldr r0, [r12, #56] ldr r1, [r12, #72] ldr r2, [r12, #88] ldr r3, [r12, #100] ldr r4, [r12, #116] ldr r5, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s28, r0 ldr r0, [r12, #12] ldr r1, [r12, #36] ldr r2, [r12, #44] ldr r3, [r12, #48] ldr r4, [r12, #56] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s29, r0 ldr r0, [r12, #32] ldr 
r1, [r12, #52] ldr r2, [r12, #64] ldr r3, [r12, #72] ldr r4, [r12, #84] ldr r5, [r12, #96] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s30, r0 ldr r0, [r12, #68] ldr r1, [r12, #72] ldr r2, [r12, #20] ldr r3, [r12, #40] ldr r4, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s31, r0 vmov r0, s3; vmov r1, s7; vmov r2, s8; vmov r3, s9; vmov r4, s11; vmov r5, s14; vmov r6, s21; vmov r7, s26; vmov r8, s29; vmov r9, s31; @// f27 ^= g7 vmov r10, s27 eor r10, r7 vmov s27, r10 @// g7 ^= g3 eor r7, r3 @// f24 ^= g5 vmov r10, s24 eor r10, r5 vmov s24, r10 @// g5 ^= g6 eor r5, r6 @// g6 ^= g0 eor r6, r0 @// f0 ^= g1 vmov r10, s0 eor r10, r1 vmov s0, r10 @// g1 ^= g2 eor r1, r2 @// f0 ^= g8 vmov r10, s0 eor r10, r8 vmov s0, r10 @// g0 ^= f20 vmov r11, s20 eor r0, r11 @// g1 ^= g4 eor r1, r4 @// g7 ^= g1 eor r7, r1 @// f15 ^= g1 vmov r10, s15 eor r10, r1 vmov s15, r10 @// g1 ^= g8 eor r1, r8 @// g8 ^= f17 vmov r11, s17 eor r8, r11 @// f17 ^= f19 vmov r10, s17 vmov r11, s19 eor r10, r11 vmov s17, r10 @// f19 ^= g3 vmov r10, s19 eor r10, r3 vmov s19, r10 @// g1 ^= g9 eor r1, r9 @// f2 ^= f20 vmov r10, s2 vmov r11, s20 eor r10, r11 vmov s2, r10 @// g9 ^= f22 vmov r11, s22 eor r9, r11 @// f27 ^= f0 vmov r10, s27 vmov r11, s0 eor r10, r11 vmov s27, r10 @// f27 ^= f10 vmov r10, s27 vmov r11, s10 eor r10, r11 vmov s27, r10 @// g3 ^= g0 eor r3, r0 @// g0 ^= f22 vmov r11, s22 eor r0, r11 @// f30 ^= g2 vmov r10, s30 eor r10, r2 vmov s30, r10 @// f10 ^= f16 vmov r10, s10 vmov r11, s16 eor r10, r11 vmov s10, r10 @// f19 ^= f18 vmov r10, s19 vmov r11, s18 eor r10, r11 vmov s19, r10 @// f18 ^= f22 vmov r10, s18 vmov r11, s22 eor r10, r11 vmov s18, r10 @// g0 ^= f2 vmov r11, s2 eor r0, r11 @// f24 ^= f20 vmov r10, s24 vmov r11, s20 eor r10, r11 vmov s24, r10 @// g5 ^= f17 vmov r11, s17 eor r5, r11 @// f17 ^= f6 vmov r10, s17 vmov r11, s6 eor r10, r11 vmov s17, r10 @// f4 ^= f1 vmov r10, s4 vmov r11, s1 eor r10, r11 vmov s4, r10 @// f1 ^= f5 vmov r10, s1 vmov r11, 
s5 eor r10, r11 vmov s1, r10 @// f13 ^= f1 vmov r10, s13 vmov r11, s1 eor r10, r11 vmov s13, r10 @// f1 ^= f6 vmov r10, s1 vmov r11, s6 eor r10, r11 vmov s1, r10 @// f5 ^= f22 vmov r10, s5 vmov r11, s22 eor r10, r11 vmov s5, r10 @// f20 ^= f22 vmov r10, s20 vmov r11, s22 eor r10, r11 vmov s20, r10 @// f22 ^= f12 vmov r10, s22 vmov r11, s12 eor r10, r11 vmov s22, r10 @// f12 ^= g2 vmov r10, s12 eor r10, r2 vmov s12, r10 @// g2 ^= f25 vmov r11, s25 eor r2, r11 @// f18 ^= f23 vmov r10, s18 vmov r11, s23 eor r10, r11 vmov s18, r10 @// f2 ^= f23 vmov r10, s2 vmov r11, s23 eor r10, r11 vmov s2, r10 @// f23 ^= f25 vmov r10, s23 vmov r11, s25 eor r10, r11 vmov s23, r10 @// f23 ^= f22 vmov r10, s23 vmov r11, s22 eor r10, r11 vmov s23, r10 @// g1 ^= f16 vmov r11, s16 eor r1, r11 @// g9 ^= g4 eor r9, r4 @// f28 ^= g8 vmov r10, s28 eor r10, r8 vmov s28, r10 @// g8 ^= g3 eor r8, r3 @// f30 ^= g8 vmov r10, s30 eor r10, r8 vmov s30, r10 @// f30 ^= g4 vmov r10, s30 eor r10, r4 vmov s30, r10 @// f25 ^= g7 vmov r10, s25 eor r10, r7 vmov s25, r10 @// f25 ^= f28 vmov r10, s25 vmov r11, s28 eor r10, r11 vmov s25, r10 @// f6 ^= g6 vmov r10, s6 eor r10, r6 vmov s6, r10 @// g2 ^= g4 eor r2, r4 @// g4 ^= g6 eor r4, r6 @// g6 ^= f20 vmov r11, s20 eor r6, r11 @// f15 ^= f27 vmov r10, s15 vmov r11, s27 eor r10, r11 vmov s15, r10 @// f20 ^= f5 vmov r10, s20 vmov r11, s5 eor r10, r11 vmov s20, r10 @// f4 ^= g0 vmov r10, s4 eor r10, r0 vmov s4, r10 @// f18 ^= f10 vmov r10, s18 vmov r11, s10 eor r10, r11 vmov s18, r10 @// f1 ^= g1 vmov r10, s1 eor r10, r1 vmov s1, r10 @// f15 ^= f17 vmov r10, s15 vmov r11, s17 eor r10, r11 vmov s15, r10 @// f5 ^= g3 vmov r10, s5 eor r10, r3 vmov s5, r10 @// g3 ^= g5 eor r3, r5 @// g5 ^= g6 eor r5, r6 @// f12 ^= g6 vmov r10, s12 eor r10, r6 vmov s12, r10 @// f0 ^= f5 vmov r10, s0 vmov r11, s5 eor r10, r11 vmov s0, r10 @// f24 ^= g9 vmov r10, s24 eor r10, r9 vmov s24, r10 @// f18 ^= f12 vmov r10, s18 vmov r11, s12 eor r10, r11 vmov s18, r10 @// g7 ^= g2 eor r7, r2 
@// f10 ^= f6
    vmov r10, s10
    vmov r11, s6
    eor r10, r11
    vmov s10, r10
@// g4 ^= g9
    eor r4, r9
@// g5 ^= g9
    eor r5, r9
@// g4 ^= f1
    vmov r11, s1
    eor r4, r11
@// f28 ^= f30
    vmov r10, s28
    vmov r11, s30
    eor r10, r11
    vmov s28, r10
@// f15 ^= g6
    vmov r10, s15
    eor r10, r6
    vmov s15, r10
@// g5 ^= g7
    eor r5, r7
@// f2 ^= g6
    vmov r10, s2
    eor r10, r6
    vmov s2, r10
@// f19 ^= f16
    vmov r10, s19
    vmov r11, s16
    eor r10, r11
    vmov s19, r10
@// g3 ^= f28
    vmov r11, s28
    eor r3, r11
@// g7 ^= f30
    vmov r11, s30
    eor r7, r11
@// f5 ^= g6
    vmov r10, s5
    eor r10, r6
    vmov s5, r10
@// f6 ^= f0
    vmov r10, s6
    vmov r11, s0
    eor r10, r11
    vmov s6, r10
@// f12 ^= g8
    vmov r10, s12
    eor r10, r8
    vmov s12, r10
@// g2 ^= f2
    vmov r11, s2
    eor r2, r11
@// f10 ^= f12
    vmov r10, s10
    vmov r11, s12
    eor r10, r11
    vmov s10, r10
@// g9 ^= f25
    vmov r11, s25
    eor r9, r11
@// g8 ^= g7
    eor r8, r7
@// g0 ^= f6
    vmov r11, s6
    eor r0, r11
@// f13 ^= g4
    vmov r10, s13
    eor r10, r4
    vmov s13, r10
    vstr.32 s0, [r14, #0]
    vstr.32 s1, [r14, #4]
    vstr.32 s2, [r14, #8]
    str r0, [r14, #12]
    vstr.32 s4, [r14, #16]
    vstr.32 s5, [r14, #20]
    vstr.32 s6, [r14, #24]
    str r1, [r14, #28]
    str r2, [r14, #32]
    str r3, [r14, #36]
    vstr.32 s10, [r14, #40]
    str r4, [r14, #44]
    vstr.32 s12, [r14, #48]
    vstr.32 s13, [r14, #52]
    str r5, [r14, #56]
    vstr.32 s15, [r14, #60]
    vstr.32 s16, [r14, #64]
    vstr.32 s17, [r14, #68]
    vstr.32 s18, [r14, #72]
    vstr.32 s19, [r14, #76]
    vstr.32 s20, [r14, #80]
    str r6, [r14, #84]
    vstr.32 s22, [r14, #88]
    vstr.32 s23, [r14, #92]
    vstr.32 s24, [r14, #96]
    vstr.32 s25, [r14, #100]
    str r7, [r14, #104]
    vstr.32 s27, [r14, #108]
    vstr.32 s28, [r14, #112]
    str r8, [r14, #116]
    vstr.32 s30, [r14, #120]
    str r9, [r14, #124]
    vpop { d8-d15 }
    pop { r0-r12, r14 }
    bx lr
    .size gft_mul_v21, .-gft_mul_v21

@ -----------------------------------------------------------------------
@ gft_mul_v22
@ Generated, branch-free XOR network over 32 words of 32 bits.
@ In:    r0 = destination base (copied to r14); words are written at
@             byte offsets #0..#124
@        r1 = source base (copied to r12); words are read at byte
@             offsets in #0..#124
@ Saves: r0-r12, r14 and d8-d15 are pushed on entry and popped before
@        "bx lr".
@ Notation used by the "@//" step comments: fN is FP register sN and
@ gN is core register rN. r10 and r11 are scratch registers used to
@ shuttle values between the FP and core register files.
@ The "final state" / "initial state" lists and the "i = ..." lines are
@ output of the tool that produced this circuit, kept for reference.
@ -----------------------------------------------------------------------
    .syntax unified
    .cpu cortex-m4
    .fpu fpv4-sp-d16
    .global gft_mul_v22
    .type gft_mul_v22, %function
    .align 2
gft_mul_v22:
    push { r0-r12, r14 }
    vpush {d8-d15}
@ final state: [{1, 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 24, 25, 27, 29, 30}, {0, 1, 4, 6, 7, 8, 10, 13, 14, 24, 26, 27, 28, 29, 31}, {3, 4, 7, 9, 10, 13, 14, 15, 24, 27, 28, 29, 30}, {2, 3, 5, 6, 7, 8, 9, 11, 12, 13, 14, 25, 26, 27, 28, 31}, {1, 3, 7, 8, 9, 10, 11, 24, 26, 29, 30, 31}, {0, 1, 2, 3, 6, 7, 8, 10, 25, 27, 28, 29, 30}, {0, 2, 3, 4, 6, 9, 10, 24, 25, 27, 29}, {1, 2, 5, 7, 8, 9, 11, 24, 26, 27, 28, 29}, {0, 1, 3, 7, 8, 11, 12, 13, 19, 21, 23, 27, 29, 31}, {0, 2, 3, 6, 7, 9, 10, 11, 12, 18, 19, 20, 21, 22, 23, 26, 27, 28, 29, 30, 31}, {0, 3, 4, 6, 8, 14, 15, 16, 18, 20, 22, 23, 24, 26, 28, 30, 31}, {1, 2, 3, 5, 7, 9, 14, 17, 19, 21, 22, 25, 27, 29, 30}, {0, 1, 7, 8, 11, 18, 22, 23, 26, 30, 31}, {0, 6, 7, 9, 10, 11, 19, 22, 27, 30}, {2, 3, 4, 6, 8, 16, 17, 18, 19, 21, 23, 24, 25, 26, 27, 29, 31}, {2, 5, 7, 9, 16, 18, 20, 21, 22, 23, 24, 26, 28, 29, 30, 31}, {0, 1, 3, 4, 5, 16, 19, 23, 24, 25, 26, 27, 28, 30, 31}, {0, 2, 3, 4, 17, 18, 19, 22, 23, 24, 26, 29, 30}, {0, 3, 6, 7, 16, 20, 22, 25, 26, 29, 30, 31}, {1, 2, 3, 6, 17, 21, 23, 24, 25, 27, 28, 29, 30}, {0, 3, 5, 16, 17, 21, 23, 24, 25, 26, 27}, {1, 2, 3, 4, 5, 16, 20, 21, 22, 23, 24, 26}, {0, 7, 18, 19, 20, 22, 23, 25, 26}, {1, 6, 7, 18, 21, 22, 24, 25, 27}, {8, 9, 11, 12, 13, 16, 17, 19, 23, 25}, {8, 10, 11, 12, 16, 18, 19, 22, 23, 24, 25}, {8, 11, 14, 15, 16, 19, 20, 22, 27}, {9, 10, 11, 14, 17, 18, 19, 21, 23, 26, 27}, {8, 11, 13, 16, 17, 23, 29}, {9, 10, 11, 12, 13, 16, 22, 23, 28, 29}, {8, 15, 18, 19, 20, 22, 31}, {9, 14, 15, 18, 21, 23, 30, 31}]
@ i = 0 @ better circuit found: #XORs = 254 @ better circuit found: #XORs = 252 @ better circuit found: #XORs = 242 @ better circuit found: #XORs = 235 @ better circuit found: #XORs = 233 @ i = 50 @ better circuit found: #XORs = 227 @ better circuit found: #XORs = 223 @ i = 100 @ i = 150 @ better circuit found: #XORs = 219 @ i = 200 @ i = 250 @ i = 300 @ i = 350 @ i = 400 @ i = 450 @ i = 500 @ i = 550 @ i = 600 @ i = 650 @ i = 700 @ i = 750 @ i = 800 @ i = 850 @ i = 900 @ i = 950
@ initial state: [{1, 18, 5, 22, 11}, {0, 2, 7, 8, 14, 25, 30}, {1, 8, 10, 13, 14, 25, 29}, {21, 23, 25, 14, 31}, {1, 7, 15, 26, 27, 30}, {30, 8, 12, 9}, {6, 8, 13, 15, 28}, {2, 4, 5, 10, 25, 26, 29}, {0, 1, 12, 19, 26}, {16, 3, 10, 24, 26, 12}, {3, 25, 27, 15}, {4, 27, 24, 31}, {9, 10, 16, 22, 24, 31}, {6, 12, 18, 22, 27}, {2, 3, 5, 6, 7, 26, 28}, {3, 8, 18, 24, 29, 31}, {0, 19, 6, 30}, {4, 21, 25, 28}, {0, 8, 16, 23, 24}, {2, 6, 18, 21, 23}, {5, 8, 15, 17, 21, 26}, {0, 1, 6, 17, 25, 31}, {2, 20, 6, 22, 24}, {1, 7, 12, 21, 24, 25}, {9, 12, 19, 25, 29}, {10, 11, 25, 27, 30}, {11, 14, 16, 18, 27, 31}, {7, 14, 23, 24, 28}, {0, 17, 29, 24, 11, 13}, {17, 22, 10, 28, 30}, {8, 15, 18, 19, 20, 22, 31}, {18, 9, 25, 15, 30}]
@[1, 2, 3, 6, 7, 9, 12, 13, 14, 16, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 4, 5, 8, 10, 11, 15, 17, 18, 19]
@instructions with recommended mapping to GP and FP registers:
@(cycle count without considering memory stores: 592)
    mov r14, r0
    mov r12, r1

@ Step 1: build the 32 state words. Group i loads the source words whose
@ indices appear in row i of the "initial state" list above (byte offset
@ = 4 * index), XORs them together in r0 and parks the result in s<i>.
    ldr r0, [r12, #4]
    ldr r1, [r12, #72]
    ldr r2, [r12, #20]
    ldr r3, [r12, #88]
    ldr r4, [r12, #44]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s0, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #8]
    ldr r2, [r12, #28]
    ldr r3, [r12, #32]
    ldr r4, [r12, #56]
    ldr r5, [r12, #100]
    ldr r6, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s1, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #32]
    ldr r2, [r12, #40]
    ldr r3, [r12, #52]
    ldr r4, [r12, #56]
    ldr r5, [r12, #100]
    ldr r6, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s2, r0

    ldr r0, [r12, #84]
    ldr r1, [r12, #92]
    ldr r2, [r12, #100]
    ldr r3, [r12, #56]
    ldr r4, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s3, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #28]
    ldr r2, [r12, #60]
    ldr r3, [r12, #104]
    ldr r4, [r12, #108]
    ldr r5, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s4, r0

    ldr r0, [r12, #120]
    ldr r1, [r12, #32]
    ldr r2, [r12, #48]
    ldr r3, [r12, #36]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s5, r0

    ldr r0, [r12, #24]
    ldr r1, [r12, #32]
    ldr r2, [r12, #52]
    ldr r3, [r12, #60]
    ldr r4, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s6, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #16]
    ldr r2, [r12, #20]
    ldr r3, [r12, #40]
    ldr r4, [r12, #100]
    ldr r5, [r12, #104]
    ldr r6, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s7, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #4]
    ldr r2, [r12, #48]
    ldr r3, [r12, #76]
    ldr r4, [r12, #104]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s8, r0

    ldr r0, [r12, #64]
    ldr r1, [r12, #12]
    ldr r2, [r12, #40]
    ldr r3, [r12, #96]
    ldr r4, [r12, #104]
    ldr r5, [r12, #48]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s9, r0

    ldr r0, [r12, #12]
    ldr r1, [r12, #100]
    ldr r2, [r12, #108]
    ldr r3, [r12, #60]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s10, r0

    ldr r0, [r12, #16]
    ldr r1, [r12, #108]
    ldr r2, [r12, #96]
    ldr r3, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s11, r0

    ldr r0, [r12, #36]
    ldr r1, [r12, #40]
    ldr r2, [r12, #64]
    ldr r3, [r12, #88]
    ldr r4, [r12, #96]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s12, r0

    ldr r0, [r12, #24]
    ldr r1, [r12, #48]
    ldr r2, [r12, #72]
    ldr r3, [r12, #88]
    ldr r4, [r12, #108]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s13, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #12]
    ldr r2, [r12, #20]
    ldr r3, [r12, #24]
    ldr r4, [r12, #28]
    ldr r5, [r12, #104]
    ldr r6, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s14, r0

    ldr r0, [r12, #12]
    ldr r1, [r12, #32]
    ldr r2, [r12, #72]
    ldr r3, [r12, #96]
    ldr r4, [r12, #116]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s15, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #76]
    ldr r2, [r12, #24]
    ldr r3, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s16, r0

    ldr r0, [r12, #16]
    ldr r1, [r12, #84]
    ldr r2, [r12, #100]
    ldr r3, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s17, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #32]
    ldr r2, [r12, #64]
    ldr r3, [r12, #92]
    ldr r4, [r12, #96]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s18, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #24]
    ldr r2, [r12, #72]
    ldr r3, [r12, #84]
    ldr r4, [r12, #92]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s19, r0

    ldr r0, [r12, #20]
    ldr r1, [r12, #32]
    ldr r2, [r12, #60]
    ldr r3, [r12, #68]
    ldr r4, [r12, #84]
    ldr r5, [r12, #104]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s20, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #4]
    ldr r2, [r12, #24]
    ldr r3, [r12, #68]
    ldr r4, [r12, #100]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s21, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #80]
    ldr r2, [r12, #24]
    ldr r3, [r12, #88]
    ldr r4, [r12, #96]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s22, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #28]
    ldr r2, [r12, #48]
    ldr r3, [r12, #84]
    ldr r4, [r12, #96]
    ldr r5, [r12, #100]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s23, r0

    ldr r0, [r12, #36]
    ldr r1, [r12, #48]
    ldr r2, [r12, #76]
    ldr r3, [r12, #100]
    ldr r4, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s24, r0

    ldr r0, [r12, #40]
    ldr r1, [r12, #44]
    ldr r2, [r12, #100]
    ldr r3, [r12, #108]
    ldr r4, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s25, r0

    ldr r0, [r12, #44]
    ldr r1, [r12, #56]
    ldr r2, [r12, #64]
    ldr r3, [r12, #72]
    ldr r4, [r12, #108]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s26, r0

    ldr r0, [r12, #28]
    ldr r1, [r12, #56]
    ldr r2, [r12, #92]
    ldr r3, [r12, #96]
    ldr r4, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s27, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #68]
    ldr r2, [r12, #116]
    ldr r3, [r12, #96]
    ldr r4, [r12, #44]
    ldr r5, [r12, #52]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s28, r0

    ldr r0, [r12, #68]
    ldr r1, [r12, #88]
    ldr r2, [r12, #40]
    ldr r3, [r12, #112]
    ldr r4, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s29, r0

    ldr r0, [r12, #32]
    ldr r1, [r12, #60]
    ldr r2, [r12, #72]
    ldr r3, [r12, #76]
    ldr r4, [r12, #80]
    ldr r5, [r12, #88]
    ldr r6, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s30, r0

    ldr r0, [r12, #72]
    ldr r1, [r12, #36]
    ldr r2, [r12, #100]
    ldr r3, [r12, #60]
    ldr r4, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s31, r0

@ Step 2: state words 0, 4, 5, 8, 10, 11, 15, 17, 18 and 19 are copied
@ into r0-r9. From here on these ten words are updated only in the core
@ registers (the "g" operands below) and are stored from them at the end.
    vmov r0, s0;
    vmov r1, s4;
    vmov r2, s5;
    vmov r3, s8;
    vmov r4, s10;
    vmov r5, s11;
    vmov r6, s15;
    vmov r7, s17;
    vmov r8, s18;
    vmov r9, s19;

@ Step 3: in-place XOR steps. Each step reads the current value of its
@ right-hand operand, so the order of the steps must not be changed.
@// f28 ^= g8
    vmov r10, s28
    eor r10, r8
    vmov s28, r10
@// f12 ^= g8
    vmov r10, s12
    eor r10, r8
    vmov s12, r10
@// f20 ^= g8
    vmov r10, s20
    eor r10, r8
    vmov s20, r10
@// g8 ^= f16
    vmov r11, s16
    eor r8, r11
@// f16 ^= f21
    vmov r10, s16
    vmov r11, s21
    eor r10, r11
    vmov s16, r10
@// f21 ^= g5
    vmov r10, s21
    eor r10, r5
    vmov s21, r10
@// f21 ^= f22
    vmov r10, s21
    vmov r11, s22
    eor r10, r11
    vmov s21, r10
@// g1 ^= f31
    vmov r11, s31
    eor r1, r11
@// f3 ^= g9
    vmov r10, s3
    eor r10, r9
    vmov s3, r10
@// f31 ^= f3
    vmov r10, s31
    vmov r11, s3
    eor r10, r11
    vmov s31, r10
@// f31 ^= g9
    vmov r10, s31
    eor r10, r9
    vmov s31, r10
@// f22 ^= g9
    vmov r10, s22
    eor r10, r9
    vmov s22, r10
@// g9 ^= g7
    eor r9, r7
@// g9 ^= f7
    vmov r11, s7
    eor r9, r11
@// f27 ^= g5
    vmov r10, s27
    eor r10, r5
    vmov s27, r10
@// g5 ^= f26
    vmov r11, s26
    eor r5, r11
@// g5 ^= g0
    eor r5, r0
@// g0 ^= f13
    vmov r11, s13
    eor r0, r11
@// g9 ^= f29
    vmov r11, s29
    eor r9, r11
@// f16 ^= g7
    vmov r10, s16
    eor r10, r7
    vmov s16, r10
@// g7 ^= f23
    vmov r11, s23
    eor r7, r11
@// f29 ^= g2
    vmov r10, s29
    eor r10, r2
    vmov s29, r10
@// g2 ^= g7
    eor r2, r7
@// g7 ^= g3
    eor r7, r3
@// f7 ^= f25
    vmov r10, s7
    vmov r11, s25
    eor r10, r11
    vmov s7, r10
@// f9 ^= g3
    vmov r10, s9
    eor r10, r3
    vmov s9, r10
@// f3 ^= g1
    vmov r10, s3
    eor r10, r1
    vmov s3, r10
@// g1 ^= f25
    vmov r11, s25
    eor r1, r11
@// g5 ^= g2
    eor r5, r2
@// g5 ^= g8
    eor r5, r8
@// g9 ^= f14
    vmov r11, s14
    eor r9, r11
@// f22 ^= f23
    vmov r10, s22
    vmov r11, s23
    eor r10, r11
    vmov s22, r10
@// f22 ^= g3
    vmov r10, s22
    eor r10, r3
    vmov s22, r10
@// f14 ^= g7
    vmov r10, s14
    eor r10, r7
    vmov s14, r10
@// f1 ^= f2
    vmov r10, s1
    vmov r11, s2
    eor r10, r11
    vmov s1, r10
@// f2 ^= g4
    vmov r10, s2
    eor r10, r4
    vmov s2, r10
@// f1 ^= g2
    vmov r10, s1
    eor r10, r2
    vmov s1, r10
@// f6 ^= g4
    vmov r10, s6
    eor r10, r4
    vmov s6, r10
@// f20 ^= g4
    vmov r10, s20
    eor r10, r4
    vmov s20, r10
@// g4 ^= g6
    eor r4, r6
@// g4 ^= f27
    vmov r11, s27
    eor r4, r11
@// f27 ^= f16
    vmov r10, s27
    vmov r11, s16
    eor r10, r11
    vmov s27, r10
@// f14 ^= g6
    vmov r10, s14
    eor r10, r6
    vmov s14, r10
@// f25 ^= f13
    vmov r10, s25
    vmov r11, s13
    eor r10, r11
    vmov s25, r10
@// f25 ^= g8
    vmov r10, s25
    eor r10, r8
    vmov s25, r10
@// g8 ^= g6
    eor r8, r6
@// f23 ^= f13
    vmov r10, s23
    vmov r11, s13
    eor r10, r11
    vmov s23, r10
@// g3 ^= g1
    eor r3, r1
@// f13 ^= g3
    vmov r10, s13
    eor r10, r3
    vmov s13, r10
@// g3 ^= f31
    vmov r11, s31
    eor r3, r11
@// f27 ^= g1
    vmov r10, s27
    eor r10, r1
    vmov s27, r10
@// f21 ^= f20
    vmov r10, s21
    vmov r11, s20
    eor r10, r11
    vmov s21, r10
@// f9 ^= g0
    vmov r10, s9
    eor r10, r0
    vmov s9, r10
@// g0 ^= f6
    vmov r11, s6
    eor r0, r11
@// g8 ^= f22
    vmov r11, s22
    eor r8, r11
@// f26 ^= f30
    vmov r10, s26
    vmov r11, s30
    eor r10, r11
    vmov s26, r10
@// f29 ^= f28
    vmov r10, s29
    vmov r11, s28
    eor r10, r11
    vmov s29, r10
@// g7 ^= g9
    eor r7, r9
@// g9 ^= f23
    vmov r11, s23
    eor r9, r11
@// g3 ^= f2
    vmov r11, s2
    eor r3, r11
@// f12 ^= g1
    vmov r10, s12
    eor r10, r1
    vmov s12, r10
@// f7 ^= g2
    vmov r10, s7
    eor r10, r2
    vmov s7, r10
@// g1 ^= g6
    eor r1, r6
@// g6 ^= g2
    eor r6, r2
@// f6 ^= f1
    vmov r10, s6
    vmov r11, s1
    eor r10, r11
    vmov s6, r10
@// f2 ^= g2
    vmov r10, s2
    eor r10, r2
    vmov s2, r10
@// g5 ^= g9
    eor r5, r9
@// f1 ^= f3
    vmov r10, s1
    vmov r11, s3
    eor r10, r11
    vmov s1, r10
@// f24 ^= f28
    vmov r10, s24
    vmov r11, s28
    eor r10, r11
    vmov s24, r10
@// g2 ^= f6
    vmov r11, s6
    eor r2, r11
@// g4 ^= g8
    eor r4, r8
@// g6 ^= f21
    vmov r11, s21
    eor r6, r11
@// f16 ^= f20
    vmov r10, s16
    vmov r11, s20
    eor r10, r11
    vmov s16, r10
@// f3 ^= g0
    vmov r10, s3
    eor r10, r0
    vmov s3, r10
@// g0 ^= f2
    vmov r11, s2
    eor r0, r11
@// f14 ^= f20
    vmov r10, s14
    vmov r11, s20
    eor r10, r11
    vmov s14, r10
@// f9 ^= g6
    vmov r10, s9
    eor r10, r6
    vmov s9, r10

@ Step 4: write state word i to [r14, #4*i], taking it from s<i> or,
@ for the ten words held in r0-r9, from the matching core register.
    str r0, [r14, #0]
    vstr.32 s1, [r14, #4]
    vstr.32 s2, [r14, #8]
    vstr.32 s3, [r14, #12]
    str r1, [r14, #16]
    str r2, [r14, #20]
    vstr.32 s6, [r14, #24]
    vstr.32 s7, [r14, #28]
    str r3, [r14, #32]
    vstr.32 s9, [r14, #36]
    str r4, [r14, #40]
    str r5, [r14, #44]
    vstr.32 s12, [r14, #48]
    vstr.32 s13, [r14, #52]
    vstr.32 s14, [r14, #56]
    str r6, [r14, #60]
    vstr.32 s16, [r14, #64]
    str r7, [r14, #68]
    str r8, [r14, #72]
    str r9, [r14, #76]
    vstr.32 s20, [r14, #80]
    vstr.32 s21, [r14, #84]
    vstr.32 s22, [r14, #88]
    vstr.32 s23, [r14, #92]
    vstr.32 s24, [r14, #96]
    vstr.32 s25, [r14, #100]
    vstr.32 s26, [r14, #104]
    vstr.32 s27, [r14, #108]
    vstr.32 s28, [r14, #112]
    vstr.32 s29, [r14, #116]
    vstr.32 s30, [r14, #120]
    vstr.32 s31, [r14, #124]
@ Restore everything saved at entry and return.
    vpop { d8-d15 }
    pop { r0-r12, r14 }
    bx lr
    .size gft_mul_v22, .-gft_mul_v22

    .syntax unified
    .cpu cortex-m4
    .fpu fpv4-sp-d16
    .global gft_mul_v23
    .type gft_mul_v23, %function
    .align 2
gft_mul_v23:
    push { r0-r12, r14 }
    vpush {d8-d15}
@ final state: [{1, 2, 3, 5, 10, 12, 13, 14, 24, 25, 30, 31}, {0, 1, 2, 4, 5, 11, 12, 15, 24, 30}, {1, 7, 8, 9, 10, 11, 12, 13, 26, 27, 29, 31}, {0, 1, 6, 7, 8, 10, 12, 26, 28, 29, 30, 31}, {0, 1, 2, 4, 7, 8, 9, 11, 12, 13, 14, 15, 24, 29}, {0, 3, 5, 6, 7, 8, 10, 11, 12, 14, 25, 28, 29}, {0, 1, 4, 8, 11, 13, 14, 26, 31}, {0, 5, 9, 10, 11, 12, 13, 15, 27, 30, 31}, {0, 2, 3, 5, 8, 9, 16, 17, 20, 23, 24, 25, 28, 31}, {1, 2, 4, 5, 8, 16, 21, 22, 23, 24, 29, 30, 31}, {1, 2, 3, 7, 10, 11, 18, 19, 20, 26, 27, 28}, {0, 1, 2, 6, 7, 10, 18, 21, 26, 29}, {0, 1, 2, 5, 7, 12, 13, 16, 18, 19, 21, 22, 23, 24, 26, 27, 29, 30, 31}, {0, 3, 4, 5, 6, 7, 12, 17, 18, 20, 21, 22, 25, 26, 28, 29, 30}, {0, 1, 4, 6, 7, 14, 15, 17, 18, 19, 21, 25, 26, 27, 29}, {0, 5, 6, 14, 16, 17, 18, 20, 21, 24, 25, 26, 28, 29}, {1, 4, 7, 18, 19, 20, 21, 23, 26, 28, 29, 30}, {0, 1, 5, 6, 7, 18, 20, 22, 23, 27, 28, 31}, {3, 4, 17, 19, 20, 23, 24, 25, 26, 27, 28, 29}, {2, 3, 5, 16, 17, 18, 19, 21, 22, 23, 24, 26, 28}, {0, 2, 3, 4, 5, 6,
@ 7, 17, 19, 21, 22, 24, 25, 27, 28, 29, 30, 31}, {1, 2, 4, 6, 16, 17, 18, 19, 20, 21, 23, 24, 26, 27, 28, 30}, {1, 2, 3, 5, 6, 16, 18, 19, 20, 21, 22, 24, 27, 29, 30}, {0, 1, 2, 4, 5, 7, 17, 18, 20, 23, 25, 26, 27, 28, 29, 31}, {9, 12, 15, 16, 18, 19, 21, 24, 28, 31}, {8, 9, 13, 14, 15, 17, 18, 20, 21, 25, 29, 30, 31}, {11, 12, 17, 18, 19, 23, 26, 28}, {10, 11, 13, 16, 17, 18, 22, 23, 27, 29}, {8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 21, 23, 24, 26, 27, 30, 31}, {9, 10, 12, 14, 16, 19, 20, 21, 22, 23, 25, 26, 30}, {9, 10, 11, 13, 14, 16, 17, 20, 22, 23, 25, 26, 27, 29, 31}, {8, 9, 10, 12, 13, 15, 16, 21, 22, 24, 25, 26, 28, 29, 30, 31}]
@ i = 0 @ better circuit found: #XORs = 247 @ better circuit found: #XORs = 244 @ i = 50 @ i = 100 @ i = 150 @ i = 200 @ i = 250 @ i = 300 @ i = 350 @ i = 400 @ i = 450 @ better circuit found: #XORs = 242 @ i = 500 @ i = 550 @ better circuit found: #XORs = 239 @ i = 600 @ i = 650 @ i = 700 @ i = 750 @ i = 800 @ i = 850 @ i = 900 @ i = 950
@ initial state: [{2, 8, 9, 11, 25, 28, 29}, {2, 3, 22, 26}, {7, 10, 12, 27, 28, 31}, {7, 9, 10, 11, 13}, {5, 30, 8, 10, 11, 29, 14}, {0, 4, 9, 14, 15, 28}, {1, 2, 13, 25, 26}, {5, 6, 7, 27, 15}, {17, 2, 5, 25, 27}, {8, 12, 13, 19, 23, 30}, {1, 3, 11, 24, 27}, {0, 1, 9, 18, 20}, {0, 6, 12, 30, 31}, {2, 13, 16, 17, 30}, {17, 18, 21, 10, 14}, {6, 9, 14, 16, 18, 24, 30}, {3, 7, 8, 12, 19, 20, 26}, {3, 12, 17, 18, 22, 28}, {0, 4, 9, 12, 25, 29}, {11, 18, 21, 28, 31}, {16, 29, 19, 4, 25, 13}, {16, 24, 25, 27, 31}, {5, 8, 15, 18, 19, 21}, {4, 7, 22, 26, 31}, {9, 10, 15, 22, 23, 24, 28, 30}, {15, 29, 31}, {12, 17, 19, 21, 23, 26, 31}, {1, 16, 17, 22, 23, 24, 29}, {0, 2, 12, 17, 22, 24}, {3, 10, 13, 18}, {0, 2, 13, 14, 31}, {17, 4, 8}]
@[0, 2, 3, 4, 5, 6, 7, 11, 14, 15, 16, 17, 18, 19, 21, 23, 24, 26, 27, 28, 29, 31, 1, 8, 9, 10, 12, 13, 20, 22, 25, 30]
@instructions with recommended mapping to GP and FP registers:
@(cycle count without considering memory stores: 660)

@ -----------------------------------------------------------------------
@ gft_mul_v23 body
@ Generated, branch-free XOR network over 32 words of 32 bits.
@ In:    r0 = destination base (copied to r14); words are written at
@             byte offsets #0..#124
@        r1 = source base (copied to r12); words are read at byte
@             offsets in #0..#124
@ The registers pushed at the function entry (r0-r12, r14, d8-d15) are
@ popped again before "bx lr".
@ Notation used by the "@//" step comments: fN is FP register sN and
@ gN is core register rN. r10 and r11 are scratch registers used to
@ shuttle values between the FP and core register files.
@ -----------------------------------------------------------------------
    mov r14, r0
    mov r12, r1

@ Step 1: build the 32 state words. Group i loads the source words whose
@ indices appear in row i of the "initial state" list above (byte offset
@ = 4 * index), XORs them together in r0 and parks the result in s<i>.
    ldr r0, [r12, #8]
    ldr r1, [r12, #32]
    ldr r2, [r12, #36]
    ldr r3, [r12, #44]
    ldr r4, [r12, #100]
    ldr r5, [r12, #112]
    ldr r6, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s0, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #12]
    ldr r2, [r12, #88]
    ldr r3, [r12, #104]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s1, r0

    ldr r0, [r12, #28]
    ldr r1, [r12, #40]
    ldr r2, [r12, #48]
    ldr r3, [r12, #108]
    ldr r4, [r12, #112]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s2, r0

    ldr r0, [r12, #28]
    ldr r1, [r12, #36]
    ldr r2, [r12, #40]
    ldr r3, [r12, #44]
    ldr r4, [r12, #52]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s3, r0

    ldr r0, [r12, #20]
    ldr r1, [r12, #120]
    ldr r2, [r12, #32]
    ldr r3, [r12, #40]
    ldr r4, [r12, #44]
    ldr r5, [r12, #116]
    ldr r6, [r12, #56]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s4, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #16]
    ldr r2, [r12, #36]
    ldr r3, [r12, #56]
    ldr r4, [r12, #60]
    ldr r5, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s5, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #8]
    ldr r2, [r12, #52]
    ldr r3, [r12, #100]
    ldr r4, [r12, #104]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s6, r0

    ldr r0, [r12, #20]
    ldr r1, [r12, #24]
    ldr r2, [r12, #28]
    ldr r3, [r12, #108]
    ldr r4, [r12, #60]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s7, r0

    ldr r0, [r12, #68]
    ldr r1, [r12, #8]
    ldr r2, [r12, #20]
    ldr r3, [r12, #100]
    ldr r4, [r12, #108]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s8, r0

    ldr r0, [r12, #32]
    ldr r1, [r12, #48]
    ldr r2, [r12, #52]
    ldr r3, [r12, #76]
    ldr r4, [r12, #92]
    ldr r5, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s9, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #12]
    ldr r2, [r12, #44]
    ldr r3, [r12, #96]
    ldr r4, [r12, #108]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s10, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #4]
    ldr r2, [r12, #36]
    ldr r3, [r12, #72]
    ldr r4, [r12, #80]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s11, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #24]
    ldr r2, [r12, #48]
    ldr r3, [r12, #120]
    ldr r4, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s12, r0

    ldr r0, [r12, #8]
    ldr r1, [r12, #52]
    ldr r2, [r12, #64]
    ldr r3, [r12, #68]
    ldr r4, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s13, r0

    ldr r0, [r12, #68]
    ldr r1, [r12, #72]
    ldr r2, [r12, #84]
    ldr r3, [r12, #40]
    ldr r4, [r12, #56]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s14, r0

    ldr r0, [r12, #24]
    ldr r1, [r12, #36]
    ldr r2, [r12, #56]
    ldr r3, [r12, #64]
    ldr r4, [r12, #72]
    ldr r5, [r12, #96]
    ldr r6, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s15, r0

    ldr r0, [r12, #12]
    ldr r1, [r12, #28]
    ldr r2, [r12, #32]
    ldr r3, [r12, #48]
    ldr r4, [r12, #76]
    ldr r5, [r12, #80]
    ldr r6, [r12, #104]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s16, r0

    ldr r0, [r12, #12]
    ldr r1, [r12, #48]
    ldr r2, [r12, #68]
    ldr r3, [r12, #72]
    ldr r4, [r12, #88]
    ldr r5, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s17, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #16]
    ldr r2, [r12, #36]
    ldr r3, [r12, #48]
    ldr r4, [r12, #100]
    ldr r5, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s18, r0

    ldr r0, [r12, #44]
    ldr r1, [r12, #72]
    ldr r2, [r12, #84]
    ldr r3, [r12, #112]
    ldr r4, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s19, r0

    ldr r0, [r12, #64]
    ldr r1, [r12, #116]
    ldr r2, [r12, #76]
    ldr r3, [r12, #16]
    ldr r4, [r12, #100]
    ldr r5, [r12, #52]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s20, r0

    ldr r0, [r12, #64]
    ldr r1, [r12, #96]
    ldr r2, [r12, #100]
    ldr r3, [r12, #108]
    ldr r4, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s21, r0

    ldr r0, [r12, #20]
    ldr r1, [r12, #32]
    ldr r2, [r12, #60]
    ldr r3, [r12, #72]
    ldr r4, [r12, #76]
    ldr r5, [r12, #84]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s22, r0

    ldr r0, [r12, #16]
    ldr r1, [r12, #28]
    ldr r2, [r12, #88]
    ldr r3, [r12, #104]
    ldr r4, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s23, r0

    ldr r0, [r12, #36]
    ldr r1, [r12, #40]
    ldr r2, [r12, #60]
    ldr r3, [r12, #88]
    ldr r4, [r12, #92]
    ldr r5, [r12, #96]
    ldr r6, [r12, #112]
    ldr r7, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    vmov s24, r0

    ldr r0, [r12, #60]
    ldr r1, [r12, #116]
    ldr r2, [r12, #124]
    eor r0, r1
    eor r0, r2
    vmov s25, r0

    ldr r0, [r12, #48]
    ldr r1, [r12, #68]
    ldr r2, [r12, #76]
    ldr r3, [r12, #84]
    ldr r4, [r12, #92]
    ldr r5, [r12, #104]
    ldr r6, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s26, r0

    ldr r0, [r12, #4]
    ldr r1, [r12, #64]
    ldr r2, [r12, #68]
    ldr r3, [r12, #88]
    ldr r4, [r12, #92]
    ldr r5, [r12, #96]
    ldr r6, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s27, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #8]
    ldr r2, [r12, #48]
    ldr r3, [r12, #68]
    ldr r4, [r12, #88]
    ldr r5, [r12, #96]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s28, r0

    ldr r0, [r12, #12]
    ldr r1, [r12, #40]
    ldr r2, [r12, #52]
    ldr r3, [r12, #72]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s29, r0

    ldr r0, [r12, #0]
    ldr r1, [r12, #8]
    ldr r2, [r12, #52]
    ldr r3, [r12, #56]
    ldr r4, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s30, r0

    ldr r0, [r12, #68]
    ldr r1, [r12, #16]
    ldr r2, [r12, #32]
    eor r0, r1
    eor r0, r2
    vmov s31, r0

@ Step 2: state words 1, 8, 9, 10, 12, 13, 20, 22, 25 and 30 are copied
@ into r0-r9. From here on these ten words are updated only in the core
@ registers (the "g" operands below) and are stored from them at the end.
    vmov r0, s1;
    vmov r1, s8;
    vmov r2, s9;
    vmov r3, s10;
    vmov r4, s12;
    vmov r5, s13;
    vmov r6, s20;
    vmov r7, s22;
    vmov r8, s25;
    vmov r9, s30;

@ Step 3: in-place XOR steps. Each step reads the current value of its
@ right-hand operand, so the order of the steps must not be changed.
@// f28 ^= g9
    vmov r10, s28
    eor r10, r9
    vmov s28, r10
@// f4 ^= f3
    vmov r10, s4
    vmov r11, s3
    eor r10, r11
    vmov s4, r10
@// g5 ^= g0
    eor r5, r0
@// g0 ^= f23
    vmov r11, s23
    eor r0, r11
@// f17 ^= f14
    vmov r10, s17
    vmov r11, s14
    eor r10, r11
    vmov s17, r10
@// g0 ^= g8
    eor r0, r8
@// f18 ^= f11
    vmov r10, s18
    vmov r11, s11
    eor r10, r11
    vmov s18, r10
@// g9 ^= f6
    vmov r11, s6
    eor r9, r11
@// f15 ^= f14
    vmov r10, s15
    vmov r11, s14
    eor r10, r11
    vmov s15, r10
@// g9 ^= f11
    vmov r11, s11
    eor r9, r11
@// f11 ^= f14
    vmov r10, s11
    vmov r11, s14
    eor r10, r11
    vmov s11, r10
@// f14 ^= g1
    vmov r10, s14
    eor r10, r1
    vmov s14, r10
@// g1 ^= f7
    vmov r11, s7
    eor r1, r11
@// f3 ^= f2
    vmov r10, s3
    vmov r11, s2
    eor r10, r11
    vmov s3, r10
@// f3 ^= g4
    vmov r10, s3
    eor r10, r4
    vmov s3, r10
@// g7 ^= f31
    vmov r11, s31
    eor r7, r11
@// g1 ^= g8
    eor r1, r8
@// f5 ^= g8
    vmov r10, s5
    eor r10, r8
    vmov s5, r10
@// f27 ^= g3
    vmov r10, s27
    eor r10, r3
    vmov s27, r10
@// g8 ^= g4
    eor r8, r4
@// g4 ^= g2
    eor r4, r2
@// g2 ^= g6
    eor r2, r6
@// f31 ^= g6
    vmov r10, s31
    eor r10, r6
    vmov s31, r10
@// g6 ^= f6
    vmov r11, s6
    eor r6, r11
@// g2 ^= f21
    vmov r11, s21
    eor r2, r11
@// g2 ^= g3
    eor r2, r3
@// f16 ^= f26
    vmov r10, s16
    vmov r11, s26
    eor r10, r11
    vmov s16, r10
@// g6 ^= f18
    vmov r11, s18
    eor r6, r11
@// f17 ^= f14
    vmov r10, s17
    vmov r11, s14
    eor r10, r11
    vmov s17, r10
@// f14 ^= g7
    vmov r10, s14
    eor r10, r7
    vmov s14, r10
@// f31 ^= g5
    vmov r10, s31
    eor r10, r5
    vmov s31, r10
@// g4 ^= f26
    vmov r11, s26
    eor r4, r11
@// f11 ^= g9
    vmov r10, s11
    eor r10, r9
    vmov s11, r10
@// f31 ^= f29
    vmov r10, s31
    vmov r11, s29
    eor r10, r11
    vmov s31, r10
@// f27 ^= f29
    vmov r10, s27
    vmov r11, s29
    eor r10, r11
    vmov s27, r10
@// f29 ^= g5
    vmov r10, s29
    eor r10, r5
    vmov s29, r10
@// f29 ^= f26
    vmov r10, s29
    vmov r11, s26
    eor r10, r11
    vmov s29, r10
@// f26 ^= f19
    vmov r10, s26
    vmov r11, s19
    eor r10, r11
    vmov s26, r10
@// f19 ^= f21
    vmov r10, s19
    vmov r11, s21
    eor r10, r11
    vmov s19, r10
@// f19 ^= f17
    vmov r10, s19
    vmov r11, s17
    eor r10, r11
    vmov s19, r10
@// g7 ^= f7
    vmov r11, s7
    eor r7, r11
@// f17 ^= f6
    vmov r10, s17
    vmov r11, s6
    eor r10, r11
    vmov s17, r10
@// g5 ^= f23
    vmov r11, s23
    eor r5, r11
@// f18 ^= g3
    vmov r10, s18
    eor r10, r3
    vmov s18, r10
@// g3 ^= f7
    vmov r11, s7
    eor r3, r11
@// f7 ^= f2
    vmov r10, s7
    vmov r11, s2
    eor r10, r11
    vmov s7, r10
@// f2 ^= f5
    vmov r10, s2
    vmov r11, s5
    eor r10, r11
    vmov s2, r10
@// f5 ^= f0
    vmov r10, s5
    vmov r11, s0
    eor r10, r11
    vmov s5, r10
@// g6 ^= f21
    vmov r11, s21
    eor r6, r11
@// f11 ^= g1
    vmov r10, s11
    eor r10, r1
    vmov s11, r10
@// f15 ^= g8
    vmov r10, s15
    eor r10, r8
    vmov s15, r10
@// f15 ^= f7
    vmov r10, s15
    vmov r11, s7
    eor r10, r11
    vmov s15, r10
@// g5 ^= f28
    vmov r11, s28
    eor r5, r11
@// f6 ^= f5
    vmov r10, s6
    vmov r11, s5
    eor r10, r11
    vmov s6, r10
@// f5 ^= g0
    vmov r10, s5
    eor r10, r0
    vmov s5, r10
@// g0 ^= g3
    eor r0, r3
@// g3 ^= f7
    vmov r11, s7
    eor r3, r11
@// f23 ^= g1
    vmov r10, s23
    eor r10, r1
    vmov s23, r10
@// f21 ^= g1
    vmov r10, s21
    eor r10, r1
    vmov s21, r10
@// g1 ^= f16
    vmov r11, s16
    eor r1, r11
@// g0 ^= g8
    eor r0, r8
@// g8 ^= g4
    eor r8, r4
@// g4 ^= f16
    vmov r11, s16
    eor r4, r11
@// f0 ^= f4
    vmov r10, s0
    vmov r11, s4
    eor r10, r11
    vmov s0, r10
@// g7 ^= f16
    vmov r11, s16
    eor r7, r11
@// f24 ^= f29
    vmov r10, s24
    vmov r11, s29
    eor r10, r11
    vmov s24, r10
@// f16 ^= g2
    vmov r10, s16
    eor r10, r2
    vmov s16, r10
@// f18 ^= f26
    vmov r10, s18
    vmov r11, s26
    eor r10, r11
    vmov s18, r10
@// f17 ^= g4
    vmov r10, s17
    eor r10, r4
    vmov s17, r10
@// f16 ^= f26
    vmov r10, s16
    vmov r11, s26
    eor r10, r11
    vmov s16, r10
@// f0 ^= g3
    vmov r10, s0
    eor r10, r3
    vmov s0, r10
@// g3 ^= g6
    eor r3, r6
@// f2 ^= f6
    vmov r10, s2
    vmov r11, s6
    eor r10, r11
    vmov s2, r10
@// g2 ^= f19
    vmov r11, s19
    eor r2, r11
@// f5 ^= f7
    vmov r10, s5
    vmov r11, s7
    eor r10, r11
    vmov s5, r10
@// g1 ^= f15
    vmov r11, s15
    eor r1, r11
@// f15 ^= g9
    vmov r10, s15
    eor r10, r9
    vmov s15, r10
@// f29 ^= g9
    vmov r10, s29
    eor r10, r9
    vmov s29, r10
@// f7 ^= f3
    vmov r10, s7
    vmov r11, s3
    eor r10, r11
    vmov s7, r10
@// f19 ^= f26
    vmov r10, s19
    vmov r11, s26
    eor r10, r11
    vmov s19, r10
@// f28 ^= g8
    vmov r10, s28
    eor r10, r8
    vmov s28, r10
@// f3 ^= f2
    vmov r10, s3
    vmov r11, s2
    eor r10, r11
    vmov s3, r10
@// g8 ^= g9
    eor r8, r9
@// g7 ^= g2
    eor r7, r2
@// f28 ^= f27
    vmov r10, s28
    vmov r11, s27
    eor r10, r11
    vmov s28, r10
@// f4 ^= g0
    vmov r10, s4
    eor r10, r0
    vmov s4, r10
@// f31 ^= f24
    vmov r10, s31
    vmov r11, s24
    eor r10, r11
    vmov s31, r10
@// f14 ^= f11
    vmov r10, s14
    vmov r11, s11
    eor r10, r11
    vmov s14, r10
@// f23 ^= f17
    vmov r10, s23
    vmov r11, s17
    eor r10, r11
    vmov s23, r10
@// g5 ^= f15
    vmov r11, s15
    eor r5, r11
@// g9 ^= f27
    vmov r11, s27
    eor r9, r11
@// f21 ^= f16
    vmov r10, s21
    vmov r11, s16
    eor r10, r11
    vmov s21, r10
@// g6 ^= g5
    eor r6, r5
@// g4 ^= g7
    eor r4, r7

@ Step 4: write state word i to [r14, #4*i], taking it from s<i> or,
@ for the ten words held in r0-r9, from the matching core register.
    vstr.32 s0, [r14, #0]
    str r0, [r14, #4]
    vstr.32 s2, [r14, #8]
    vstr.32 s3, [r14, #12]
    vstr.32 s4, [r14, #16]
    vstr.32 s5, [r14, #20]
    vstr.32 s6, [r14, #24]
    vstr.32 s7, [r14, #28]
    str r1, [r14, #32]
    str r2, [r14, #36]
    str r3, [r14, #40]
    vstr.32 s11, [r14, #44]
    str r4, [r14, #48]
    str r5, [r14, #52]
    vstr.32 s14, [r14, #56]
    vstr.32 s15, [r14, #60]
    vstr.32 s16, [r14, #64]
    vstr.32 s17, [r14, #68]
    vstr.32 s18, [r14, #72]
    vstr.32 s19, [r14, #76]
    str r6, [r14, #80]
    vstr.32 s21, [r14, #84]
    str r7, [r14, #88]
    vstr.32 s23, [r14, #92]
    vstr.32 s24, [r14, #96]
    str r8, [r14, #100]
    vstr.32 s26, [r14, #104]
    vstr.32 s27, [r14, #108]
    vstr.32 s28, [r14, #112]
    vstr.32 s29, [r14, #116]
    str r9, [r14, #120]
    vstr.32 s31, [r14, #124]
@ Restore everything saved at entry and return.
    vpop { d8-d15 }
    pop { r0-r12, r14 }
    bx lr
    .size gft_mul_v23, .-gft_mul_v23

    .syntax unified
    .cpu cortex-m4
    .fpu fpv4-sp-d16
    .global gft_mul_v24
    .type gft_mul_v24, %function
    .align 2
gft_mul_v24:
    push { r0-r12, r14 }
    vpush {d8-d15}
@ final state: [{0, 2, 4, 6, 8, 10, 14, 15, 17, 18, 21, 22, 24, 25, 26}, {1, 3, 5, 7, 9, 11, 14, 16, 17, 19, 20, 21, 23, 24, 27}, {0, 1, 3, 4, 5, 7, 8, 9, 11, 13, 15, 16, 17, 18, 20, 21, 22, 24, 25}, {0, 2, 3, 4, 6, 7, 8, 10, 11, 12, 13, 14, 15, 16, 19, 20, 23, 24}, {2, 3, 4, 7, 8, 14, 16, 18, 20, 21, 28, 29, 30}, {2, 5, 6, 7, 9, 15, 17, 19, 20, 28, 31}, {1, 3, 4, 10, 12, 13, 14, 15, 16, 17, 19, 22, 23, 28, 29}, {0, 1, 2, 3, 5, 11, 12, 14, 16, 18, 19, 22, 28}, {0, 1, 2, 3, 6, 7, 9, 11, 12, 15, 17, 19, 20, 21, 23, 26, 27, 28, 30, 31}, {0, 2, 6, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 22, 23, 26, 29, 30}, {1, 2, 5, 7, 8, 10, 11, 12, 16, 18, 19, 20, 23, 25, 27, 29, 30, 31}, {0, 1, 3, 4, 5, 6, 7, 9, 10, 13, 17, 18, 21, 22, 23, 24, 25, 26, 27, 28,
29, 30}, {0, 5, 6, 7, 8, 10, 11, 12, 13, 14, 17, 19, 24, 25, 26, 27, 28, 29}, {1, 4, 5, 6, 9, 10, 12, 15, 16, 17, 18, 19, 24, 26, 28}, {2, 5, 9, 10, 11, 12, 13, 16, 18, 19, 25, 26, 30, 31}, {3, 4, 5, 8, 9, 10, 12, 17, 18, 24, 25, 27, 30}, {0, 1, 2, 6, 12, 14, 17, 20, 24, 26, 28, 31}, {0, 3, 7, 13, 15, 16, 17, 21, 25, 27, 29, 30, 31}, {0, 1, 4, 5, 6, 7, 12, 13, 15, 19, 22, 24, 25, 27, 28}, {0, 4, 6, 12, 14, 15, 18, 19, 23, 24, 26, 27, 29}, {1, 4, 6, 10, 11, 14, 15, 17, 18, 19, 22, 23, 24, 26, 27, 31}, {0, 1, 5, 7, 10, 14, 16, 17, 18, 22, 25, 26, 30, 31}, {3, 4, 5, 7, 9, 11, 13, 15, 17, 21, 23, 25, 26, 27, 28, 30}, {2, 3, 4, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 20, 21, 22, 23, 24, 25, 26, 29, 31}, {0, 9, 10, 14, 17, 18, 19, 22, 23, 26, 27, 28, 30, 31}, {1, 8, 9, 11, 15, 16, 17, 18, 22, 26, 29, 30}, {2, 8, 9, 10, 12, 13, 14, 15, 17, 21, 23, 25, 27, 29, 30, 31}, {3, 8, 11, 12, 14, 16, 17, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30}, {4, 9, 14, 16, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29}, {5, 8, 9, 15, 17, 20, 22, 24, 26, 28}, {6, 11, 12, 13, 14, 15, 18, 21, 22, 25, 26, 30, 31}, {7, 10, 11, 12, 14, 19, 20, 21, 23, 24, 25, 27, 30}] @ i = 0 @ better circuit found: #XORs = 263 @ better circuit found: #XORs = 262 @ better circuit found: #XORs = 258 @ better circuit found: #XORs = 250 @ i = 50 @ i = 100 @ i = 150 @ i = 200 @ i = 250 @ i = 300 @ i = 350 @ i = 400 @ better circuit found: #XORs = 249 @ i = 450 @ i = 500 @ better circuit found: #XORs = 248 @ i = 550 @ i = 600 @ i = 650 @ i = 700 @ i = 750 @ i = 800 @ i = 850 @ i = 900 @ i = 950 @ initial state: [{6, 7, 18, 19, 24, 31}, {1, 4, 8, 16, 18, 24, 27}, {0, 9, 11, 19, 27, 29}, {0, 5, 16, 28}, {5, 21, 7, 8, 9, 11}, {2, 18, 22, 8, 26}, {5, 13, 22, 28}, {1, 2, 14, 25}, {6, 11, 12, 21, 22, 29}, {7, 8, 23, 27, 29}, {6, 13, 17, 21, 27, 29}, {5, 21, 15, 31}, {0, 4, 8, 12, 14, 15, 26, 31}, {3, 5, 6, 20}, {16, 23, 10, 31}, {5, 10, 11, 18, 24, 25, 27}, {2, 6, 10, 11, 28, 29}, {1, 9, 13, 20, 26}, {5, 6, 9, 17, 19, 22, 24, 
27}, {7, 17, 19, 25, 28}, {0, 1, 6, 10, 17, 21, 31}, {2, 3, 4, 7, 9, 15, 26}, {4, 5, 12, 13, 24, 25, 30}, {12, 13, 21, 25, 26, 31}, {0, 1, 7, 8, 14, 24, 30}, {4, 9, 10, 14, 16, 18, 30}, {2, 5, 16, 22, 23, 25}, {3, 4, 8, 9, 11, 12, 17, 30}, {14, 20, 22, 24, 26}, {5, 8, 9, 14, 15, 17, 28}, {3, 11, 12, 18, 19, 22, 25}, {3, 14, 17, 19, 26, 29}] @[1, 2, 4, 5, 6, 7, 9, 10, 11, 15, 16, 18, 19, 20, 21, 22, 23, 25, 27, 29, 30, 31, 0, 3, 8, 12, 13, 14, 17, 24, 26, 28] @instuctions with recommended mapping to GP and FP registers: @(cycle count without considering memory stores: 671) mov r14, r0 mov r12, r1 ldr r0, [r12, #24] ldr r1, [r12, #28] ldr r2, [r12, #72] ldr r3, [r12, #76] ldr r4, [r12, #96] ldr r5, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s0, r0 ldr r0, [r12, #4] ldr r1, [r12, #16] ldr r2, [r12, #32] ldr r3, [r12, #64] ldr r4, [r12, #72] ldr r5, [r12, #96] ldr r6, [r12, #108] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s1, r0 ldr r0, [r12, #0] ldr r1, [r12, #36] ldr r2, [r12, #44] ldr r3, [r12, #76] ldr r4, [r12, #108] ldr r5, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s2, r0 ldr r0, [r12, #0] ldr r1, [r12, #20] ldr r2, [r12, #64] ldr r3, [r12, #112] eor r0, r1 eor r0, r2 eor r0, r3 vmov s3, r0 ldr r0, [r12, #20] ldr r1, [r12, #84] ldr r2, [r12, #28] ldr r3, [r12, #32] ldr r4, [r12, #36] ldr r5, [r12, #44] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s4, r0 ldr r0, [r12, #8] ldr r1, [r12, #72] ldr r2, [r12, #88] ldr r3, [r12, #32] ldr r4, [r12, #104] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s5, r0 ldr r0, [r12, #20] ldr r1, [r12, #52] ldr r2, [r12, #88] ldr r3, [r12, #112] eor r0, r1 eor r0, r2 eor r0, r3 vmov s6, r0 ldr r0, [r12, #4] ldr r1, [r12, #8] ldr r2, [r12, #56] ldr r3, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 vmov s7, r0 ldr r0, [r12, #24] ldr r1, [r12, #44] ldr r2, [r12, #48] ldr r3, [r12, #84] ldr r4, [r12, #88] ldr r5, [r12, #116] eor r0, r1 
eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s8, r0 ldr r0, [r12, #28] ldr r1, [r12, #32] ldr r2, [r12, #92] ldr r3, [r12, #108] ldr r4, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s9, r0 ldr r0, [r12, #24] ldr r1, [r12, #52] ldr r2, [r12, #68] ldr r3, [r12, #84] ldr r4, [r12, #108] ldr r5, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s10, r0 ldr r0, [r12, #20] ldr r1, [r12, #84] ldr r2, [r12, #60] ldr r3, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 vmov s11, r0 ldr r0, [r12, #0] ldr r1, [r12, #16] ldr r2, [r12, #32] ldr r3, [r12, #48] ldr r4, [r12, #56] ldr r5, [r12, #60] ldr r6, [r12, #104] ldr r7, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 eor r0, r7 vmov s12, r0 ldr r0, [r12, #12] ldr r1, [r12, #20] ldr r2, [r12, #24] ldr r3, [r12, #80] eor r0, r1 eor r0, r2 eor r0, r3 vmov s13, r0 ldr r0, [r12, #64] ldr r1, [r12, #92] ldr r2, [r12, #40] ldr r3, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 vmov s14, r0 ldr r0, [r12, #20] ldr r1, [r12, #40] ldr r2, [r12, #44] ldr r3, [r12, #72] ldr r4, [r12, #96] ldr r5, [r12, #100] ldr r6, [r12, #108] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s15, r0 ldr r0, [r12, #8] ldr r1, [r12, #24] ldr r2, [r12, #40] ldr r3, [r12, #44] ldr r4, [r12, #112] ldr r5, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s16, r0 ldr r0, [r12, #4] ldr r1, [r12, #36] ldr r2, [r12, #52] ldr r3, [r12, #80] ldr r4, [r12, #104] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s17, r0 ldr r0, [r12, #20] ldr r1, [r12, #24] ldr r2, [r12, #36] ldr r3, [r12, #68] ldr r4, [r12, #76] ldr r5, [r12, #88] ldr r6, [r12, #96] ldr r7, [r12, #108] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 eor r0, r7 vmov s18, r0 ldr r0, [r12, #28] ldr r1, [r12, #68] ldr r2, [r12, #76] ldr r3, [r12, #100] ldr r4, [r12, #112] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s19, r0 ldr r0, [r12, #0] ldr r1, [r12, #4] ldr r2, [r12, 
#24] ldr r3, [r12, #40] ldr r4, [r12, #68] ldr r5, [r12, #84] ldr r6, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s20, r0 ldr r0, [r12, #8] ldr r1, [r12, #12] ldr r2, [r12, #16] ldr r3, [r12, #28] ldr r4, [r12, #36] ldr r5, [r12, #60] ldr r6, [r12, #104] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s21, r0 ldr r0, [r12, #16] ldr r1, [r12, #20] ldr r2, [r12, #48] ldr r3, [r12, #52] ldr r4, [r12, #96] ldr r5, [r12, #100] ldr r6, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s22, r0 ldr r0, [r12, #48] ldr r1, [r12, #52] ldr r2, [r12, #84] ldr r3, [r12, #100] ldr r4, [r12, #104] ldr r5, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s23, r0 ldr r0, [r12, #0] ldr r1, [r12, #4] ldr r2, [r12, #28] ldr r3, [r12, #32] ldr r4, [r12, #56] ldr r5, [r12, #96] ldr r6, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s24, r0 ldr r0, [r12, #16] ldr r1, [r12, #36] ldr r2, [r12, #40] ldr r3, [r12, #56] ldr r4, [r12, #64] ldr r5, [r12, #72] ldr r6, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s25, r0 ldr r0, [r12, #8] ldr r1, [r12, #20] ldr r2, [r12, #64] ldr r3, [r12, #88] ldr r4, [r12, #92] ldr r5, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s26, r0 ldr r0, [r12, #12] ldr r1, [r12, #16] ldr r2, [r12, #32] ldr r3, [r12, #36] ldr r4, [r12, #44] ldr r5, [r12, #48] ldr r6, [r12, #68] ldr r7, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 eor r0, r7 vmov s27, r0 ldr r0, [r12, #56] ldr r1, [r12, #80] ldr r2, [r12, #88] ldr r3, [r12, #96] ldr r4, [r12, #104] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s28, r0 ldr r0, [r12, #20] ldr r1, [r12, #32] ldr r2, [r12, #36] ldr r3, [r12, #56] ldr r4, [r12, #60] ldr r5, [r12, #68] ldr r6, [r12, #112] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s29, r0 ldr r0, 
[r12, #12] ldr r1, [r12, #44] ldr r2, [r12, #48] ldr r3, [r12, #72] ldr r4, [r12, #76] ldr r5, [r12, #88] ldr r6, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s30, r0 ldr r0, [r12, #12] ldr r1, [r12, #56] ldr r2, [r12, #68] ldr r3, [r12, #76] ldr r4, [r12, #104] ldr r5, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s31, r0 vmov r0, s0; vmov r1, s3; vmov r2, s8; vmov r3, s12; vmov r4, s13; vmov r5, s14; vmov r6, s17; vmov r7, s24; vmov r8, s26; vmov r9, s28; @// f21 ^= f5 vmov r10, s21 vmov r11, s5 eor r10, r11 vmov s21, r10 @// g5 ^= g8 eor r5, r8 @// g8 ^= f5 vmov r11, s5 eor r8, r11 @// f5 ^= g9 vmov r10, s5 eor r10, r9 vmov s5, r10 @// f22 ^= g1 vmov r10, s22 eor r10, r1 vmov s22, r10 @// f7 ^= g1 vmov r10, s7 eor r10, r1 vmov s7, r10 @// g1 ^= f29 vmov r11, s29 eor r1, r11 @// g1 ^= g7 eor r1, r7 @// f5 ^= g0 vmov r10, s5 eor r10, r0 vmov s5, r10 @// g0 ^= f19 vmov r11, s19 eor r0, r11 @// f31 ^= f19 vmov r10, s31 vmov r11, s19 eor r10, r11 vmov s31, r10 @// f2 ^= f10 vmov r10, s2 vmov r11, s10 eor r10, r11 vmov s2, r10 @// f10 ^= f20 vmov r10, s10 vmov r11, s20 eor r10, r11 vmov s10, r10 @// f6 ^= g8 vmov r10, s6 eor r10, r8 vmov s6, r10 @// f6 ^= f30 vmov r10, s6 vmov r11, s30 eor r10, r11 vmov s6, r10 @// f31 ^= g9 vmov r10, s31 eor r10, r9 vmov s31, r10 @// g7 ^= f9 vmov r11, s9 eor r7, r11 @// f31 ^= f30 vmov r10, s31 vmov r11, s30 eor r10, r11 vmov s31, r10 @// f19 ^= f9 vmov r10, s19 vmov r11, s9 eor r10, r11 vmov s19, r10 @// f9 ^= f10 vmov r10, s9 vmov r11, s10 eor r10, r11 vmov s9, r10 @// f10 ^= g6 vmov r10, s10 eor r10, r6 vmov s10, r10 @// f19 ^= g0 vmov r10, s19 eor r10, r0 vmov s19, r10 @// g0 ^= f20 vmov r11, s20 eor r0, r11 @// f20 ^= g2 vmov r10, s20 eor r10, r2 vmov s20, r10 @// f23 ^= g2 vmov r10, s23 eor r10, r2 vmov s23, r10 @// g2 ^= g4 eor r2, r4 @// g2 ^= g9 eor r2, r9 @// f11 ^= g5 vmov r10, s11 eor r10, r5 vmov s11, r10 @// g8 ^= g5 eor r8, r5 @// g5 ^= f15 vmov r11, s15 
eor r5, r11 @// f1 ^= f15 vmov r10, s1 vmov r11, s15 eor r10, r11 vmov s1, r10 @// g2 ^= g7 eor r2, r7 @// g7 ^= g0 eor r7, r0 @// f23 ^= f2 vmov r10, s23 vmov r11, s2 eor r10, r11 vmov s23, r10 @// f7 ^= g4 vmov r10, s7 eor r10, r4 vmov s7, r10 @// f21 ^= g3 vmov r10, s21 eor r10, r3 vmov s21, r10 @// g6 ^= g4 eor r6, r4 @// f22 ^= g1 vmov r10, s22 eor r10, r1 vmov s22, r10 @// f10 ^= f9 vmov r10, s10 vmov r11, s9 eor r10, r11 vmov s10, r10 @// f4 ^= g4 vmov r10, s4 eor r10, r4 vmov s4, r10 @// f9 ^= f5 vmov r10, s9 vmov r11, s5 eor r10, r11 vmov s9, r10 @// g6 ^= f18 vmov r11, s18 eor r6, r11 @// g8 ^= f21 vmov r11, s21 eor r8, r11 @// g5 ^= g6 eor r5, r6 @// g6 ^= g1 eor r6, r1 @// g4 ^= f30 vmov r11, s30 eor r4, r11 @// f30 ^= f27 vmov r10, s30 vmov r11, s27 eor r10, r11 vmov s30, r10 @// f30 ^= f2 vmov r10, s30 vmov r11, s2 eor r10, r11 vmov s30, r10 @// g6 ^= f23 vmov r11, s23 eor r6, r11 @// f7 ^= g4 vmov r10, s7 eor r10, r4 vmov s7, r10 @// f2 ^= g4 vmov r10, s2 eor r10, r4 vmov s2, r10 @// g4 ^= f1 vmov r11, s1 eor r4, r11 @// f4 ^= f16 vmov r10, s4 vmov r11, s16 eor r10, r11 vmov s4, r10 @// f1 ^= f27 vmov r10, s1 vmov r11, s27 eor r10, r11 vmov s1, r10 @// f30 ^= g3 vmov r10, s30 eor r10, r3 vmov s30, r10 @// g1 ^= f27 vmov r11, s27 eor r1, r11 @// g0 ^= f16 vmov r11, s16 eor r0, r11 @// f4 ^= f25 vmov r10, s4 vmov r11, s25 eor r10, r11 vmov s4, r10 @// f5 ^= f29 vmov r10, s5 vmov r11, s29 eor r10, r11 vmov s5, r10 @// f19 ^= g3 vmov r10, s19 eor r10, r3 vmov s19, r10 @// g3 ^= f20 vmov r11, s20 eor r3, r11 @// f29 ^= g9 vmov r10, s29 eor r10, r9 vmov s29, r10 @// f16 ^= f20 vmov r10, s16 vmov r11, s20 eor r10, r11 vmov s16, r10 @// f16 ^= g9 vmov r10, s16 eor r10, r9 vmov s16, r10 @// g9 ^= f25 vmov r11, s25 eor r9, r11 @// g9 ^= g7 eor r9, r7 @// f31 ^= g7 vmov r10, s31 eor r10, r7 vmov s31, r10 @// f21 ^= f1 vmov r10, s21 vmov r11, s1 eor r10, r11 vmov s21, r10 @// g7 ^= f23 vmov r11, s23 eor r7, r11 @// f11 ^= g0 vmov r10, s11 eor r10, r0 vmov s11, 
r10 @// g5 ^= f1 vmov r11, s1 eor r5, r11 @// g4 ^= f29 vmov r11, s29 eor r4, r11 @// f25 ^= g3 vmov r10, s25 eor r10, r3 vmov s25, r10 @// g8 ^= g6 eor r8, r6 @// f1 ^= f31 vmov r10, s1 vmov r11, s31 eor r10, r11 vmov s1, r10 @// g0 ^= g3 eor r0, r3 @// f18 ^= f22 vmov r10, s18 vmov r11, s22 eor r10, r11 vmov s18, r10 @// f22 ^= g2 vmov r10, s22 eor r10, r2 vmov s22, r10 @// f6 ^= g3 vmov r10, s6 eor r10, r3 vmov s6, r10 @// f15 ^= f27 vmov r10, s15 vmov r11, s27 eor r10, r11 vmov s15, r10 @// f20 ^= f19 vmov r10, s20 vmov r11, s19 eor r10, r11 vmov s20, r10 @// g3 ^= f18 vmov r11, s18 eor r3, r11 @// f2 ^= g1 vmov r10, s2 eor r10, r1 vmov s2, r10 @// g1 ^= f9 vmov r11, s9 eor r1, r11 @// f10 ^= g5 vmov r10, s10 eor r10, r5 vmov s10, r10 @// g2 ^= f5 vmov r11, s5 eor r2, r11 @// f27 ^= g9 vmov r10, s27 eor r10, r9 vmov s27, r10 @// f9 ^= f25 vmov r10, s9 vmov r11, s25 eor r10, r11 vmov s9, r10 @// f11 ^= f22 vmov r10, s11 vmov r11, s22 eor r10, r11 vmov s11, r10 @// f23 ^= g1 vmov r10, s23 eor r10, r1 vmov s23, r10 str r0, [r14, #0] vstr.32 s1, [r14, #4] vstr.32 s2, [r14, #8] str r1, [r14, #12] vstr.32 s4, [r14, #16] vstr.32 s5, [r14, #20] vstr.32 s6, [r14, #24] vstr.32 s7, [r14, #28] str r2, [r14, #32] vstr.32 s9, [r14, #36] vstr.32 s10, [r14, #40] vstr.32 s11, [r14, #44] str r3, [r14, #48] str r4, [r14, #52] str r5, [r14, #56] vstr.32 s15, [r14, #60] vstr.32 s16, [r14, #64] str r6, [r14, #68] vstr.32 s18, [r14, #72] vstr.32 s19, [r14, #76] vstr.32 s20, [r14, #80] vstr.32 s21, [r14, #84] vstr.32 s22, [r14, #88] vstr.32 s23, [r14, #92] str r7, [r14, #96] vstr.32 s25, [r14, #100] str r8, [r14, #104] vstr.32 s27, [r14, #108] str r9, [r14, #112] vstr.32 s29, [r14, #116] vstr.32 s30, [r14, #120] vstr.32 s31, [r14, #124] vpop { d8-d15 } pop { r0-r12, r14 } bx lr .size gft_mul_v24, .-gft_mul_v24

@ =====================================================================
@ gft_mul_v25
@
@ Straight-line XOR network over a vector of 32 words: every output
@ word is the XOR of a fixed subset of the 32 input words.  The routine
@ uses only ldr / eor / vmov / str / vstr, no branches and no
@ flag-setting instructions.
@ NOTE(review): the name suggests multiplication by a fixed field
@ constant in bitsliced form -- confirm against the caller.
@
@ In:    r0 = destination, 32 words (byte offsets 0..124 are written)
@        r1 = source,      32 words (byte offsets 0..124 are read)
@ Out:   destination filled; r0-r12 and r14 are restored to their entry
@        values by the epilogue, d8-d15 (s16-s31) likewise.
@ Clobb: s0-s15.
@ Stack: 56 bytes (14 core registers) + 64 bytes (d8-d15).
@
@ Every load from the source is issued before the first store to the
@ destination.
@
@ Working layout after the gather phase:
@   rows 5, 6, 9, 11, 13, 17, 19, 20, 23, 25 live in r0..r9
@       (the generator's notation for these was g0..g9),
@   the remaining 22 rows stay in s<row> (generator notation f<row>),
@   r10 / r11 are scratch, r12 = source base, r14 = destination base.
@
@ Generator trace, condensed: the circuit search reported 260, 257, 252
@ and finally 251 XORs; the code below contains 139 gather XORs plus 112
@ in-register XORs = 251.  Generator's cycle estimate (memory stores not
@ counted): 692.
@
@ Generator's "final state" table, kept verbatim.  Presumably row k is
@ the set of input word indices whose XOR is output word k; this was not
@ re-derived during review.
@   0: {0, 4, 7, 11, 13, 15, 16, 17, 19, 20, 21, 23, 27, 28, 30, 31}
@   1: {1, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 18, 19, 20, 22, 23, 26, 27, 29, 30}
@   2: {2, 4, 8, 10, 12, 14, 15, 16, 19, 20, 23, 24, 26, 29, 30, 31}
@   3: {3, 5, 9, 11, 13, 14, 17, 18, 19, 21, 22, 23, 25, 27, 28, 29, 30}
@   4: {0, 2, 3, 6, 7, 10, 14, 15, 17, 19, 20, 24, 25, 26, 27, 28, 29, 30}
@   5: {1, 2, 6, 11, 14, 16, 17, 18, 19, 21, 24, 26, 28, 31}
@   6: {1, 2, 3, 5, 7, 8, 9, 10, 11, 13, 15, 16, 18, 19, 22, 25, 26, 28, 29}
@   7: {0, 1, 2, 4, 5, 6, 7, 8, 10, 12, 13, 14, 15, 17, 18, 23, 24, 25, 27, 28}
@   8: {0, 1, 3, 4, 5, 9, 11, 13, 15, 16, 20, 21, 25, 27, 31}
@   9: {0, 2, 3, 4, 8, 9, 10, 11, 12, 13, 14, 15, 17, 20, 24, 25, 26, 27, 30, 31}
@  10: {0, 3, 6, 7, 8, 10, 11, 12, 14, 15, 18, 22, 23, 24, 26, 27, 28, 30}
@  11: {1, 2, 3, 6, 9, 10, 13, 14, 19, 22, 25, 26, 29, 31}
@  12: {0, 3, 5, 10, 13, 14, 15, 16, 19, 23, 24, 25, 28, 31}
@  13: {1, 2, 3, 4, 5, 11, 12, 13, 14, 17, 18, 19, 22, 23, 24, 29, 30, 31}
@  14: {0, 7, 8, 9, 10, 11, 13, 16, 20, 22, 26, 27, 28}
@  15: {1, 6, 7, 8, 10, 12, 13, 17, 21, 23, 26, 29}
@  16: {0, 3, 5, 6, 7, 13, 15, 19, 20, 21, 22, 27}
@  17: {1, 2, 3, 4, 5, 6, 12, 13, 14, 15, 18, 19, 20, 23, 26, 27}
@  18: {0, 5, 12, 14, 15, 16, 18, 20, 21, 24, 26}
@  19: {1, 4, 5, 13, 14, 17, 19, 20, 25, 27}
@  20: {1, 2, 4, 5, 6, 7, 10, 14, 16, 17, 19, 20, 21, 31}
@  21: {0, 1, 3, 4, 6, 11, 15, 16, 18, 19, 20, 30, 31}
@  22: {0, 1, 2, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 19, 22, 23, 28, 30}
@  23: {0, 3, 4, 5, 7, 8, 10, 12, 14, 17, 18, 19, 22, 29, 31}
@  24: {1, 8, 9, 11, 13, 14, 15, 16, 19, 20, 21, 24, 30}
@  25: {0, 1, 8, 10, 11, 12, 13, 14, 17, 18, 19, 20, 25, 31}
@  26: {3, 8, 11, 13, 16, 22, 23, 26, 28, 29, 30, 31}
@  27: {2, 3, 9, 10, 11, 12, 13, 17, 22, 27, 28, 30}
@  28: {5, 9, 10, 12, 14, 15, 16, 19, 25, 28, 29}
@  29: {4, 5, 8, 9, 11, 13, 14, 17, 18, 19, 24, 25, 28}
@  30: {7, 8, 9, 10, 13, 14, 15, 16, 27, 30, 31}
@  31: {6, 7, 8, 11, 12, 13, 14, 17, 26, 27, 30}
@ =====================================================================
    .syntax unified
    .cpu    cortex-m4
    .fpu    fpv4-sp-d16

@ --- local helper macros (purged again after the function) -----------

@ v25_gather dst, o0 [, o1 ... o7]
@   dst = [r12+o0] ^ [r12+o1] ^ ...   (byte offsets; offset/4 = word index)
@   Loads go to r0, r1, ... in argument order, then are folded into r0.
@   Clobbers r0 and one further register per extra offset (up to r7).
    .macro  v25_gather dst, o0, o1, o2, o3, o4, o5, o6, o7
    ldr     r0, [r12, #\o0]
    .ifnb   \o1
    ldr     r1, [r12, #\o1]
    .endif
    .ifnb   \o2
    ldr     r2, [r12, #\o2]
    .endif
    .ifnb   \o3
    ldr     r3, [r12, #\o3]
    .endif
    .ifnb   \o4
    ldr     r4, [r12, #\o4]
    .endif
    .ifnb   \o5
    ldr     r5, [r12, #\o5]
    .endif
    .ifnb   \o6
    ldr     r6, [r12, #\o6]
    .endif
    .ifnb   \o7
    ldr     r7, [r12, #\o7]
    .endif
    .ifnb   \o1
    eor     r0, r1
    .endif
    .ifnb   \o2
    eor     r0, r2
    .endif
    .ifnb   \o3
    eor     r0, r3
    .endif
    .ifnb   \o4
    eor     r0, r4
    .endif
    .ifnb   \o5
    eor     r0, r5
    .endif
    .ifnb   \o6
    eor     r0, r6
    .endif
    .ifnb   \o7
    eor     r0, r7
    .endif
    vmov    \dst, r0
    .endm

@ v25_xor_ff acc, src : s-register ^= s-register (via r10, r11)
    .macro  v25_xor_ff acc, src
    vmov    r10, \acc
    vmov    r11, \src
    eor     r10, r11
    vmov    \acc, r10
    .endm

@ v25_xor_fg acc, src : s-register ^= core register (via r10)
    .macro  v25_xor_fg acc, src
    vmov    r10, \acc
    eor     r10, \src
    vmov    \acc, r10
    .endm

@ v25_xor_gf acc, src : core register ^= s-register (via r11)
    .macro  v25_xor_gf acc, src
    vmov    r11, \src
    eor     \acc, r11
    .endm

@ v25_xor_gg acc, src : core register ^= core register
    .macro  v25_xor_gg acc, src
    eor     \acc, \src
    .endm

    .global gft_mul_v25
    .type   gft_mul_v25, %function
    .align  2
gft_mul_v25:
    push    {r0-r12, r14}
    vpush   {d8-d15}

    mov     r14, r0                     @ r14 = destination base
    mov     r12, r1                     @ r12 = source base

    @ ---- Phase 1: gather.  s<k> = XOR of the listed source words ----
    v25_gather s0,  0, 4, 12, 36, 84, 92
    v25_gather s1,  0, 8, 16, 28, 40, 64, 112, 124
    v25_gather s2,  12, 96, 112, 124, 56
    v25_gather s3,  4, 32, 84, 92, 104, 108, 120
    v25_gather s4,  8, 28, 32, 52, 56, 116, 120
    v25_gather s5,  4, 24, 60, 92, 96
    v25_gather s6,  32, 44, 56, 100, 124
    v25_gather s7,  0, 12, 20, 44, 68, 100, 112
    v25_gather s8,  20, 56, 60, 84, 100, 120
    v25_gather s9,  16, 32, 72, 112
    v25_gather s10, 16, 92, 96, 48, 124
    v25_gather s11, 76
    v25_gather s12, 72, 28, 36, 56
    v25_gather s13, 68, 84, 88, 112, 120
    v25_gather s14, 16, 24, 56, 80, 88, 92
    v25_gather s15, 104, 100, 40, 116, 124
    v25_gather s16, 28, 32, 44, 80, 84, 96, 112
    v25_gather s17, 20, 24, 48, 68
    v25_gather s18, 40, 20, 84, 104, 112
    v25_gather s19, 24, 28, 52, 60, 108
    v25_gather s20, 52, 40, 32, 100
    v25_gather s21, 12, 40, 48, 64, 72, 124
    v25_gather s22, 28, 48, 88, 96, 100
    v25_gather s23, 12, 56, 60, 80, 104, 116
    v25_gather s24, 0, 48, 56, 64, 72, 100
    v25_gather s25, 0, 36, 60, 68, 72, 112
    v25_gather s26, 8, 36, 44, 92, 104
    v25_gather s27, 16, 60, 64, 92, 108, 112
    v25_gather s28, 24, 40, 48, 116
    v25_gather s29, 4, 12, 28, 56, 80, 100, 108
    v25_gather s30, 120, 92, 44, 56
    v25_gather s31, 20, 92, 36, 108, 116

    @ ---- Move ten rows into core registers for the mixing phase ----
    vmov    r0, s5
    vmov    r1, s6
    vmov    r2, s9
    vmov    r3, s11
    vmov    r4, s13
    vmov    r5, s17
    vmov    r6, s19
    vmov    r7, s20
    vmov    r8, s23
    vmov    r9, s25

    @ ---- Phase 2: in-register XOR schedule ----
    @ Order is significant: most steps consume a value that an earlier
    @ step has already updated.  Do not reorder.
    v25_xor_gg r2,  r3
    v25_xor_fg s8,  r3
    v25_xor_fg s28, r3
    v25_xor_ff s10, s22
    v25_xor_ff s18, s15
    v25_xor_fg s14, r0
    v25_xor_fg s22, r3
    v25_xor_gg r3,  r7
    v25_xor_gf r7,  s15
    v25_xor_gf r7,  s18
    v25_xor_gf r7,  s16
    v25_xor_fg s15, r1
    v25_xor_ff s21, s28
    v25_xor_gg r2,  r3
    v25_xor_fg s16, r4
    v25_xor_ff s29, s2
    v25_xor_gf r0,  s30
    v25_xor_gf r0,  s8
    v25_xor_gg r0,  r3
    v25_xor_ff s8,  s12
    v25_xor_fg s8,  r6
    v25_xor_fg s4,  r6
    v25_xor_gf r4,  s18
    v25_xor_gf r4,  s7
    v25_xor_gf r1,  s12
    v25_xor_fg s0,  r9
    v25_xor_fg s0,  r6
    v25_xor_gg r6,  r5
    v25_xor_fg s12, r9
    v25_xor_gg r5,  r9
    v25_xor_gg r9,  r2
    v25_xor_gg r9,  r6
    v25_xor_ff s18, s2
    v25_xor_ff s18, s24
    v25_xor_fg s24, r5
    v25_xor_gf r5,  s7
    v25_xor_fg s16, r1
    v25_xor_fg s7,  r3
    v25_xor_gf r3,  s15
    v25_xor_gf r3,  s26
    v25_xor_fg s31, r6
    v25_xor_gf r6,  s22
    v25_xor_gf r6,  s14
    v25_xor_ff s12, s2
    v25_xor_fg s29, r1
    v25_xor_ff s21, s2
    v25_xor_gf r0,  s28
    v25_xor_ff s16, s12
    v25_xor_gf r5,  s26
    v25_xor_fg s2,  r8
    v25_xor_fg s18, r8
    v25_xor_gf r8,  s14
    v25_xor_gf r3,  s10
    v25_xor_ff s10, s12
    v25_xor_ff s12, s31
    v25_xor_ff s2,  s27
    v25_xor_ff s30, s27
    v25_xor_ff s27, s31
    v25_xor_ff s31, s26
    v25_xor_gg r3,  r8
    v25_xor_ff s22, s28
    v25_xor_gg r9,  r1
    v25_xor_fg s1,  r4
    v25_xor_gg r4,  r8
    v25_xor_gf r0,  s31
    v25_xor_ff s16, s8
    v25_xor_ff s27, s1
    v25_xor_fg s10, r2
    v25_xor_fg s26, r1
    v25_xor_fg s26, r2
    v25_xor_fg s7,  r7
    v25_xor_fg s14, r4
    v25_xor_gf r4,  s26
    v25_xor_ff s26, s1
    v25_xor_fg s21, r8
    v25_xor_ff s21, s14
    v25_xor_ff s15, s3
    v25_xor_fg s3,  r8
    v25_xor_gg r8,  r7
    v25_xor_gg r7,  r2
    v25_xor_ff s31, s4
    v25_xor_gg r2,  r1
    v25_xor_fg s30, r2
    v25_xor_ff s2,  s28
    v25_xor_gf r2,  s28
    v25_xor_gf r1,  s24
    v25_xor_ff s28, s24
    v25_xor_ff s24, s8
    v25_xor_gf r3,  s22
    v25_xor_gg r0,  r1
    v25_xor_ff s24, s29
    v25_xor_ff s14, s30
    v25_xor_ff s15, s31
    v25_xor_gg r9,  r6
    v25_xor_fg s29, r6
    v25_xor_ff s2,  s4
    v25_xor_gg r7,  r0
    v25_xor_gg r8,  r9
    v25_xor_fg s22, r4
    v25_xor_ff s12, s28
    v25_xor_ff s0,  s21
    v25_xor_ff s8,  s21
    v25_xor_gg r5,  r6
    v25_xor_gg r4,  r8
    v25_xor_ff s22, s28
    v25_xor_ff s10, s31
    v25_xor_ff s3,  s29
    v25_xor_gg r1,  r3
    v25_xor_ff s4,  s7
    v25_xor_fg s7,  r5
    v25_xor_gf r2,  s4
    v25_xor_fg s1,  r5

    @ ---- Phase 3: write the 32 result words (r0..r9 hold rows ----
    @ ---- 5, 6, 9, 11, 13, 17, 19, 20, 23, 25; the rest are s<row>) ----
    vstr.32 s0,  [r14, #0]
    vstr.32 s1,  [r14, #4]
    vstr.32 s2,  [r14, #8]
    vstr.32 s3,  [r14, #12]
    vstr.32 s4,  [r14, #16]
    str     r0,  [r14, #20]
    str     r1,  [r14, #24]
    vstr.32 s7,  [r14, #28]
    vstr.32 s8,  [r14, #32]
    str     r2,  [r14, #36]
    vstr.32 s10, [r14, #40]
    str     r3,  [r14, #44]
    vstr.32 s12, [r14, #48]
    str     r4,  [r14, #52]
    vstr.32 s14, [r14, #56]
    vstr.32 s15, [r14, #60]
    vstr.32 s16, [r14, #64]
    str     r5,  [r14, #68]
    vstr.32 s18, [r14, #72]
    str     r6,  [r14, #76]
    str     r7,  [r14, #80]
    vstr.32 s21, [r14, #84]
    vstr.32 s22, [r14, #88]
    str     r8,  [r14, #92]
    vstr.32 s24, [r14, #96]
    str     r9,  [r14, #100]
    vstr.32 s26, [r14, #104]
    vstr.32 s27, [r14, #108]
    vstr.32 s28, [r14, #112]
    vstr.32 s29, [r14, #116]
    vstr.32 s30, [r14, #120]
    vstr.32 s31, [r14, #124]

    vpop    {d8-d15}
    pop     {r0-r12, r14}
    bx      lr
    .size   gft_mul_v25, .-gft_mul_v25

    @ Drop the helper macros so their names stay local to this routine.
    .purgem v25_gather
    .purgem v25_xor_ff
    .purgem v25_xor_fg
    .purgem v25_xor_gf
    .purgem v25_xor_gg

.syntax unified .cpu cortex-m4 .fpu fpv4-sp-d16 .global gft_mul_v26 .type gft_mul_v26, %function .align 2 gft_mul_v26: push { r0-r12, r14 } vpush {d8-d15} @ final state: [{5, 7, 8, 9, 10, 12, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 31}, {4, 5, 6, 7, 8, 11, 13, 15, 16, 18, 20, 22, 24, 26, 30, 31}, {4, 6, 7, 8, 9, 12, 13, 15, 17, 18, 21, 22, 25, 26, 28, 30}, {5, 6, 8, 12, 14, 15, 16, 17, 19, 20, 21, 23, 24, 25, 27, 29, 31}, {2, 6, 10, 11, 12, 13, 15, 19, 20, 21, 22, 24, 25, 30, 31}, {3, 7, 10, 12, 14, 15, 18, 19, 20, 23, 24, 30}, {0, 1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 15, 16, 18, 20, 21, 26, 27, 29, 31}, {0, 2, 4, 6, 8, 9, 10, 11, 13, 14, 15, 17, 19, 20, 26, 28, 29, 30, 31}, {0, 1, 3, 4, 5, 7, 8, 9, 11, 12, 17, 19, 23, 24, 26, 28, 29, 30}, {0, 2, 3, 4, 6, 7, 8, 10, 11, 13, 16, 17, 18, 19, 22, 23, 25, 27, 28, 31}, {0, 3, 4, 7, 8, 11, 14, 16, 18, 19, 20, 22, 24, 25, 27, 28, 29}, {1, 2, 3, 5, 6, 7, 9, 10, 11, 15, 17, 18, 21, 23, 24, 26, 27, 28}, {1, 3, 4, 9, 10, 11, 12, 14, 16, 17, 20, 23, 24, 25, 27, 29, 30, 31}, {0, 1, 2, 3, 5, 8, 9, 10, 13, 15, 16, 21, 22, 23, 24, 26, 27, 28, 29, 30}, {0, 2, 3, 6, 9,
12, 13, 15, 18, 19, 20, 24, 27, 29}, {1, 2, 7, 8, 9, 12, 14, 15, 18, 21, 25, 26, 27, 28, 29}, {1, 6, 15, 17, 21, 22, 23, 24, 25, 26, 28, 30, 31}, {0, 1, 7, 14, 15, 16, 17, 20, 21, 22, 24, 27, 29, 30}, {3, 4, 5, 6, 7, 12, 14, 19, 21, 24, 25, 29, 30, 31}, {2, 3, 4, 6, 13, 15, 18, 19, 20, 21, 24, 28, 29, 30}, {1, 8, 9, 12, 13, 17, 18, 22, 24, 25, 26, 27, 31}, {0, 1, 8, 12, 16, 17, 19, 23, 24, 26, 30, 31}, {3, 10, 11, 14, 15, 16, 17, 18, 20, 21, 22, 23, 25, 26, 28, 30}, {2, 3, 10, 14, 16, 19, 20, 22, 24, 25, 27, 29, 31}, {0, 3, 8, 9, 11, 14, 17, 20, 21, 23, 28, 30}, {1, 2, 3, 8, 10, 11, 15, 16, 17, 20, 22, 23, 29, 31}, {0, 8, 11, 12, 13, 14, 15, 19, 20, 23, 28, 29, 31}, {1, 9, 10, 11, 12, 14, 18, 19, 21, 22, 23, 28, 30, 31}, {4, 7, 9, 12, 15, 17, 19, 23, 26, 27, 30, 31}, {5, 6, 7, 8, 9, 13, 14, 15, 16, 17, 18, 19, 22, 23, 26, 30}, {4, 11, 12, 16, 18, 19, 20, 22, 25, 27, 29, 31}, {5, 10, 11, 13, 17, 18, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31}] @ i = 0 @ better circuit found: #XORs = 286 @ better circuit found: #XORs = 268 @ better circuit found: #XORs = 257 @ better circuit found: #XORs = 256 @ i = 50 @ better circuit found: #XORs = 245 @ i = 100 @ i = 150 @ better circuit found: #XORs = 243 @ i = 200 @ i = 250 @ i = 300 @ i = 350 @ i = 400 @ i = 450 @ i = 500 @ i = 550 @ i = 600 @ i = 650 @ i = 700 @ i = 750 @ i = 800 @ i = 850 @ i = 900 @ i = 950 @ initial state: [{0, 7, 6, 12}, {4, 19, 20, 21, 24, 29}, {4, 10, 13, 25, 30}, {17, 9, 15}, {20, 5, 22, 23, 21}, {2, 7, 14, 19, 20, 23, 29, 30}, {0, 7, 13, 26, 27}, {2, 11, 17, 18, 22, 25}, {7, 15, 16, 21, 28, 31}, {17, 24, 30, 31}, {16, 19, 5, 7, 25}, {17, 22, 6, 12, 8}, {3, 5, 21, 9, 14}, {11, 15, 23, 30}, {2, 6, 8, 11, 15, 25}, {0, 8, 9, 10, 18, 29}, {9, 21, 25, 27, 28, 30}, {11, 16, 21, 25}, {19, 6, 7, 15}, {0, 4, 12, 25}, {23, 6, 8, 13}, {1, 7, 8, 19, 21, 23}, {18, 19, 26, 5, 10}, {4, 11, 14, 15, 29}, {16, 17, 20, 5, 27}, {18, 25, 12, 15}, {2, 3, 29, 24, 25, 10}, {6, 9, 26}, {19, 22, 24, 25, 12}, {0, 3, 9, 16, 19, 23, 
28}, {4, 23, 21, 31}, {0, 1, 5, 9, 11}] @[0, 1, 4, 5, 7, 8, 12, 13, 14, 15, 16, 17, 19, 21, 22, 23, 24, 25, 26, 28, 29, 31, 2, 3, 6, 9, 10, 11, 18, 20, 27, 30] @instuctions with recommended mapping to GP and FP registers: @(cycle count without considering memory stores: 676) mov r14, r0 mov r12, r1 ldr r0, [r12, #0] ldr r1, [r12, #28] ldr r2, [r12, #24] ldr r3, [r12, #48] eor r0, r1 eor r0, r2 eor r0, r3 vmov s0, r0 ldr r0, [r12, #16] ldr r1, [r12, #76] ldr r2, [r12, #80] ldr r3, [r12, #84] ldr r4, [r12, #96] ldr r5, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s1, r0 ldr r0, [r12, #16] ldr r1, [r12, #40] ldr r2, [r12, #52] ldr r3, [r12, #100] ldr r4, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s2, r0 ldr r0, [r12, #68] ldr r1, [r12, #36] ldr r2, [r12, #60] eor r0, r1 eor r0, r2 vmov s3, r0 ldr r0, [r12, #80] ldr r1, [r12, #20] ldr r2, [r12, #88] ldr r3, [r12, #92] ldr r4, [r12, #84] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s4, r0 ldr r0, [r12, #8] ldr r1, [r12, #28] ldr r2, [r12, #56] ldr r3, [r12, #76] ldr r4, [r12, #80] ldr r5, [r12, #92] ldr r6, [r12, #116] ldr r7, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 eor r0, r7 vmov s5, r0 ldr r0, [r12, #0] ldr r1, [r12, #28] ldr r2, [r12, #52] ldr r3, [r12, #104] ldr r4, [r12, #108] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s6, r0 ldr r0, [r12, #8] ldr r1, [r12, #44] ldr r2, [r12, #68] ldr r3, [r12, #72] ldr r4, [r12, #88] ldr r5, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s7, r0 ldr r0, [r12, #28] ldr r1, [r12, #60] ldr r2, [r12, #64] ldr r3, [r12, #84] ldr r4, [r12, #112] ldr r5, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s8, r0 ldr r0, [r12, #68] ldr r1, [r12, #96] ldr r2, [r12, #120] ldr r3, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 vmov s9, r0 ldr r0, [r12, #64] ldr r1, [r12, #76] ldr r2, [r12, #20] ldr r3, [r12, #28] ldr r4, [r12, #100] eor r0, r1 eor r0, r2 eor 
r0, r3 eor r0, r4 vmov s10, r0 ldr r0, [r12, #68] ldr r1, [r12, #88] ldr r2, [r12, #24] ldr r3, [r12, #48] ldr r4, [r12, #32] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s11, r0 ldr r0, [r12, #12] ldr r1, [r12, #20] ldr r2, [r12, #84] ldr r3, [r12, #36] ldr r4, [r12, #56] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s12, r0 ldr r0, [r12, #44] ldr r1, [r12, #60] ldr r2, [r12, #92] ldr r3, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 vmov s13, r0 ldr r0, [r12, #8] ldr r1, [r12, #24] ldr r2, [r12, #32] ldr r3, [r12, #44] ldr r4, [r12, #60] ldr r5, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s14, r0 ldr r0, [r12, #0] ldr r1, [r12, #32] ldr r2, [r12, #36] ldr r3, [r12, #40] ldr r4, [r12, #72] ldr r5, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s15, r0 ldr r0, [r12, #36] ldr r1, [r12, #84] ldr r2, [r12, #100] ldr r3, [r12, #108] ldr r4, [r12, #112] ldr r5, [r12, #120] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s16, r0 ldr r0, [r12, #44] ldr r1, [r12, #64] ldr r2, [r12, #84] ldr r3, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 vmov s17, r0 ldr r0, [r12, #76] ldr r1, [r12, #24] ldr r2, [r12, #28] ldr r3, [r12, #60] eor r0, r1 eor r0, r2 eor r0, r3 vmov s18, r0 ldr r0, [r12, #0] ldr r1, [r12, #16] ldr r2, [r12, #48] ldr r3, [r12, #100] eor r0, r1 eor r0, r2 eor r0, r3 vmov s19, r0 ldr r0, [r12, #92] ldr r1, [r12, #24] ldr r2, [r12, #32] ldr r3, [r12, #52] eor r0, r1 eor r0, r2 eor r0, r3 vmov s20, r0 ldr r0, [r12, #4] ldr r1, [r12, #28] ldr r2, [r12, #32] ldr r3, [r12, #76] ldr r4, [r12, #84] ldr r5, [r12, #92] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s21, r0 ldr r0, [r12, #72] ldr r1, [r12, #76] ldr r2, [r12, #104] ldr r3, [r12, #20] ldr r4, [r12, #40] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s22, r0 ldr r0, [r12, #16] ldr r1, [r12, #44] ldr r2, [r12, #56] ldr r3, [r12, #60] ldr r4, [r12, #116] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s23, r0 ldr r0, [r12, 
#64] ldr r1, [r12, #68] ldr r2, [r12, #80] ldr r3, [r12, #20] ldr r4, [r12, #108] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s24, r0 ldr r0, [r12, #72] ldr r1, [r12, #100] ldr r2, [r12, #48] ldr r3, [r12, #60] eor r0, r1 eor r0, r2 eor r0, r3 vmov s25, r0 ldr r0, [r12, #8] ldr r1, [r12, #12] ldr r2, [r12, #116] ldr r3, [r12, #96] ldr r4, [r12, #100] ldr r5, [r12, #40] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 vmov s26, r0 ldr r0, [r12, #24] ldr r1, [r12, #36] ldr r2, [r12, #104] eor r0, r1 eor r0, r2 vmov s27, r0 ldr r0, [r12, #76] ldr r1, [r12, #88] ldr r2, [r12, #96] ldr r3, [r12, #100] ldr r4, [r12, #48] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s28, r0 ldr r0, [r12, #0] ldr r1, [r12, #12] ldr r2, [r12, #36] ldr r3, [r12, #64] ldr r4, [r12, #76] ldr r5, [r12, #92] ldr r6, [r12, #112] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 eor r0, r5 eor r0, r6 vmov s29, r0 ldr r0, [r12, #16] ldr r1, [r12, #92] ldr r2, [r12, #84] ldr r3, [r12, #124] eor r0, r1 eor r0, r2 eor r0, r3 vmov s30, r0 ldr r0, [r12, #0] ldr r1, [r12, #4] ldr r2, [r12, #20] ldr r3, [r12, #36] ldr r4, [r12, #44] eor r0, r1 eor r0, r2 eor r0, r3 eor r0, r4 vmov s31, r0 vmov r0, s2; vmov r1, s3; vmov r2, s6; vmov r3, s9; vmov r4, s10; vmov r5, s11; vmov r6, s18; vmov r7, s20; vmov r8, s27; vmov r9, s30; @// f0 ^= g5 vmov r10, s0 eor r10, r5 vmov s0, r10 @// f0 ^= g1 vmov r10, s0 eor r10, r1 vmov s0, r10 @// f7 ^= f28 vmov r10, s7 vmov r11, s28 eor r10, r11 vmov s7, r10 @// g1 ^= g3 eor r1, r3 @// g3 ^= g9 eor r3, r9 @// f17 ^= g9 vmov r10, s17 eor r10, r9 vmov s17, r10 @// g9 ^= f4 vmov r11, s4 eor r9, r11 @// g9 ^= f24 vmov r11, s24 eor r9, r11 @// f14 ^= g8 vmov r10, s14 eor r10, r8 vmov s14, r10 @// f26 ^= g0 vmov r10, s26 eor r10, r0 vmov s26, r10 @// f7 ^= f22 vmov r10, s7 vmov r11, s22 eor r10, r11 vmov s7, r10 @// f13 ^= f25 vmov r10, s13 vmov r11, s25 eor r10, r11 vmov s13, r10 @// f25 ^= g0 vmov r10, s25 eor r10, r0 vmov s25, r10 @// g0 ^= g7 eor r0, r7 @// g7 ^= g5 
eor r7, r5 @// g5 ^= f28 vmov r11, s28 eor r5, r11 @// f4 ^= g7 vmov r10, s4 eor r10, r7 vmov s4, r10 @// f24 ^= g4 vmov r10, s24 eor r10, r4 vmov s24, r10 @// f22 ^= g4 vmov r10, s22 eor r10, r4 vmov s22, r10 @// f4 ^= f7 vmov r10, s4 vmov r11, s7 eor r10, r11 vmov s4, r10 @// f4 ^= g8 vmov r10, s4 eor r10, r8 vmov s4, r10 @// f15 ^= g8 vmov r10, s15 eor r10, r8 vmov s15, r10 @// g8 ^= g6 eor r8, r6 @// g6 ^= g4 eor r6, r4 @// g4 ^= f28 vmov r11, s28 eor r4, r11 @// g9 ^= g3 eor r9, r3 @// g3 ^= g1 eor r3, r1 @// g8 ^= f22 vmov r11, s22 eor r8, r11 @// g8 ^= f29 vmov r11, s29 eor r8, r11 @// g6 ^= f17 vmov r11, s17 eor r6, r11 @// f17 ^= f23 vmov r10, s17 vmov r11, s23 eor r10, r11 vmov s17, r10 @// f29 ^= f12 vmov r10, s29 vmov r11, s12 eor r10, r11 vmov s29, r10 @// f8 ^= f12 vmov r10, s8 vmov r11, s12 eor r10, r11 vmov s8, r10 @// f12 ^= f31 vmov r10, s12 vmov r11, s31 eor r10, r11 vmov s12, r10 @// g6 ^= g1 eor r6, r1 @// f17 ^= g1 vmov r10, s17 eor r10, r1 vmov s17, r10 @// f28 ^= g1 vmov r10, s28 eor r10, r1 vmov s28, r10 @// g1 ^= f16 vmov r11, s16 eor r1, r11 @// g1 ^= g5 eor r1, r5 @// g5 ^= f21 vmov r11, s21 eor r5, r11 @// g5 ^= g3 eor r5, r3 @// g9 ^= g7 eor r9, r7 @// f5 ^= f23 vmov r10, s5 vmov r11, s23 eor r10, r11 vmov s5, r10 @// g4 ^= f0 vmov r11, s0 eor r4, r11 @// g4 ^= f8 vmov r11, s8 eor r4, r11 @// g3 ^= f23 vmov r11, s23 eor r3, r11 @// f13 ^= g7 vmov r10, s13 eor r10, r7 vmov s13, r10 @// f24 ^= f21 vmov r10, s24 vmov r11, s21 eor r10, r11 vmov s24, r10 @// f21 ^= g2 vmov r10, s21 eor r10, r2 vmov s21, r10 @// f21 ^= g9 vmov r10, s21 eor r10, r9 vmov s21, r10 @// g9 ^= f13 vmov r11, s13 eor r9, r11 @// g0 ^= f25 vmov r11, s25 eor r0, r11 @// f8 ^= f23 vmov r10, s8 vmov r11, s23 eor r10, r11 vmov s8, r10 @// f25 ^= f26 vmov r10, s25 vmov r11, s26 eor r10, r11 vmov s25, r10 @// f14 ^= g0 vmov r10, s14 eor r10, r0 vmov s14, r10 @// f14 ^= g2 vmov r10, s14 eor r10, r2 vmov s14, r10 @// g7 ^= f19 vmov r11, s19 eor r7, r11 @// g7 ^= g2 eor r7, 
r2 @// f7 ^= g6 vmov r10, s7 eor r10, r6 vmov s7, r10 @// g1 ^= f1 vmov r11, s1 eor r1, r11 @// f31 ^= g2 vmov r10, s31 eor r10, r2 vmov s31, r10 @// g2 ^= f22 vmov r11, s22 eor r2, r11 @// g2 ^= g0 eor r2, r0 @// g6 ^= f8 vmov r11, s8 eor r6, r11 @// f23 ^= f25 vmov r10, s23 vmov r11, s25 eor r10, r11 vmov s23, r10 @// f25 ^= g9 vmov r10, s25 eor r10, r9 vmov s25, r10 @// g9 ^= f1 vmov r11, s1 eor r9, r11 @// f25 ^= f24 vmov r10, s25 vmov r11, s24 eor r10, r11 vmov s25, r10 @// f13 ^= f22 vmov r10, s13 vmov r11, s22 eor r10, r11 vmov s13, r10 @// f17 ^= f0 vmov r10, s17 vmov r11, s0 eor r10, r11 vmov s17, r10 @// f17 ^= f24 vmov r10, s17 vmov r11, s24 eor r10, r11 vmov s17, r10 @// f29 ^= f19 vmov r10, s29 vmov r11, s19 eor r10, r11 vmov s29, r10 @// g8 ^= f12 vmov r11, s12 eor r8, r11 @// f19 ^= f16 vmov r10, s19 vmov r11, s16 eor r10, r11 vmov s19, r10 @// f12 ^= f16 vmov r10, s12 vmov r11, s16 eor r10, r11 vmov s12, r10 @// f16 ^= g7 vmov r10, s16 eor r10, r7 vmov s16, r10 @// f16 ^= g5 vmov r10, s16 eor r10, r5 vmov s16, r10 @// g0 ^= g5 eor r0, r5 @// g5 ^= f26 vmov r11, s26 eor r5, r11 @// g2 ^= g1 eor r2, r1 @// f26 ^= f5 vmov r10, s26 vmov r11, s5 eor r10, r11 vmov s26, r10 @// f13 ^= f15 vmov r10, s13 vmov r11, s15 eor r10, r11 vmov s13, r10 @// f22 ^= f0 vmov r10, s22 vmov r11, s0 eor r10, r11 vmov s22, r10 @// f0 ^= f15 vmov r10, s0 vmov r11, s15 eor r10, r11 vmov s0, r10 @// f31 ^= f0 vmov r10, s31 vmov r11, s0 eor r10, r11 vmov s31, r10 @// f15 ^= f14 vmov r10, s15 vmov r11, s14 eor r10, r11 vmov s15, r10 @// f14 ^= f26 vmov r10, s14 vmov r11, s26 eor r10, r11 vmov s14, r10 @// f26 ^= g4 vmov r10, s26 eor r10, r4 vmov s26, r10 @// f5 ^= f23 vmov r10, s5 vmov r11, s23 eor r10, r11 vmov s5, r10 @// f1 ^= g3 vmov r10, s1 eor r10, r3 vmov s1, r10 @// g3 ^= f4 vmov r11, s4 eor r3, r11 @// f15 ^= g8 vmov r10, s15 eor r10, r8 vmov s15, r10 @// g8 ^= f28 vmov r11, s28 eor r8, r11 @// g6 ^= f29 vmov r11, s29 eor r6, r11 @// f24 ^= f12 vmov r10, s24 vmov r11, 
s12 eor r10, r11 vmov s24, r10 @// f4 ^= f28 vmov r10, s4 vmov r11, s28 eor r10, r11 vmov s4, r10 @// f28 ^= g7 vmov r10, s28 eor r10, r7 vmov s28, r10 @// f23 ^= g9 vmov r10, s23 eor r10, r9 vmov s23, r10 @// f31 ^= f16 vmov r10, s31 vmov r11, s16 eor r10, r11 vmov s31, r10 @// g7 ^= g0 eor r7, r0 @// g0 ^= f16 vmov r11, s16 eor r0, r11 @// f22 ^= f24 vmov r10, s22 vmov r11, s24 eor r10, r11 vmov s22, r10 @// g1 ^= f29 vmov r11, s29 eor r1, r11 @// g5 ^= f31 vmov r11, s31 eor r5, r11 @// f29 ^= g0 vmov r10, s29 eor r10, r0 vmov s29, r10 @// f1 ^= f29 vmov r10, s1 vmov r11, s29 eor r10, r11 vmov s1, r10 @// f19 ^= f14 vmov r10, s19 vmov r11, s14 eor r10, r11 vmov s19, r10 @// f12 ^= g2 vmov r10, s12 eor r10, r2 vmov s12, r10 @// g2 ^= g5 eor r2, r5 @// f13 ^= g5 vmov r10, s13 eor r10, r5 vmov s13, r10 @// f8 ^= f21 vmov r10, s8 vmov r11, s21 eor r10, r11 vmov s8, r10 @// g4 ^= g9 eor r4, r9 @// g3 ^= g4 eor r3, r4 @// f7 ^= f26 vmov r10, s7 vmov r11, s26 eor r10, r11 vmov s7, r10 @// f0 ^= g1 vmov r10, s0 eor r10, r1 vmov s0, r10 vstr.32 s0, [r14, #0] vstr.32 s1, [r14, #4] str r0, [r14, #8] str r1, [r14, #12] vstr.32 s4, [r14, #16] vstr.32 s5, [r14, #20] str r2, [r14, #24] vstr.32 s7, [r14, #28] vstr.32 s8, [r14, #32] str r3, [r14, #36] str r4, [r14, #40] str r5, [r14, #44] vstr.32 s12, [r14, #48] vstr.32 s13, [r14, #52] vstr.32 s14, [r14, #56] vstr.32 s15, [r14, #60] vstr.32 s16, [r14, #64] vstr.32 s17, [r14, #68] str r6, [r14, #72] vstr.32 s19, [r14, #76] str r7, [r14, #80] vstr.32 s21, [r14, #84] vstr.32 s22, [r14, #88] vstr.32 s23, [r14, #92] vstr.32 s24, [r14, #96] vstr.32 s25, [r14, #100] vstr.32 s26, [r14, #104] str r8, [r14, #108] vstr.32 s28, [r14, #112] vstr.32 s29, [r14, #116] str r9, [r14, #120] vstr.32 s31, [r14, #124] vpop { d8-d15 } pop { r0-r12, r14 } bx lr .size gft_mul_v26, .-gft_mul_v26

@-----------------------------------------------------------------------
@ gft_mul_v27
@
@ Straight-line XOR network over 32 words of 32 bits each.
@
@   In:    r0 = base address the 32 result words are stored to
@               (byte offsets #0 .. #124); copied to r14 on entry
@          r1 = base address the 32 source words are loaded from
@               (byte offsets #0 .. #124); copied to r12 on entry
@   Out:   nothing in registers
@   Saved: r0-r12, r14 and d8-d15 are pushed on entry and popped before
@          the return, so they come back unchanged.
@   Clobb: s0-s15 are overwritten and not restored.
@   Stack: 14 core registers + 8 double registers are pushed
@          (56 + 64 = 120 bytes).
@
@ NOTE(review): presumably callable from C as
@     void gft_mul_v27(uint32_t out[32], const uint32_t in[32]);
@ and presumably a multiplication by a fixed constant written out as a
@ GF(2)-linear map -- both inferred from the name and the use of r0/r1
@ only; confirm against the callers.
@
@ Structure of the body:
@   1. Build 32 intermediate values f0..f31, each the XOR of the source
@      words listed in the matching entry of the "initial state" comment
@      below, and park fk in sk.
@   2. Move ten of them into core registers; the step comments call the
@      value living in rN "gN":
@        g0=f0  g1=f1  g2=f4  g3=f6  g4=f9
@        g5=f13 g6=f17 g7=f18 g8=f29 g9=f30
@   3. Apply the in-place XOR steps named in the "@// x ^= y" comments,
@      using r10 and r11 as scratch for values that live in s-registers.
@   4. Store result word k from sk, or from the core register that holds
@      fk, to [r14, #4*k].
@
@ Every load from [r12] is issued before the first store to [r14].
@
@ The comments from "final state" to the cycle count are generator output.
@ Entry k of "final state" appears to list the source-word indices whose
@ XOR is result word k (spot-checked for entry 29 only).
@-----------------------------------------------------------------------
.syntax unified
.cpu cortex-m4
.fpu fpv4-sp-d16
.global gft_mul_v27
.type gft_mul_v27, %function
.align 2
gft_mul_v27:
    @ Save every core register that is used below (plus r0-r3/r12) and the
    @ upper FP bank s16-s31 (= d8-d15).
    push { r0-r12, r14 }
    vpush {d8-d15}
@ final state: [{1, 2, 4, 6, 7, 9, 11, 13, 15, 19, 23, 24, 26, 27, 28, 30}, {0, 1, 3, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 18, 19, 22, 23, 25, 26, 29, 31}, {0, 1, 2, 5, 6, 7, 8, 10, 11, 12, 14, 15, 16, 18, 20, 22, 25, 26, 27, 28, 29, 31}, {0, 3, 4, 5, 6, 9, 10, 13, 14, 17, 19, 21, 23, 24, 25, 26, 28, 30, 31}, {0, 1, 2, 3, 4, 7, 10, 13, 14, 15, 16, 17, 20, 21, 23, 26, 27, 28}, {0, 2, 5, 6, 7, 11, 12, 13, 14, 16, 20, 22, 23, 26, 29}, {1, 2, 4, 8, 9, 10, 11, 13, 18, 19, 20, 23, 25, 27, 30}, {0, 1, 3, 5, 8, 10, 12, 13, 18, 21, 22, 23, 24, 25, 26, 27, 31}, {6, 9, 10, 12, 15, 16, 19, 20, 21, 22, 23, 24, 28, 29, 30}, {7, 8, 9, 11, 13, 14, 15, 17, 18, 19, 20, 22, 25, 28, 31}, {4, 5, 6, 7, 8, 9, 10, 12, 16, 21, 22, 26, 28, 29}, {4, 6, 8, 11, 13, 17, 20, 21, 23, 27, 28}, {1, 5, 8, 10, 11, 12, 13, 15, 19, 20, 24, 25, 27, 29, 31}, {0, 1, 4, 5, 9, 10, 12, 14, 15, 18, 19, 21, 24, 26, 27, 28, 29, 30, 31}, {3, 7, 9, 10, 11, 12, 15, 16, 18, 22, 24, 27, 28, 30, 31}, {2, 3, 6, 7, 8, 9, 10, 13, 14, 15, 17, 19, 23, 25, 26, 27, 29, 30}, {0, 4, 6, 7, 12, 13, 16, 17, 18, 25, 27, 28, 31}, {1, 5, 6, 12, 16, 19, 24, 25, 26, 27, 29, 30, 31}, {2, 5, 6, 7, 14, 15, 16, 17, 24, 26, 27, 28}, {3, 4, 5, 6, 14, 16, 25, 26, 29}, {0, 1, 2, 3, 5, 6, 7, 8, 11, 12, 15, 20, 21, 22, 24, 26, 27, 28, 29, 30}, {0, 2, 4, 5, 6, 9, 10, 11, 13, 14, 15, 20, 23, 25, 26, 28, 31}, {1, 2, 5, 8, 12, 20, 21, 25, 26, 27, 28, 29}, {0, 1, 3, 4, 5, 9, 13, 20, 24, 25, 26, 28}, {0, 2, 3, 10, 11, 12, 14, 15, 16, 18, 19, 22, 25, 27, 30}, {1, 2, 10, 13, 14, 17, 18, 23, 24, 25, 26, 27, 31}, {1, 2, 3, 9, 11, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 26, 27, 28, 29, 30, 31}, {0, 1, 2, 8, 9, 10, 11, 12, 13, 14, 16, 17, 18, 20, 22, 25, 26, 28, 30}, {4, 6, 7, 8, 9, 10, 11, 12, 13, 17, 20, 21, 22, 23, 25, 31}, {5, 6, 8, 10, 12, 16, 17, 20, 22, 24, 25, 30, 31}, {5, 6, 7, 9, 10, 14, 15, 19, 21, 22, 27, 28, 30}, {4, 5, 6, 8, 9, 11, 14, 18, 19, 20, 21, 23, 26, 27, 29, 31}]
@ i = 0
@ better circuit found: #XORs = 272
@ better circuit found: #XORs = 254
@ better circuit found: #XORs = 252
@ i = 50
@ i = 100
@ i = 150
@ i = 200
@ better circuit found: #XORs = 251
@ i = 250
@ i = 300
@ i = 350
@ i = 400
@ i = 450
@ better circuit found: #XORs = 250
@ i = 500
@ i = 550
@ i = 600
@ i = 650
@ i = 700
@ i = 750
@ i = 800
@ i = 850
@ better circuit found: #XORs = 246
@ i = 900
@ i = 950
@ initial state: [{3, 6}, {9, 10, 24, 27, 29}, {10, 11, 14, 15, 21, 31}, {2, 16, 17, 18, 19, 20, 21, 25}, {1, 12, 24, 27}, {12, 14, 16, 21, 29, 31}, {6, 19, 31}, {2, 5, 21, 24, 25, 27}, {4, 14, 27, 30}, {20, 22, 30, 24, 25, 8}, {23, 22, 7, 8, 26, 12, 14}, {7, 16, 17, 21, 22, 23, 28, 29}, {18, 4, 22, 11, 28}, {17, 2, 4, 23}, {3, 7, 9, 15, 18, 25}, {0, 10, 11, 14, 25, 26}, {5, 16, 22, 24, 27, 28}, {5, 10, 12, 16, 17, 19}, {15, 16, 18, 26, 28}, {4, 5, 14, 16, 25, 26, 29}, {0, 6, 7, 16, 18, 22}, {2, 8, 13, 21, 22, 24, 26}, {0, 8, 20, 23, 26, 29, 30}, {6, 9, 10, 21, 23, 25, 26, 28}, {0, 2, 14, 17, 28}, {10, 12, 13, 18, 25, 26, 27, 30, 31}, {0, 3, 4, 12, 17, 20}, {0, 1, 5, 14, 19, 22, 30}, {4, 7, 9, 10, 13, 17}, {4, 6, 10, 13, 24, 25}, {7, 9, 11, 16, 24, 30}, {0, 5, 8, 14, 16, 20, 24, 31}]
@ The next list is the register allocation order: its last ten entries
@ (0, 1, 4, 6, 9, 13, 17, 18, 29, 30) are the f-indices that are moved to
@ r0-r9 after the load phase; the first 22 stay in their s-registers.
@[2, 3, 5, 7, 8, 10, 11, 12, 14, 15, 16, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 0, 1, 4, 6, 9, 13, 17, 18, 29, 30]
@instructions with recommended mapping to GP and FP registers:
@(cycle count without considering memory stores: 655)

    @ r14 = store base (incoming r0), r12 = load base (incoming r1); this
    @ frees r0-r11 as working registers.
    mov r14, r0
    mov r12, r1

    @ ---- Phase 1: build f0..f31 -----------------------------------------
    @ Each group loads its source words into r0.., folds them into r0 with
    @ eor, then parks the result in sk.  "words a, b, ..." below means the
    @ source words at byte offsets 4*a, 4*b, ... from r12.

    @ f0 -> s0: words 3, 6
    ldr r0, [r12, #12]
    ldr r1, [r12, #24]
    eor r0, r1
    vmov s0, r0

    @ f1 -> s1: words 9, 10, 24, 27, 29
    ldr r0, [r12, #36]
    ldr r1, [r12, #40]
    ldr r2, [r12, #96]
    ldr r3, [r12, #108]
    ldr r4, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s1, r0

    @ f2 -> s2: words 10, 11, 14, 15, 21, 31
    ldr r0, [r12, #40]
    ldr r1, [r12, #44]
    ldr r2, [r12, #56]
    ldr r3, [r12, #60]
    ldr r4, [r12, #84]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s2, r0

    @ f3 -> s3: words 2, 16, 17, 18, 19, 20, 21, 25
    ldr r0, [r12, #8]
    ldr r1, [r12, #64]
    ldr r2, [r12, #68]
    ldr r3, [r12, #72]
    ldr r4, [r12, #76]
    ldr r5, [r12, #80]
    ldr r6, [r12, #84]
    ldr r7, [r12, #100]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    vmov s3, r0

    @ f4 -> s4: words 1, 12, 24, 27
    ldr r0, [r12, #4]
    ldr r1, [r12, #48]
    ldr r2, [r12, #96]
    ldr r3, [r12, #108]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s4, r0

    @ f5 -> s5: words 12, 14, 16, 21, 29, 31
    ldr r0, [r12, #48]
    ldr r1, [r12, #56]
    ldr r2, [r12, #64]
    ldr r3, [r12, #84]
    ldr r4, [r12, #116]
    ldr r5, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s5, r0

    @ f6 -> s6: words 6, 19, 31
    ldr r0, [r12, #24]
    ldr r1, [r12, #76]
    ldr r2, [r12, #124]
    eor r0, r1
    eor r0, r2
    vmov s6, r0

    @ f7 -> s7: words 2, 5, 21, 24, 25, 27
    ldr r0, [r12, #8]
    ldr r1, [r12, #20]
    ldr r2, [r12, #84]
    ldr r3, [r12, #96]
    ldr r4, [r12, #100]
    ldr r5, [r12, #108]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s7, r0

    @ f8 -> s8: words 4, 14, 27, 30
    ldr r0, [r12, #16]
    ldr r1, [r12, #56]
    ldr r2, [r12, #108]
    ldr r3, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s8, r0

    @ f9 -> s9: words 20, 22, 30, 24, 25, 8
    ldr r0, [r12, #80]
    ldr r1, [r12, #88]
    ldr r2, [r12, #120]
    ldr r3, [r12, #96]
    ldr r4, [r12, #100]
    ldr r5, [r12, #32]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s9, r0

    @ f10 -> s10: words 23, 22, 7, 8, 26, 12, 14
    ldr r0, [r12, #92]
    ldr r1, [r12, #88]
    ldr r2, [r12, #28]
    ldr r3, [r12, #32]
    ldr r4, [r12, #104]
    ldr r5, [r12, #48]
    ldr r6, [r12, #56]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s10, r0

    @ f11 -> s11: words 7, 16, 17, 21, 22, 23, 28, 29
    ldr r0, [r12, #28]
    ldr r1, [r12, #64]
    ldr r2, [r12, #68]
    ldr r3, [r12, #84]
    ldr r4, [r12, #88]
    ldr r5, [r12, #92]
    ldr r6, [r12, #112]
    ldr r7, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    vmov s11, r0

    @ f12 -> s12: words 18, 4, 22, 11, 28
    ldr r0, [r12, #72]
    ldr r1, [r12, #16]
    ldr r2, [r12, #88]
    ldr r3, [r12, #44]
    ldr r4, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s12, r0

    @ f13 -> s13: words 17, 2, 4, 23
    ldr r0, [r12, #68]
    ldr r1, [r12, #8]
    ldr r2, [r12, #16]
    ldr r3, [r12, #92]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    vmov s13, r0

    @ f14 -> s14: words 3, 7, 9, 15, 18, 25
    ldr r0, [r12, #12]
    ldr r1, [r12, #28]
    ldr r2, [r12, #36]
    ldr r3, [r12, #60]
    ldr r4, [r12, #72]
    ldr r5, [r12, #100]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s14, r0

    @ f15 -> s15: words 0, 10, 11, 14, 25, 26
    ldr r0, [r12, #0]
    ldr r1, [r12, #40]
    ldr r2, [r12, #44]
    ldr r3, [r12, #56]
    ldr r4, [r12, #100]
    ldr r5, [r12, #104]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s15, r0

    @ f16 -> s16: words 5, 16, 22, 24, 27, 28
    ldr r0, [r12, #20]
    ldr r1, [r12, #64]
    ldr r2, [r12, #88]
    ldr r3, [r12, #96]
    ldr r4, [r12, #108]
    ldr r5, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s16, r0

    @ f17 -> s17: words 5, 10, 12, 16, 17, 19
    ldr r0, [r12, #20]
    ldr r1, [r12, #40]
    ldr r2, [r12, #48]
    ldr r3, [r12, #64]
    ldr r4, [r12, #68]
    ldr r5, [r12, #76]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s17, r0

    @ f18 -> s18: words 15, 16, 18, 26, 28
    ldr r0, [r12, #60]
    ldr r1, [r12, #64]
    ldr r2, [r12, #72]
    ldr r3, [r12, #104]
    ldr r4, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s18, r0

    @ f19 -> s19: words 4, 5, 14, 16, 25, 26, 29
    ldr r0, [r12, #16]
    ldr r1, [r12, #20]
    ldr r2, [r12, #56]
    ldr r3, [r12, #64]
    ldr r4, [r12, #100]
    ldr r5, [r12, #104]
    ldr r6, [r12, #116]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s19, r0

    @ f20 -> s20: words 0, 6, 7, 16, 18, 22
    ldr r0, [r12, #0]
    ldr r1, [r12, #24]
    ldr r2, [r12, #28]
    ldr r3, [r12, #64]
    ldr r4, [r12, #72]
    ldr r5, [r12, #88]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s20, r0

    @ f21 -> s21: words 2, 8, 13, 21, 22, 24, 26
    ldr r0, [r12, #8]
    ldr r1, [r12, #32]
    ldr r2, [r12, #52]
    ldr r3, [r12, #84]
    ldr r4, [r12, #88]
    ldr r5, [r12, #96]
    ldr r6, [r12, #104]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s21, r0

    @ f22 -> s22: words 0, 8, 20, 23, 26, 29, 30
    ldr r0, [r12, #0]
    ldr r1, [r12, #32]
    ldr r2, [r12, #80]
    ldr r3, [r12, #92]
    ldr r4, [r12, #104]
    ldr r5, [r12, #116]
    ldr r6, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s22, r0

    @ f23 -> s23: words 6, 9, 10, 21, 23, 25, 26, 28
    ldr r0, [r12, #24]
    ldr r1, [r12, #36]
    ldr r2, [r12, #40]
    ldr r3, [r12, #84]
    ldr r4, [r12, #92]
    ldr r5, [r12, #100]
    ldr r6, [r12, #104]
    ldr r7, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    vmov s23, r0

    @ f24 -> s24: words 0, 2, 14, 17, 28
    ldr r0, [r12, #0]
    ldr r1, [r12, #8]
    ldr r2, [r12, #56]
    ldr r3, [r12, #68]
    ldr r4, [r12, #112]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    vmov s24, r0

    @ f25 -> s25: words 10, 12, 13, 18, 25, 26, 27, 30, 31
    ldr r0, [r12, #40]
    ldr r1, [r12, #48]
    ldr r2, [r12, #52]
    ldr r3, [r12, #72]
    ldr r4, [r12, #100]
    ldr r5, [r12, #104]
    ldr r6, [r12, #108]
    ldr r7, [r12, #120]
    ldr r8, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    eor r0, r8
    vmov s25, r0

    @ f26 -> s26: words 0, 3, 4, 12, 17, 20
    ldr r0, [r12, #0]
    ldr r1, [r12, #12]
    ldr r2, [r12, #16]
    ldr r3, [r12, #48]
    ldr r4, [r12, #68]
    ldr r5, [r12, #80]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s26, r0

    @ f27 -> s27: words 0, 1, 5, 14, 19, 22, 30
    ldr r0, [r12, #0]
    ldr r1, [r12, #4]
    ldr r2, [r12, #20]
    ldr r3, [r12, #56]
    ldr r4, [r12, #76]
    ldr r5, [r12, #88]
    ldr r6, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    vmov s27, r0

    @ f28 -> s28: words 4, 7, 9, 10, 13, 17
    ldr r0, [r12, #16]
    ldr r1, [r12, #28]
    ldr r2, [r12, #36]
    ldr r3, [r12, #40]
    ldr r4, [r12, #52]
    ldr r5, [r12, #68]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s28, r0

    @ f29 -> s29: words 4, 6, 10, 13, 24, 25
    ldr r0, [r12, #16]
    ldr r1, [r12, #24]
    ldr r2, [r12, #40]
    ldr r3, [r12, #52]
    ldr r4, [r12, #96]
    ldr r5, [r12, #100]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s29, r0

    @ f30 -> s30: words 7, 9, 11, 16, 24, 30
    ldr r0, [r12, #28]
    ldr r1, [r12, #36]
    ldr r2, [r12, #44]
    ldr r3, [r12, #64]
    ldr r4, [r12, #96]
    ldr r5, [r12, #120]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    vmov s30, r0

    @ f31 -> s31: words 0, 5, 8, 14, 16, 20, 24, 31
    ldr r0, [r12, #0]
    ldr r1, [r12, #20]
    ldr r2, [r12, #32]
    ldr r3, [r12, #56]
    ldr r4, [r12, #64]
    ldr r5, [r12, #80]
    ldr r6, [r12, #96]
    ldr r7, [r12, #124]
    eor r0, r1
    eor r0, r2
    eor r0, r3
    eor r0, r4
    eor r0, r5
    eor r0, r6
    eor r0, r7
    vmov s31, r0

    @ ---- Phase 2: move ten values into core registers -------------------
    @ From here on rN carries "gN": g0=f0, g1=f1, g2=f4, g3=f6, g4=f9,
    @ g5=f13, g6=f17, g7=f18, g8=f29, g9=f30.  The s-register copies of
    @ these ten are stale after this point and are never stored.
    vmov r0, s0;
    vmov r1, s1;
    vmov r2, s4;
    vmov r3, s6;
    vmov r4, s9;
    vmov r5, s13;
    vmov r6, s17;
    vmov r7, s18;
    vmov r8, s29;
    vmov r9, s30;

    @ ---- Phase 3: in-place XOR steps ------------------------------------
    @ Step shapes (r10/r11 are scratch):
    @   gX ^= gY : one eor on the core registers
    @   gX ^= fY : fetch sY into r11, eor into rX
    @   fX ^= gY : fetch sX into r10, eor with rY, write r10 back to sX
    @   fX ^= fY : fetch sX/sY into r10/r11, eor, write r10 back to sX
    @ The steps are order-dependent: later steps read values updated by
    @ earlier ones.
    @// g5 ^= f24
    vmov r11, s24
    eor r5, r11
    @// f10 ^= g0
    vmov r10, s10
    eor r10, r0
    vmov s10, r10
    @// f19 ^= g0
    vmov r10, s19
    eor r10, r0
    vmov s19, r10
    @// g0 ^= g2
    eor r0, r2
    @// g0 ^= g1
    eor r0, r1
    @// g2 ^= f8
    vmov r11, s8
    eor r2, r11
    @// g2 ^= g5
    eor r2, r5
    @// g5 ^= f12
    vmov r11, s12
    eor r5, r11
    @// f27 ^= g3
    vmov r10, s27
    eor r10, r3
    vmov s27, r10
    @// g0 ^= f14
    vmov r11, s14
    eor r0, r11
    @// f14 ^= g9
    vmov r10, s14
    eor r10, r9
    vmov s14, r10
    @// g9 ^= g1
    eor r9, r1
    @// f11 ^= g9
    vmov r10, s11
    eor r10, r9
    vmov s11, r10
    @// f12 ^= g7
    vmov r10, s12
    eor r10, r7
    vmov s12, r10
    @// f10 ^= f19
    vmov r10, s10
    vmov r11, s19
    eor r10, r11
    vmov s10, r10
    @// g7 ^= f20
    vmov r11, s20
    eor r7, r11
    @// g7 ^= f24
    vmov r11, s24
    eor r7, r11
    @// f22 ^= g2
    vmov r10, s22
    eor r10, r2
    vmov s22, r10
    @// f8 ^= g4
    vmov r10, s8
    eor r10, r4
    vmov s8, r10
    @// g4 ^= g8
    eor r4, r8
    @// g8 ^= g3
    eor r8, r3
    @// g1 ^= g3
    eor r1, r3
    @// g3 ^= f23
    vmov r11, s23
    eor r3, r11
    @// g3 ^= f11
    vmov r11, s11
    eor r3, r11
    @// g2 ^= f24
    vmov r11, s24
    eor r2, r11
    @// f27 ^= g7
    vmov r10, s27
    eor r10, r7
    vmov s27, r10
    @// g5 ^= f12
    vmov r11, s12
    eor r5, r11
    @// g8 ^= g6
    eor r8, r6
    @// g9 ^= f16
    vmov r11, s16
    eor r9, r11
    @// f27 ^= g6
    vmov r10, s27
    eor r10, r6
    vmov s27, r10
    @// g6 ^= f16
    vmov r11, s16
    eor r6, r11
    @// f21 ^= f12
    vmov r10, s21
    vmov r11, s12
    eor r10, r11
    vmov s21, r10
    @// f11 ^= g4
    vmov r10, s11
    eor r10, r4
    vmov s11, r10
    @// f31 ^= g5
    vmov r10, s31
    eor r10, r5
    vmov s31, r10
    @// g1 ^= f2
    vmov r11, s2
    eor r1, r11
    @// f21 ^= f31
    vmov r10, s21
    vmov r11, s31
    eor r10, r11
    vmov s21, r10
    @// f15 ^= f26
    vmov r10, s15
    vmov r11, s26
    eor r10, r11
    vmov s15, r10
    @// f15 ^= f22
    vmov r10, s15
    vmov r11, s22
    eor r10, r11
    vmov s15, r10
    @// g3 ^= f8
    vmov r11, s8
    eor r3, r11
    @// f24 ^= g6
    vmov r10, s24
    eor r10, r6
    vmov s24, r10
    @// g6 ^= f22
    vmov r11, s22
    eor r6, r11
    @// g0 ^= f22
    vmov r11, s22
    eor r0, r11
    @// f22 ^= f7
    vmov r10, s22
    vmov r11, s7
    eor r10, r11
    vmov s22, r10
    @// f7 ^= f8
    vmov r10, s7
    vmov r11, s8
    eor r10, r11
    vmov s7, r10
    @// f3 ^= f14
    vmov r10, s3
    vmov r11, s14
    eor r10, r11
    vmov s3, r10
    @// f7 ^= f26
    vmov r10, s7
    vmov r11, s26
    eor r10, r11
    vmov s7, r10
    @// f8 ^= f10
    vmov r10, s8
    vmov r11, s10
    eor r10, r11
    vmov s8, r10
    @// f25 ^= g2
    vmov r10, s25
    eor r10, r2
    vmov s25, r10
    @// g2 ^= f7
    vmov r11, s7
    eor r2, r11
    @// g2 ^= g4
    eor r2, r4
    @// g7 ^= f16
    vmov r11, s16
    eor r7, r11
    @// f16 ^= g8
    vmov r10, s16
    eor r10, r8
    vmov s16, r10
    @// f12 ^= g4
    vmov r10, s12
    eor r10, r4
    vmov s12, r10
    @// g8 ^= g4
    eor r8, r4
    @// g4 ^= g0
    eor r4, r0
    @// f28 ^= f16
    vmov r10, s28
    vmov r11, s16
    eor r10, r11
    vmov s28, r10
    @// g6 ^= g8
    eor r6, r8
    @// g4 ^= g3
    eor r4, r3
    @// g3 ^= f25
    vmov r11, s25
    eor r3, r11
    @// g9 ^= g1
    eor r9, r1
    @// f5 ^= f20
    vmov r10, s5
    vmov r11, s20
    eor r10, r11
    vmov s5, r10
    @// f16 ^= f20
    vmov r10, s16
    vmov r11, s20
    eor r10, r11
    vmov s16, r10
    @// g5 ^= f23
    vmov r11, s23
    eor r5, r11
    @// f10 ^= f23
    vmov r10, s10
    vmov r11, s23
    eor r10, r11
    vmov s10, r10
    @// f23 ^= g2
    vmov r10, s23
    eor r10, r2
    vmov s23, r10
    @// f20 ^= f14
    vmov r10, s20
    vmov r11, s14
    eor r10, r11
    vmov s20, r10
    @// f2 ^= f14
    vmov r10, s2
    vmov r11, s14
    eor r10, r11
    vmov s2, r10
    @// g0 ^= g3
    eor r0, r3
    @// f12 ^= g6
    vmov r10, s12
    eor r10, r6
    vmov s12, r10
    @// f8 ^= g9
    vmov r10, s8
    eor r10, r9
    vmov s8, r10
    @// f26 ^= f5
    vmov r10, s26
    vmov r11, s5
    eor r10, r11
    vmov s26, r10
    @// f5 ^= f21
    vmov r10, s5
    vmov r11, s21
    eor r10, r11
    vmov s5, r10
    @// g2 ^= g7
    eor r2, r7
    @// f21 ^= g5
    vmov r10, s21
    eor r10, r5
    vmov s21, r10
    @// f15 ^= g0
    vmov r10, s15
    eor r10, r0
    vmov s15, r10
    @// f31 ^= g1
    vmov r10, s31
    eor r10, r1
    vmov s31, r10
    @// f24 ^= f14
    vmov r10, s24
    vmov r11, s14
    eor r10, r11
    vmov s24, r10
    @// f7 ^= f25
    vmov r10, s7
    vmov r11, s25
    eor r10, r11
    vmov s7, r10
    @// f14 ^= f28
    vmov r10, s14
    vmov r11, s28
    eor r10, r11
    vmov s14, r10
    @// f28 ^= f11
    vmov r10, s28
    vmov r11, s11
    eor r10, r11
    vmov s28, r10
    @// g5 ^= g6
    eor r5, r6
    @// f27 ^= g4
    vmov r10, s27
    eor r10, r4
    vmov s27, r10
    @// f3 ^= f21
    vmov r10, s3
    vmov r11, s21
    eor r10, r11
    vmov s3, r10
    @// f20 ^= f22
    vmov r10, s20
    vmov r11, s22
    eor r10, r11
    vmov s20, r10
    @// f26 ^= g0
    vmov r10, s26
    eor r10, r0
    vmov s26, r10
    @// g1 ^= f7
    vmov r11, s7
    eor r1, r11
    @// f2 ^= f20
    vmov r10, s2
    vmov r11, s20
    eor r10, r11
    vmov s2, r10

    @ ---- Phase 4: write the 32 result words -----------------------------
    @ Word k goes to [r14, #4*k].  Words 0, 1, 4, 6, 9, 13, 17, 18, 29, 30
    @ come from r0-r9 (str); the others come straight from sk (vstr.32).
    str r0, [r14, #0]
    str r1, [r14, #4]
    vstr.32 s2, [r14, #8]
    vstr.32 s3, [r14, #12]
    str r2, [r14, #16]
    vstr.32 s5, [r14, #20]
    str r3, [r14, #24]
    vstr.32 s7, [r14, #28]
    vstr.32 s8, [r14, #32]
    str r4, [r14, #36]
    vstr.32 s10, [r14, #40]
    vstr.32 s11, [r14, #44]
    vstr.32 s12, [r14, #48]
    str r5, [r14, #52]
    vstr.32 s14, [r14, #56]
    vstr.32 s15, [r14, #60]
    vstr.32 s16, [r14, #64]
    str r6, [r14, #68]
    str r7, [r14, #72]
    vstr.32 s19, [r14, #76]
    vstr.32 s20, [r14, #80]
    vstr.32 s21, [r14, #84]
    vstr.32 s22, [r14, #88]
    vstr.32 s23, [r14, #92]
    vstr.32 s24, [r14, #96]
    vstr.32 s25, [r14, #100]
    vstr.32 s26, [r14, #104]
    vstr.32 s27, [r14, #108]
    vstr.32 s28, [r14, #112]
    str r8, [r14, #116]
    str r9, [r14, #120]
    vstr.32 s31, [r14, #124]

    @ Restore in reverse push order; the pop reloads r14 with the saved
    @ return address, which bx lr then uses.
    vpop { d8-d15 }
    pop { r0-r12, r14 }
    bx lr
.size gft_mul_v27, .-gft_mul_v27