/*
 * Revision 1990ae634d0602ff022afc946ef66933e4e4a2dc authored by Santiago Zanella-Beguelin on 09 December 2019, 17:48:55 UTC, committed by Santiago Zanella-Beguelin on 09 December 2019, 17:50:10 UTC
 * 1 parent ae8e182
 * Raw File
 * poly1305-x86_64-mingw.S
 */
/*
 * x64_poly1305 -- absorb input into a Poly1305 accumulator held in a
 * caller-supplied context and, when requested, produce the final value.
 * GNU as, AT&T operand order (source first, destination last).
 *
 * Incoming registers. The rcx/rdx/r8/r9 -> rdi/rsi/rdx/rcx shuffle at entry
 * matches the Microsoft x64 argument order:
 *   rcx = ctx    base address of the context described below
 *   rdx = inp    address of the input bytes
 *   r8  = len    input length in bytes
 *   r9  = flag   copied to ctx+184; the value 1 selects the finishing path
 *
 * Context offsets touched here (bytes from ctx):
 *     0,  8, 16   accumulator limbs h0, h1, h2 (loaded, updated, stored)
 *    24, 32       multiplier limbs r0, r1 (masked in place on every call)
 *    40, 48       128-bit value added to h0:h1 on the finishing path
 *    56           len mod 16 (written here)
 *    64           len rounded down to a multiple of 16 (written here)
 *   184           copy of the flag argument (written here)
 *
 * Register roles once set up:
 *   rdi = ctx, rsi = input cursor, r15 = 16-byte blocks still to process
 *   r14, rbx, rbp = h0, h1, h2
 *   r11 = r0, r12 = r1, r13 = r1 + (r1 >> 2)
 *   rcx = 1 during the block loop (added into h2 for every full block)
 *   rax, rdx, r8, r9, r10 = scratch for the multiply / reduce step
 *
 * Preserved for the caller: rbx, rbp, rdi, rsi, r12-r15 (pushed and popped).
 * May be modified: rax, rcx, rdx, r8-r11, flags. No result is computed into
 * rax; on return it holds the value rdi had on entry.
 *
 * NOTE(review): no .seh_* unwind directives accompany the pushes; confirm
 * whether Windows unwinding through this routine is required.
 */
.text
.global x64_poly1305
x64_poly1305:
  /* Set aside the caller's rdi/rsi, then move the four arguments into
     rdi/rsi/rdx/rcx, which is what the rest of the body uses. */
  mov %rdi, %rax                         /* rax = caller's rdi (pushed below) */
  mov %rsi, %r11                         /* r11 = caller's rsi (pushed below) */
  mov %rcx, %rdi                         /* rdi = ctx */
  mov %rdx, %rsi                         /* rsi = inp */
  mov %r8, %rdx                          /* rdx = len */
  mov %r9, %rcx                          /* rcx = flag */
  movq %rcx, 184(%rdi)                   /* keep flag in ctx; rcx is reused below */
  /* Save registers; the epilogue pops them in the reverse order. */
  push %rbx
  push %rbp
  push %rax                              /* caller's rdi */
  push %r11                              /* caller's rsi */
  push %r12
  push %r13
  push %r14
  push %r15
  /* Mask r0/r1 in place. The constants are 0x0FFFFFFC0FFFFFFF and
     0x0FFFFFFC0FFFFFFC, i.e. the low and high halves of the Poly1305
     clamp mask for r given in RFC 8439. */
  movq 24(%rdi), %r11
  movq 32(%rdi), %r12
  mov $1152921487695413247, %rcx         /* 0x0FFFFFFC0FFFFFFF */
  and %rcx, %r11
  mov $1152921487695413244, %rcx         /* 0x0FFFFFFC0FFFFFFC */
  and %rcx, %r12
  movq %r11, 24(%rdi)
  movq %r12, 32(%rdi)
  /* Split len into whole 16-byte blocks and a 0..15 byte remainder. */
  mov %rdx, %rax
  and $15, %rax                          /* rax = len mod 16 */
  sub %rax, %rdx                         /* rdx = len - (len mod 16) */
  movq %rax, 56(%rdi)
  movq %rdx, 64(%rdi)
  mov $1, %rcx                           /* added into h2 for each full block */
  shr $4, %rdx                           /* bytes -> number of 16-byte blocks */
  mov %rdx, %r15                         /* r15 = loop counter */
  /* Load the working state into registers. */
  movq 24(%rdi), %r11                    /* r11 = r0 */
  movq 32(%rdi), %r13                    /* r13 = r1 (turned into r1 + r1/4 below) */
  movq 0(%rdi), %r14                     /* r14 = h0 */
  movq 8(%rdi), %rbx                     /* rbx = h1 */
  movq 16(%rdi), %rbp                    /* rbp = h2 */
  mov %r13, %r12                         /* r12 = r1 */
  shr $2, %r13                           /* exact: the mask cleared r1's low 2 bits */
  mov %r12, %rax                         /* rax = r1, operand of the first mul in L0 */
  add %r12, %r13                         /* r13 = r1 + (r1 >> 2), i.e. 5*r1/4 */
  jmp L1                                 /* enter at the loop test (handles 0 blocks) */
.balign 16
L0:
  /* One full block: h += 16 input bytes, plus rcx (= 1) into h2, which is
     the bit at weight 2^128 just above the block. */
  addq 0(%rsi), %r14
  adcq 8(%rsi), %rbx
  lea 16(%rsi), %rsi                     /* advance cursor; lea leaves CF intact */
  adc %rcx, %rbp
  /* h = h * r, partially reduced. Entry: rax = r1. Limb products are
     gathered as r14 (weight 2^0), r8 + r9 (2^64) and r10 (2^128); the
     h1*r1 term enters the low limb via r13 = 5*r1/4. */
  mul %r14                               /* rdx:rax = r1 * h0 */
  mov %rax, %r9
  mov %r11, %rax
  mov %rdx, %r10
  mul %r14                               /* rdx:rax = r0 * h0 */
  mov %rax, %r14
  mov %r11, %rax
  mov %rdx, %r8
  mul %rbx                               /* rdx:rax = r0 * h1 */
  add %rax, %r9
  mov %r13, %rax
  adc %rdx, %r10
  mul %rbx                               /* rdx:rax = r13 * h1 */
  mov %rbp, %rbx
  add %rax, %r14
  adc %rdx, %r8
  imul %r13, %rbx                        /* rbx = h2 * r13 (low 64 bits) */
  add %rbx, %r9
  mov %r8, %rbx
  adc $0, %r10
  imul %r11, %rbp                        /* rbp = h2 * r0 (low 64 bits) */
  add %r9, %rbx                          /* rbx = middle limb */
  mov $18446744073709551612, %rax        /* 0xFFFFFFFFFFFFFFFC, i.e. ~3 */
  adc %rbp, %r10                         /* r10 = top limb */
  /* Fold everything at or above bit 130 back into the low limb:
     (top & ~3) + (top >> 2) = 5 * (top >> 2). */
  and %r10, %rax
  mov %r10, %rbp
  shr $2, %r10
  and $3, %rbp                           /* h2 keeps the two bits below 2^130 */
  add %r10, %rax
  add %rax, %r14
  adc $0, %rbx
  adc $0, %rbp
  mov %r12, %rax                         /* rax = r1 again for the next block */
  sub $1, %r15
.balign 16
L1:
  cmp $0, %r15
  jne L0
  /* All full blocks consumed: write the accumulator back. */
  movq %r14, 0(%rdi)
  movq %rbx, 8(%rdi)
  movq %rbp, 16(%rdi)
  movq 184(%rdi), %rax
  cmp $1, %rax
  jne L2                                 /* flag != 1: return without finishing */
  movq 56(%rdi), %r15                    /* r15 = len mod 16 saved at entry */
  cmp $0, %r15
  je L4                                  /* no leftover bytes */
  movq 32(%rdi), %rax                    /* rax = r1, operand of the first mul at L7 */
  /* NOTE(review): both quadwords are loaded regardless of how many leftover
     bytes exist and the surplus is masked off below, so 16 bytes at rsi
     must be readable here -- confirm callers guarantee that. */
  movq 0(%rsi), %r8
  movq 8(%rsi), %r9
  cmp $8, %r15
  jae L6
  /* 1..7 leftover bytes: keep them from r8, drop r9, and add a single 1
     bit directly above the last kept byte. */
  mov %r15, %rcx
  shl $3, %rcx                           /* rcx = 8 * leftover */
  mov $1, %rdx
  shl %cl, %rdx                          /* rdx = 1 << (8 * leftover) */
  mov %rdx, %rcx
  sub $1, %rcx                           /* rcx = mask covering the kept bytes */
  and %rcx, %r8
  mov $0, %r9
  add %r8, %r14
  adc %r9, %rbx
  adc $0, %rbp
  add %rdx, %r14                         /* the 1 bit lands in the low limb */
  adc $0, %rbx
  adc $0, %rbp
  jmp L7
L6:
  /* 8..15 leftover bytes: r8 is used whole, r9 is masked, and the 1 bit
     is added into the second limb. */
  mov %r15, %rcx
  sub $8, %rcx
  shl $3, %rcx                           /* rcx = 8 * (leftover - 8) */
  mov $1, %rdx
  shl %cl, %rdx                          /* rdx = 1 << (8 * (leftover - 8)) */
  mov %rdx, %rcx
  sub $1, %rcx                           /* rcx = mask covering the kept bytes of r9 */
  and %rcx, %r9
  add %r8, %r14
  adc %r9, %rbx
  adc $0, %rbp
  add $0, %r14                           /* adds nothing; leaves CF = 0 for the adc below */
  adc %rdx, %rbx                         /* the 1 bit lands in the second limb */
  adc $0, %rbp
L7:
  /* Same multiply / partial-reduce sequence as in the L0 loop body.
     Entry: rax = r1. */
  mul %r14                               /* rdx:rax = r1 * h0 */
  mov %rax, %r9
  mov %r11, %rax
  mov %rdx, %r10
  mul %r14                               /* rdx:rax = r0 * h0 */
  mov %rax, %r14
  mov %r11, %rax
  mov %rdx, %r8
  mul %rbx                               /* rdx:rax = r0 * h1 */
  add %rax, %r9
  mov %r13, %rax
  adc %rdx, %r10
  mul %rbx                               /* rdx:rax = r13 * h1 */
  mov %rbp, %rbx
  add %rax, %r14
  adc %rdx, %r8
  imul %r13, %rbx                        /* rbx = h2 * r13 (low 64 bits) */
  add %rbx, %r9
  mov %r8, %rbx
  adc $0, %r10
  imul %r11, %rbp                        /* rbp = h2 * r0 (low 64 bits) */
  add %r9, %rbx                          /* rbx = middle limb */
  mov $18446744073709551612, %rax        /* 0xFFFFFFFFFFFFFFFC, i.e. ~3 */
  adc %rbp, %r10                         /* r10 = top limb */
  and %r10, %rax
  mov %r10, %rbp
  shr $2, %r10
  and $3, %rbp
  add %r10, %rax                         /* rax = 5 * (top >> 2) */
  add %rax, %r14
  adc $0, %rbx
  adc $0, %rbp
  jmp L5
L4:
L5:
  /* Branch-free final selection: form g = h + 5 in r8:r9:r10 and keep the
     low 128 bits of g if the sum reached bit 130, otherwise those of h. */
  mov %r14, %r8
  mov %rbx, %r9
  mov %rbp, %r10
  add $5, %r8
  adc $0, %r9
  adc $0, %r10
  shr $2, %r10                           /* r10 = bits of g from 2^130 upward */
  /* The two masks below are exact complements only when r10 is 0 or 1;
     presumably the partial reduction keeps h2 small enough for that. */
  mov %r10, %rax
  sub $1, %rax                           /* all ones when r10 == 0 */
  and %rax, %r14
  and %rax, %rbx
  mov $0, %rax
  sub %r10, %rax                         /* all ones when r10 == 1 */
  and %rax, %r8
  and %rax, %r9
  add %r8, %r14                          /* one side of each sum was zeroed above */
  add %r9, %rbx
  /* Add the 128-bit value at ctx+40; the carry out of the high half is dropped. */
  movq 40(%rdi), %rax
  movq 48(%rdi), %rdx
  add %rax, %r14
  adc %rdx, %rbx
  jmp L3
L2:
L3:
  /* Store the (possibly finished) accumulator; rbp is written as it stands.
     On the flag != 1 path this repeats the store made after the loop. */
  movq %r14, 0(%rdi)
  movq %rbx, 8(%rdi)
  movq %rbp, 16(%rdi)
  /* Restore saved registers in reverse push order. */
  pop %r15
  pop %r14
  pop %r13
  pop %r12
  pop %rsi                               /* caller's rsi */
  pop %rax                               /* caller's rdi, moved into place below */
  pop %rbp
  pop %rbx
  mov %rax, %rdi
  ret


/* back to top */