https://github.com/weidai11/cryptopp
Raw File
sha256_armv4.S
@ Copyright 2007-2019 The OpenSSL Project Authors. All Rights Reserved.
@
@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@ ====================================================================

@ JW, MAY 2019: Begin defines from taken from arm_arch.h
@               The defines were included through the header.

# if !defined(__ARM_ARCH__)
#  if defined(__CC_ARM)
#   define __ARM_ARCH__ __TARGET_ARCH_ARM
#   if defined(__BIG_ENDIAN)
#    define __ARMEB__
#   else
#    define __ARMEL__
#   endif
#  elif defined(__GNUC__)
#   if   defined(__aarch64__)
#    define __ARM_ARCH__ 8
#    if __BYTE_ORDER__==__ORDER_BIG_ENDIAN__
#     define __ARMEB__
#    else
#     define __ARMEL__
#    endif

#   elif defined(__ARM_ARCH)
#    define __ARM_ARCH__ __ARM_ARCH
#   elif defined(__ARM_ARCH_8A__)
#    define __ARM_ARCH__ 8
#   elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)     || \
        defined(__ARM_ARCH_7R__)|| defined(__ARM_ARCH_7M__)     || \
        defined(__ARM_ARCH_7EM__)
#    define __ARM_ARCH__ 7
#   elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__)     || \
        defined(__ARM_ARCH_6K__)|| defined(__ARM_ARCH_6M__)     || \
        defined(__ARM_ARCH_6Z__)|| defined(__ARM_ARCH_6ZK__)    || \
        defined(__ARM_ARCH_6T2__)
#    define __ARM_ARCH__ 6
#   elif defined(__ARM_ARCH_5__) || defined(__ARM_ARCH_5T__)     || \
        defined(__ARM_ARCH_5E__)|| defined(__ARM_ARCH_5TE__)    || \
        defined(__ARM_ARCH_5TEJ__)
#    define __ARM_ARCH__ 5
#   elif defined(__ARM_ARCH_4__) || defined(__ARM_ARCH_4T__)
#    define __ARM_ARCH__ 4
#   else
#    error "unsupported ARM architecture"
#   endif
#  endif
# endif

# if !defined(__ARM_MAX_ARCH__)
#  define __ARM_MAX_ARCH__ __ARM_ARCH__
# endif

# if __ARM_MAX_ARCH__<__ARM_ARCH__
#  error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
# elif __ARM_MAX_ARCH__!=__ARM_ARCH__
#  if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__)
#   error "can't build universal big-endian binary"
#  endif
# endif

# define CRYPTOGAMS_ARMV7_NEON      (1<<0)

@ JW, MAY 2019: End defines from taken from arm_arch.h
@               Back to original Cryptogams code

#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.text

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator

.align	5
.globl	cryptogams_sha256_block_data_order
.type	cryptogams_sha256_block_data_order,%function

cryptogams_sha256_block_data_order:
.Lcryptogams_sha256_block_data_order:

#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ cryptogams_sha256_block_data_order
#else
	adr	r3,.Lcryptogams_sha256_block_data_order
#endif

	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])

.Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
.Lrounds_16_xx:
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#ifdef	__thumb2__
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	cryptogams_sha256_block_data_order,.-cryptogams_sha256_block_data_order

#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.globl	cryptogams_sha256_block_data_order_neon
.type	cryptogams_sha256_block_data_order_neon,%function
.align	5
.skip	16
cryptogams_sha256_block_data_order_neon:

	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48

.align	4
.L_00_48:
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d6,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d7,d7,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q3
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[r14]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64
	bne	.L_00_48

	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	cryptogams_sha256_block_data_order_neon,.-cryptogams_sha256_block_data_order_neon
#endif
back to top