@ Home | History | Annotate | Download | only in fipsmodule  (code-browser navigation header; not part of the original source)
      1 @ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
      2 @
      3 @ Licensed under the OpenSSL license (the "License").  You may not use
      4 @ this file except in compliance with the License.  You can obtain a copy
      5 @ in the file LICENSE in the source distribution or at
      6 @ https://www.openssl.org/source/license.html
      7 
      8 
      9 @ ====================================================================
     10 @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
     11 @ project. The module is, however, dual licensed under OpenSSL and
     12 @ CRYPTOGAMS licenses depending on where you obtain it. For further
     13 @ details see http://www.openssl.org/~appro/cryptogams/.
     14 @
     15 @ Permission to use under GPL terms is granted.
     16 @ ====================================================================
     17 
     18 @ SHA256 block procedure for ARMv4. May 2007.
     19 
     20 @ Performance is ~2x better than gcc 3.4 generated code and in "abso-
     21 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
     22 @ byte [on single-issue Xscale PXA250 core].
     23 
     24 @ July 2010.
     25 @
     26 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on
     27 @ Cortex A8 core and ~20 cycles per processed byte.
     28 
     29 @ February 2011.
     30 @
     31 @ Profiler-assisted and platform-specific optimization resulted in 16%
     32 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
     33 
     34 @ September 2013.
     35 @
     36 @ Add NEON implementation. On Cortex A8 it was measured to process one
     37 @ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
     38 @ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
     39 @ code (meaning that latter performs sub-optimally, nothing was done
     40 @ about it).
     41 
     42 @ May 2014.
     43 @
     44 @ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
     45 
      46 #ifndef __KERNEL__
@ Standalone (non-kernel) builds take __ARM_ARCH__ / __ARM_MAX_ARCH__ and the
@ ARMV7_NEON / ARMV8_SHA256 capability bits from <openssl/arm_arch.h>.
      47 # include <openssl/arm_arch.h>
      48 #else
@ Linux-kernel builds derive the architecture level from the kernel's own
@ __LINUX_ARM_ARCH__ and cap the runtime-dispatch ceiling at ARMv7.
      49 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
      50 # define __ARM_MAX_ARCH__ 7
      51 #endif
      52 
      53 .text
@ When targeting Thumb-2, assemble in unified syntax; otherwise emit classic
@ 32-bit ARM encodings. The entry code below also keys off __thumb2__ for
@ its PC-relative address computation.
      54 #if defined(__thumb2__)
      55 .syntax	unified
      56 .thumb
      57 #else
      58 .code	32
      59 #endif
     60 
     61 
      62 .align	5
@ SHA-256 round-constant table K[0..63] (FIPS 180-4, sec. 4.2.2): the first
@ 32 bits of the fractional parts of the cube roots of the first 64 primes.
@ The integer code path walks it sequentially through r14
@ ("ldr ...,[r14],#4  @ *K256++" in the round bodies below).
      63 K256:
      64 .word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
      65 .word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
      66 .word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
      67 .word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
      68 .word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
      69 .word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
      70 .word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
      71 .word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
      72 .word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
      73 .word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
      74 .word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
      75 .word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
      76 .word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
      77 .word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
      78 .word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
      79 .word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
      80 
@ Zero word past the last constant. NOTE(review): its consumer is not in this
@ chunk — presumably a table-end sentinel for a vector path; verify. The
@ integer rounds instead detect the end by comparing the fetched constant's
@ low byte against 0xf2 (low byte of 0xc67178f2 — see the "done?" checks).
      81 .word	0				@ terminator
      82 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ PC-relative offset from Lsha256_block_data_order to OPENSSL_armcap_P. The
@ entry code loads this word and adds it to r3 (which holds the address of
@ Lsha256_block_data_order) to reach the capability flags without needing a
@ load-time relocation; on __APPLE__ one extra indirection is applied.
      83 LOPENSSL_armcap:
      84 .word	OPENSSL_armcap_P-Lsha256_block_data_order
      85 #endif
     86 .align	5
     87 
     88 .globl	_sha256_block_data_order
     89 .private_extern	_sha256_block_data_order
     90 #ifdef __thumb2__
     91 .thumb_func	_sha256_block_data_order
     92 #endif
     93 _sha256_block_data_order:
     94 Lsha256_block_data_order:
     95 #if __ARM_ARCH__<7 && !defined(__thumb2__)
     96 	sub	r3,pc,#8		@ _sha256_block_data_order
     97 #else
     98 	adr	r3,Lsha256_block_data_order
     99 #endif
    100 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
    101 	ldr	r12,LOPENSSL_armcap
    102 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
    103 #ifdef	__APPLE__
    104 	ldr	r12,[r12]
    105 #endif
    106 	tst	r12,#ARMV8_SHA256
    107 	bne	LARMv8
    108 	tst	r12,#ARMV7_NEON
    109 	bne	LNEON
    110 #endif
    111 	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
    112 	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
    113 	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
    114 	sub	r14,r3,#256+32	@ K256
    115 	sub	sp,sp,#16*4		@ alloca(X[16])
    116 Loop:
    117 # if __ARM_ARCH__>=7
    118 	ldr	r2,[r1],#4
    119 # else
    120 	ldrb	r2,[r1,#3]
    121 # endif
    122 	eor	r3,r5,r6		@ magic
    123 	eor	r12,r12,r12
    124 #if __ARM_ARCH__>=7
    125 	@ ldr	r2,[r1],#4			@ 0
    126 # if 0==15
    127 	str	r1,[sp,#17*4]			@ make room for r1
    128 # endif
    129 	eor	r0,r8,r8,ror#5
    130 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
    131 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
    132 # ifndef __ARMEB__
    133 	rev	r2,r2
    134 # endif
    135 #else
    136 	@ ldrb	r2,[r1,#3]			@ 0
    137 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
    138 	ldrb	r12,[r1,#2]
    139 	ldrb	r0,[r1,#1]
    140 	orr	r2,r2,r12,lsl#8
    141 	ldrb	r12,[r1],#4
    142 	orr	r2,r2,r0,lsl#16
    143 # if 0==15
    144 	str	r1,[sp,#17*4]			@ make room for r1
    145 # endif
    146 	eor	r0,r8,r8,ror#5
    147 	orr	r2,r2,r12,lsl#24
    148 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
    149 #endif
    150 	ldr	r12,[r14],#4			@ *K256++
    151 	add	r11,r11,r2			@ h+=X[i]
    152 	str	r2,[sp,#0*4]
    153 	eor	r2,r9,r10
    154 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
    155 	and	r2,r2,r8
    156 	add	r11,r11,r12			@ h+=K256[i]
    157 	eor	r2,r2,r10			@ Ch(e,f,g)
    158 	eor	r0,r4,r4,ror#11
    159 	add	r11,r11,r2			@ h+=Ch(e,f,g)
    160 #if 0==31
    161 	and	r12,r12,#0xff
    162 	cmp	r12,#0xf2			@ done?
    163 #endif
    164 #if 0<15
    165 # if __ARM_ARCH__>=7
    166 	ldr	r2,[r1],#4			@ prefetch
    167 # else
    168 	ldrb	r2,[r1,#3]
    169 # endif
    170 	eor	r12,r4,r5			@ a^b, b^c in next round
    171 #else
    172 	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
    173 	eor	r12,r4,r5			@ a^b, b^c in next round
    174 	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
    175 #endif
    176 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
    177 	and	r3,r3,r12			@ (b^c)&=(a^b)
    178 	add	r7,r7,r11			@ d+=h
    179 	eor	r3,r3,r5			@ Maj(a,b,c)
    180 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
    181 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
    182 #if __ARM_ARCH__>=7
    183 	@ ldr	r2,[r1],#4			@ 1
    184 # if 1==15
    185 	str	r1,[sp,#17*4]			@ make room for r1
    186 # endif
    187 	eor	r0,r7,r7,ror#5
    188 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
    189 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
    190 # ifndef __ARMEB__
    191 	rev	r2,r2
    192 # endif
    193 #else
    194 	@ ldrb	r2,[r1,#3]			@ 1
    195 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
    196 	ldrb	r3,[r1,#2]
    197 	ldrb	r0,[r1,#1]
    198 	orr	r2,r2,r3,lsl#8
    199 	ldrb	r3,[r1],#4
    200 	orr	r2,r2,r0,lsl#16
    201 # if 1==15
    202 	str	r1,[sp,#17*4]			@ make room for r1
    203 # endif
    204 	eor	r0,r7,r7,ror#5
    205 	orr	r2,r2,r3,lsl#24
    206 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
    207 #endif
    208 	ldr	r3,[r14],#4			@ *K256++
    209 	add	r10,r10,r2			@ h+=X[i]
    210 	str	r2,[sp,#1*4]
    211 	eor	r2,r8,r9
    212 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
    213 	and	r2,r2,r7
    214 	add	r10,r10,r3			@ h+=K256[i]
    215 	eor	r2,r2,r9			@ Ch(e,f,g)
    216 	eor	r0,r11,r11,ror#11
    217 	add	r10,r10,r2			@ h+=Ch(e,f,g)
    218 #if 1==31
    219 	and	r3,r3,#0xff
    220 	cmp	r3,#0xf2			@ done?
    221 #endif
    222 #if 1<15
    223 # if __ARM_ARCH__>=7
    224 	ldr	r2,[r1],#4			@ prefetch
    225 # else
    226 	ldrb	r2,[r1,#3]
    227 # endif
    228 	eor	r3,r11,r4			@ a^b, b^c in next round
    229 #else
    230 	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
    231 	eor	r3,r11,r4			@ a^b, b^c in next round
    232 	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
    233 #endif
    234 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
    235 	and	r12,r12,r3			@ (b^c)&=(a^b)
    236 	add	r6,r6,r10			@ d+=h
    237 	eor	r12,r12,r4			@ Maj(a,b,c)
    238 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
    239 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
    240 #if __ARM_ARCH__>=7
    241 	@ ldr	r2,[r1],#4			@ 2
    242 # if 2==15
    243 	str	r1,[sp,#17*4]			@ make room for r1
    244 # endif
    245 	eor	r0,r6,r6,ror#5
    246 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
    247 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
    248 # ifndef __ARMEB__
    249 	rev	r2,r2
    250 # endif
    251 #else
    252 	@ ldrb	r2,[r1,#3]			@ 2
    253 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
    254 	ldrb	r12,[r1,#2]
    255 	ldrb	r0,[r1,#1]
    256 	orr	r2,r2,r12,lsl#8
    257 	ldrb	r12,[r1],#4
    258 	orr	r2,r2,r0,lsl#16
    259 # if 2==15
    260 	str	r1,[sp,#17*4]			@ make room for r1
    261 # endif
    262 	eor	r0,r6,r6,ror#5
    263 	orr	r2,r2,r12,lsl#24
    264 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
    265 #endif
    266 	ldr	r12,[r14],#4			@ *K256++
    267 	add	r9,r9,r2			@ h+=X[i]
    268 	str	r2,[sp,#2*4]
    269 	eor	r2,r7,r8
    270 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
    271 	and	r2,r2,r6
    272 	add	r9,r9,r12			@ h+=K256[i]
    273 	eor	r2,r2,r8			@ Ch(e,f,g)
    274 	eor	r0,r10,r10,ror#11
    275 	add	r9,r9,r2			@ h+=Ch(e,f,g)
    276 #if 2==31
    277 	and	r12,r12,#0xff
    278 	cmp	r12,#0xf2			@ done?
    279 #endif
    280 #if 2<15
    281 # if __ARM_ARCH__>=7
    282 	ldr	r2,[r1],#4			@ prefetch
    283 # else
    284 	ldrb	r2,[r1,#3]
    285 # endif
    286 	eor	r12,r10,r11			@ a^b, b^c in next round
    287 #else
    288 	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
    289 	eor	r12,r10,r11			@ a^b, b^c in next round
    290 	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
    291 #endif
    292 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
    293 	and	r3,r3,r12			@ (b^c)&=(a^b)
    294 	add	r5,r5,r9			@ d+=h
    295 	eor	r3,r3,r11			@ Maj(a,b,c)
    296 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
    297 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
    298 #if __ARM_ARCH__>=7
    299 	@ ldr	r2,[r1],#4			@ 3
    300 # if 3==15
    301 	str	r1,[sp,#17*4]			@ make room for r1
    302 # endif
    303 	eor	r0,r5,r5,ror#5
    304 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
    305 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
    306 # ifndef __ARMEB__
    307 	rev	r2,r2
    308 # endif
    309 #else
    310 	@ ldrb	r2,[r1,#3]			@ 3
    311 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
    312 	ldrb	r3,[r1,#2]
    313 	ldrb	r0,[r1,#1]
    314 	orr	r2,r2,r3,lsl#8
    315 	ldrb	r3,[r1],#4
    316 	orr	r2,r2,r0,lsl#16
    317 # if 3==15
    318 	str	r1,[sp,#17*4]			@ make room for r1
    319 # endif
    320 	eor	r0,r5,r5,ror#5
    321 	orr	r2,r2,r3,lsl#24
    322 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
    323 #endif
    324 	ldr	r3,[r14],#4			@ *K256++
    325 	add	r8,r8,r2			@ h+=X[i]
    326 	str	r2,[sp,#3*4]
    327 	eor	r2,r6,r7
    328 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
    329 	and	r2,r2,r5
    330 	add	r8,r8,r3			@ h+=K256[i]
    331 	eor	r2,r2,r7			@ Ch(e,f,g)
    332 	eor	r0,r9,r9,ror#11
    333 	add	r8,r8,r2			@ h+=Ch(e,f,g)
    334 #if 3==31
    335 	and	r3,r3,#0xff
    336 	cmp	r3,#0xf2			@ done?
    337 #endif
    338 #if 3<15
    339 # if __ARM_ARCH__>=7
    340 	ldr	r2,[r1],#4			@ prefetch
    341 # else
    342 	ldrb	r2,[r1,#3]
    343 # endif
    344 	eor	r3,r9,r10			@ a^b, b^c in next round
    345 #else
    346 	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
    347 	eor	r3,r9,r10			@ a^b, b^c in next round
    348 	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
    349 #endif
    350 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
    351 	and	r12,r12,r3			@ (b^c)&=(a^b)
    352 	add	r4,r4,r8			@ d+=h
    353 	eor	r12,r12,r10			@ Maj(a,b,c)
    354 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
    355 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
    356 #if __ARM_ARCH__>=7
    357 	@ ldr	r2,[r1],#4			@ 4
    358 # if 4==15
    359 	str	r1,[sp,#17*4]			@ make room for r1
    360 # endif
    361 	eor	r0,r4,r4,ror#5
    362 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
    363 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
    364 # ifndef __ARMEB__
    365 	rev	r2,r2
    366 # endif
    367 #else
    368 	@ ldrb	r2,[r1,#3]			@ 4
    369 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
    370 	ldrb	r12,[r1,#2]
    371 	ldrb	r0,[r1,#1]
    372 	orr	r2,r2,r12,lsl#8
    373 	ldrb	r12,[r1],#4
    374 	orr	r2,r2,r0,lsl#16
    375 # if 4==15
    376 	str	r1,[sp,#17*4]			@ make room for r1
    377 # endif
    378 	eor	r0,r4,r4,ror#5
    379 	orr	r2,r2,r12,lsl#24
    380 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
    381 #endif
    382 	ldr	r12,[r14],#4			@ *K256++
    383 	add	r7,r7,r2			@ h+=X[i]
    384 	str	r2,[sp,#4*4]
    385 	eor	r2,r5,r6
    386 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
    387 	and	r2,r2,r4
    388 	add	r7,r7,r12			@ h+=K256[i]
    389 	eor	r2,r2,r6			@ Ch(e,f,g)
    390 	eor	r0,r8,r8,ror#11
    391 	add	r7,r7,r2			@ h+=Ch(e,f,g)
    392 #if 4==31
    393 	and	r12,r12,#0xff
    394 	cmp	r12,#0xf2			@ done?
    395 #endif
    396 #if 4<15
    397 # if __ARM_ARCH__>=7
    398 	ldr	r2,[r1],#4			@ prefetch
    399 # else
    400 	ldrb	r2,[r1,#3]
    401 # endif
    402 	eor	r12,r8,r9			@ a^b, b^c in next round
    403 #else
    404 	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
    405 	eor	r12,r8,r9			@ a^b, b^c in next round
    406 	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
    407 #endif
    408 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
    409 	and	r3,r3,r12			@ (b^c)&=(a^b)
    410 	add	r11,r11,r7			@ d+=h
    411 	eor	r3,r3,r9			@ Maj(a,b,c)
    412 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
    413 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
    414 #if __ARM_ARCH__>=7
    415 	@ ldr	r2,[r1],#4			@ 5
    416 # if 5==15
    417 	str	r1,[sp,#17*4]			@ make room for r1
    418 # endif
    419 	eor	r0,r11,r11,ror#5
    420 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
    421 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
    422 # ifndef __ARMEB__
    423 	rev	r2,r2
    424 # endif
    425 #else
    426 	@ ldrb	r2,[r1,#3]			@ 5
    427 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
    428 	ldrb	r3,[r1,#2]
    429 	ldrb	r0,[r1,#1]
    430 	orr	r2,r2,r3,lsl#8
    431 	ldrb	r3,[r1],#4
    432 	orr	r2,r2,r0,lsl#16
    433 # if 5==15
    434 	str	r1,[sp,#17*4]			@ make room for r1
    435 # endif
    436 	eor	r0,r11,r11,ror#5
    437 	orr	r2,r2,r3,lsl#24
    438 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
    439 #endif
    440 	ldr	r3,[r14],#4			@ *K256++
    441 	add	r6,r6,r2			@ h+=X[i]
    442 	str	r2,[sp,#5*4]
    443 	eor	r2,r4,r5
    444 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
    445 	and	r2,r2,r11
    446 	add	r6,r6,r3			@ h+=K256[i]
    447 	eor	r2,r2,r5			@ Ch(e,f,g)
    448 	eor	r0,r7,r7,ror#11
    449 	add	r6,r6,r2			@ h+=Ch(e,f,g)
    450 #if 5==31
    451 	and	r3,r3,#0xff
    452 	cmp	r3,#0xf2			@ done?
    453 #endif
    454 #if 5<15
    455 # if __ARM_ARCH__>=7
    456 	ldr	r2,[r1],#4			@ prefetch
    457 # else
    458 	ldrb	r2,[r1,#3]
    459 # endif
    460 	eor	r3,r7,r8			@ a^b, b^c in next round
    461 #else
    462 	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
    463 	eor	r3,r7,r8			@ a^b, b^c in next round
    464 	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
    465 #endif
    466 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
    467 	and	r12,r12,r3			@ (b^c)&=(a^b)
    468 	add	r10,r10,r6			@ d+=h
    469 	eor	r12,r12,r8			@ Maj(a,b,c)
    470 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
    471 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
    472 #if __ARM_ARCH__>=7
    473 	@ ldr	r2,[r1],#4			@ 6
    474 # if 6==15
    475 	str	r1,[sp,#17*4]			@ make room for r1
    476 # endif
    477 	eor	r0,r10,r10,ror#5
    478 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
    479 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
    480 # ifndef __ARMEB__
    481 	rev	r2,r2
    482 # endif
    483 #else
    484 	@ ldrb	r2,[r1,#3]			@ 6
    485 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
    486 	ldrb	r12,[r1,#2]
    487 	ldrb	r0,[r1,#1]
    488 	orr	r2,r2,r12,lsl#8
    489 	ldrb	r12,[r1],#4
    490 	orr	r2,r2,r0,lsl#16
    491 # if 6==15
    492 	str	r1,[sp,#17*4]			@ make room for r1
    493 # endif
    494 	eor	r0,r10,r10,ror#5
    495 	orr	r2,r2,r12,lsl#24
    496 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
    497 #endif
    498 	ldr	r12,[r14],#4			@ *K256++
    499 	add	r5,r5,r2			@ h+=X[i]
    500 	str	r2,[sp,#6*4]
    501 	eor	r2,r11,r4
    502 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
    503 	and	r2,r2,r10
    504 	add	r5,r5,r12			@ h+=K256[i]
    505 	eor	r2,r2,r4			@ Ch(e,f,g)
    506 	eor	r0,r6,r6,ror#11
    507 	add	r5,r5,r2			@ h+=Ch(e,f,g)
    508 #if 6==31
    509 	and	r12,r12,#0xff
    510 	cmp	r12,#0xf2			@ done?
    511 #endif
    512 #if 6<15
    513 # if __ARM_ARCH__>=7
    514 	ldr	r2,[r1],#4			@ prefetch
    515 # else
    516 	ldrb	r2,[r1,#3]
    517 # endif
    518 	eor	r12,r6,r7			@ a^b, b^c in next round
    519 #else
    520 	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
    521 	eor	r12,r6,r7			@ a^b, b^c in next round
    522 	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
    523 #endif
    524 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
    525 	and	r3,r3,r12			@ (b^c)&=(a^b)
    526 	add	r9,r9,r5			@ d+=h
    527 	eor	r3,r3,r7			@ Maj(a,b,c)
    528 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
    529 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
    530 #if __ARM_ARCH__>=7
    531 	@ ldr	r2,[r1],#4			@ 7
    532 # if 7==15
    533 	str	r1,[sp,#17*4]			@ make room for r1
    534 # endif
    535 	eor	r0,r9,r9,ror#5
    536 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
    537 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
    538 # ifndef __ARMEB__
    539 	rev	r2,r2
    540 # endif
    541 #else
    542 	@ ldrb	r2,[r1,#3]			@ 7
    543 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
    544 	ldrb	r3,[r1,#2]
    545 	ldrb	r0,[r1,#1]
    546 	orr	r2,r2,r3,lsl#8
    547 	ldrb	r3,[r1],#4
    548 	orr	r2,r2,r0,lsl#16
    549 # if 7==15
    550 	str	r1,[sp,#17*4]			@ make room for r1
    551 # endif
    552 	eor	r0,r9,r9,ror#5
    553 	orr	r2,r2,r3,lsl#24
    554 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
    555 #endif
    556 	ldr	r3,[r14],#4			@ *K256++
    557 	add	r4,r4,r2			@ h+=X[i]
    558 	str	r2,[sp,#7*4]
    559 	eor	r2,r10,r11
    560 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
    561 	and	r2,r2,r9
    562 	add	r4,r4,r3			@ h+=K256[i]
    563 	eor	r2,r2,r11			@ Ch(e,f,g)
    564 	eor	r0,r5,r5,ror#11
    565 	add	r4,r4,r2			@ h+=Ch(e,f,g)
    566 #if 7==31
    567 	and	r3,r3,#0xff
    568 	cmp	r3,#0xf2			@ done?
    569 #endif
    570 #if 7<15
    571 # if __ARM_ARCH__>=7
    572 	ldr	r2,[r1],#4			@ prefetch
    573 # else
    574 	ldrb	r2,[r1,#3]
    575 # endif
    576 	eor	r3,r5,r6			@ a^b, b^c in next round
    577 #else
    578 	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
    579 	eor	r3,r5,r6			@ a^b, b^c in next round
    580 	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
    581 #endif
    582 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
    583 	and	r12,r12,r3			@ (b^c)&=(a^b)
    584 	add	r8,r8,r4			@ d+=h
    585 	eor	r12,r12,r6			@ Maj(a,b,c)
    586 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
    587 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
    588 #if __ARM_ARCH__>=7
    589 	@ ldr	r2,[r1],#4			@ 8
    590 # if 8==15
    591 	str	r1,[sp,#17*4]			@ make room for r1
    592 # endif
    593 	eor	r0,r8,r8,ror#5
    594 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
    595 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
    596 # ifndef __ARMEB__
    597 	rev	r2,r2
    598 # endif
    599 #else
    600 	@ ldrb	r2,[r1,#3]			@ 8
    601 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
    602 	ldrb	r12,[r1,#2]
    603 	ldrb	r0,[r1,#1]
    604 	orr	r2,r2,r12,lsl#8
    605 	ldrb	r12,[r1],#4
    606 	orr	r2,r2,r0,lsl#16
    607 # if 8==15
    608 	str	r1,[sp,#17*4]			@ make room for r1
    609 # endif
    610 	eor	r0,r8,r8,ror#5
    611 	orr	r2,r2,r12,lsl#24
    612 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
    613 #endif
    614 	ldr	r12,[r14],#4			@ *K256++
    615 	add	r11,r11,r2			@ h+=X[i]
    616 	str	r2,[sp,#8*4]
    617 	eor	r2,r9,r10
    618 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
    619 	and	r2,r2,r8
    620 	add	r11,r11,r12			@ h+=K256[i]
    621 	eor	r2,r2,r10			@ Ch(e,f,g)
    622 	eor	r0,r4,r4,ror#11
    623 	add	r11,r11,r2			@ h+=Ch(e,f,g)
    624 #if 8==31
    625 	and	r12,r12,#0xff
    626 	cmp	r12,#0xf2			@ done?
    627 #endif
    628 #if 8<15
    629 # if __ARM_ARCH__>=7
    630 	ldr	r2,[r1],#4			@ prefetch
    631 # else
    632 	ldrb	r2,[r1,#3]
    633 # endif
    634 	eor	r12,r4,r5			@ a^b, b^c in next round
    635 #else
    636 	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
    637 	eor	r12,r4,r5			@ a^b, b^c in next round
    638 	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
    639 #endif
    640 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
    641 	and	r3,r3,r12			@ (b^c)&=(a^b)
    642 	add	r7,r7,r11			@ d+=h
    643 	eor	r3,r3,r5			@ Maj(a,b,c)
    644 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
    645 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
    646 #if __ARM_ARCH__>=7
    647 	@ ldr	r2,[r1],#4			@ 9
    648 # if 9==15
    649 	str	r1,[sp,#17*4]			@ make room for r1
    650 # endif
    651 	eor	r0,r7,r7,ror#5
    652 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
    653 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
    654 # ifndef __ARMEB__
    655 	rev	r2,r2
    656 # endif
    657 #else
    658 	@ ldrb	r2,[r1,#3]			@ 9
    659 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
    660 	ldrb	r3,[r1,#2]
    661 	ldrb	r0,[r1,#1]
    662 	orr	r2,r2,r3,lsl#8
    663 	ldrb	r3,[r1],#4
    664 	orr	r2,r2,r0,lsl#16
    665 # if 9==15
    666 	str	r1,[sp,#17*4]			@ make room for r1
    667 # endif
    668 	eor	r0,r7,r7,ror#5
    669 	orr	r2,r2,r3,lsl#24
    670 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
    671 #endif
    672 	ldr	r3,[r14],#4			@ *K256++
    673 	add	r10,r10,r2			@ h+=X[i]
    674 	str	r2,[sp,#9*4]
    675 	eor	r2,r8,r9
    676 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
    677 	and	r2,r2,r7
    678 	add	r10,r10,r3			@ h+=K256[i]
    679 	eor	r2,r2,r9			@ Ch(e,f,g)
    680 	eor	r0,r11,r11,ror#11
    681 	add	r10,r10,r2			@ h+=Ch(e,f,g)
    682 #if 9==31
    683 	and	r3,r3,#0xff
    684 	cmp	r3,#0xf2			@ done?
    685 #endif
    686 #if 9<15
    687 # if __ARM_ARCH__>=7
    688 	ldr	r2,[r1],#4			@ prefetch
    689 # else
    690 	ldrb	r2,[r1,#3]
    691 # endif
    692 	eor	r3,r11,r4			@ a^b, b^c in next round
    693 #else
    694 	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
    695 	eor	r3,r11,r4			@ a^b, b^c in next round
    696 	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
    697 #endif
    698 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
    699 	and	r12,r12,r3			@ (b^c)&=(a^b)
    700 	add	r6,r6,r10			@ d+=h
    701 	eor	r12,r12,r4			@ Maj(a,b,c)
    702 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
    703 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
    704 #if __ARM_ARCH__>=7
    705 	@ ldr	r2,[r1],#4			@ 10
    706 # if 10==15
    707 	str	r1,[sp,#17*4]			@ make room for r1
    708 # endif
    709 	eor	r0,r6,r6,ror#5
    710 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
    711 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
    712 # ifndef __ARMEB__
    713 	rev	r2,r2
    714 # endif
    715 #else
    716 	@ ldrb	r2,[r1,#3]			@ 10
    717 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
    718 	ldrb	r12,[r1,#2]
    719 	ldrb	r0,[r1,#1]
    720 	orr	r2,r2,r12,lsl#8
    721 	ldrb	r12,[r1],#4
    722 	orr	r2,r2,r0,lsl#16
    723 # if 10==15
    724 	str	r1,[sp,#17*4]			@ make room for r1
    725 # endif
    726 	eor	r0,r6,r6,ror#5
    727 	orr	r2,r2,r12,lsl#24
    728 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
    729 #endif
    730 	ldr	r12,[r14],#4			@ *K256++
    731 	add	r9,r9,r2			@ h+=X[i]
    732 	str	r2,[sp,#10*4]
    733 	eor	r2,r7,r8
    734 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
    735 	and	r2,r2,r6
    736 	add	r9,r9,r12			@ h+=K256[i]
    737 	eor	r2,r2,r8			@ Ch(e,f,g)
    738 	eor	r0,r10,r10,ror#11
    739 	add	r9,r9,r2			@ h+=Ch(e,f,g)
    740 #if 10==31
    741 	and	r12,r12,#0xff
    742 	cmp	r12,#0xf2			@ done?
    743 #endif
    744 #if 10<15
    745 # if __ARM_ARCH__>=7
    746 	ldr	r2,[r1],#4			@ prefetch
    747 # else
    748 	ldrb	r2,[r1,#3]
    749 # endif
    750 	eor	r12,r10,r11			@ a^b, b^c in next round
    751 #else
    752 	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
    753 	eor	r12,r10,r11			@ a^b, b^c in next round
    754 	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
    755 #endif
    756 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
    757 	and	r3,r3,r12			@ (b^c)&=(a^b)
    758 	add	r5,r5,r9			@ d+=h
    759 	eor	r3,r3,r11			@ Maj(a,b,c)
    760 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
    761 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
    762 #if __ARM_ARCH__>=7
    763 	@ ldr	r2,[r1],#4			@ 11
    764 # if 11==15
    765 	str	r1,[sp,#17*4]			@ make room for r1
    766 # endif
    767 	eor	r0,r5,r5,ror#5
    768 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
    769 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
    770 # ifndef __ARMEB__
    771 	rev	r2,r2
    772 # endif
    773 #else
    774 	@ ldrb	r2,[r1,#3]			@ 11
    775 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
    776 	ldrb	r3,[r1,#2]
    777 	ldrb	r0,[r1,#1]
    778 	orr	r2,r2,r3,lsl#8
    779 	ldrb	r3,[r1],#4
    780 	orr	r2,r2,r0,lsl#16
    781 # if 11==15
    782 	str	r1,[sp,#17*4]			@ make room for r1
    783 # endif
    784 	eor	r0,r5,r5,ror#5
    785 	orr	r2,r2,r3,lsl#24
    786 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
    787 #endif
    788 	ldr	r3,[r14],#4			@ *K256++
    789 	add	r8,r8,r2			@ h+=X[i]
    790 	str	r2,[sp,#11*4]
    791 	eor	r2,r6,r7
    792 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
    793 	and	r2,r2,r5
    794 	add	r8,r8,r3			@ h+=K256[i]
    795 	eor	r2,r2,r7			@ Ch(e,f,g)
    796 	eor	r0,r9,r9,ror#11
    797 	add	r8,r8,r2			@ h+=Ch(e,f,g)
    798 #if 11==31
    799 	and	r3,r3,#0xff
    800 	cmp	r3,#0xf2			@ done?
    801 #endif
    802 #if 11<15
    803 # if __ARM_ARCH__>=7
    804 	ldr	r2,[r1],#4			@ prefetch
    805 # else
    806 	ldrb	r2,[r1,#3]
    807 # endif
    808 	eor	r3,r9,r10			@ a^b, b^c in next round
    809 #else
    810 	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
    811 	eor	r3,r9,r10			@ a^b, b^c in next round
    812 	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
    813 #endif
    814 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
    815 	and	r12,r12,r3			@ (b^c)&=(a^b)
    816 	add	r4,r4,r8			@ d+=h
    817 	eor	r12,r12,r10			@ Maj(a,b,c)
    818 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
    819 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
    820 #if __ARM_ARCH__>=7
    821 	@ ldr	r2,[r1],#4			@ 12
    822 # if 12==15
    823 	str	r1,[sp,#17*4]			@ make room for r1
    824 # endif
    825 	eor	r0,r4,r4,ror#5
    826 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
    827 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
    828 # ifndef __ARMEB__
    829 	rev	r2,r2
    830 # endif
    831 #else
    832 	@ ldrb	r2,[r1,#3]			@ 12
    833 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
    834 	ldrb	r12,[r1,#2]
    835 	ldrb	r0,[r1,#1]
    836 	orr	r2,r2,r12,lsl#8
    837 	ldrb	r12,[r1],#4
    838 	orr	r2,r2,r0,lsl#16
    839 # if 12==15
    840 	str	r1,[sp,#17*4]			@ make room for r1
    841 # endif
    842 	eor	r0,r4,r4,ror#5
    843 	orr	r2,r2,r12,lsl#24
    844 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
    845 #endif
    846 	ldr	r12,[r14],#4			@ *K256++
    847 	add	r7,r7,r2			@ h+=X[i]
    848 	str	r2,[sp,#12*4]
    849 	eor	r2,r5,r6
    850 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
    851 	and	r2,r2,r4
    852 	add	r7,r7,r12			@ h+=K256[i]
    853 	eor	r2,r2,r6			@ Ch(e,f,g)
    854 	eor	r0,r8,r8,ror#11
    855 	add	r7,r7,r2			@ h+=Ch(e,f,g)
    856 #if 12==31
    857 	and	r12,r12,#0xff
    858 	cmp	r12,#0xf2			@ done?
    859 #endif
    860 #if 12<15
    861 # if __ARM_ARCH__>=7
    862 	ldr	r2,[r1],#4			@ prefetch
    863 # else
    864 	ldrb	r2,[r1,#3]
    865 # endif
    866 	eor	r12,r8,r9			@ a^b, b^c in next round
    867 #else
    868 	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
    869 	eor	r12,r8,r9			@ a^b, b^c in next round
    870 	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
    871 #endif
    872 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
    873 	and	r3,r3,r12			@ (b^c)&=(a^b)
    874 	add	r11,r11,r7			@ d+=h
    875 	eor	r3,r3,r9			@ Maj(a,b,c)
    876 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
    877 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
    878 #if __ARM_ARCH__>=7
    879 	@ ldr	r2,[r1],#4			@ 13
    880 # if 13==15
    881 	str	r1,[sp,#17*4]			@ make room for r1
    882 # endif
    883 	eor	r0,r11,r11,ror#5
    884 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
    885 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
    886 # ifndef __ARMEB__
    887 	rev	r2,r2
    888 # endif
    889 #else
    890 	@ ldrb	r2,[r1,#3]			@ 13
    891 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
    892 	ldrb	r3,[r1,#2]
    893 	ldrb	r0,[r1,#1]
    894 	orr	r2,r2,r3,lsl#8
    895 	ldrb	r3,[r1],#4
    896 	orr	r2,r2,r0,lsl#16
    897 # if 13==15
    898 	str	r1,[sp,#17*4]			@ make room for r1
    899 # endif
    900 	eor	r0,r11,r11,ror#5
    901 	orr	r2,r2,r3,lsl#24
    902 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
    903 #endif
    904 	ldr	r3,[r14],#4			@ *K256++
    905 	add	r6,r6,r2			@ h+=X[i]
    906 	str	r2,[sp,#13*4]
    907 	eor	r2,r4,r5
    908 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
    909 	and	r2,r2,r11
    910 	add	r6,r6,r3			@ h+=K256[i]
    911 	eor	r2,r2,r5			@ Ch(e,f,g)
    912 	eor	r0,r7,r7,ror#11
    913 	add	r6,r6,r2			@ h+=Ch(e,f,g)
    914 #if 13==31
    915 	and	r3,r3,#0xff
    916 	cmp	r3,#0xf2			@ done?
    917 #endif
    918 #if 13<15
    919 # if __ARM_ARCH__>=7
    920 	ldr	r2,[r1],#4			@ prefetch
    921 # else
    922 	ldrb	r2,[r1,#3]
    923 # endif
    924 	eor	r3,r7,r8			@ a^b, b^c in next round
    925 #else
    926 	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
    927 	eor	r3,r7,r8			@ a^b, b^c in next round
    928 	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
    929 #endif
    930 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
    931 	and	r12,r12,r3			@ (b^c)&=(a^b)
    932 	add	r10,r10,r6			@ d+=h
    933 	eor	r12,r12,r8			@ Maj(a,b,c)
    934 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
    935 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
    936 #if __ARM_ARCH__>=7
    937 	@ ldr	r2,[r1],#4			@ 14
    938 # if 14==15
    939 	str	r1,[sp,#17*4]			@ make room for r1
    940 # endif
    941 	eor	r0,r10,r10,ror#5
    942 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
    943 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
    944 # ifndef __ARMEB__
    945 	rev	r2,r2
    946 # endif
    947 #else
    948 	@ ldrb	r2,[r1,#3]			@ 14
    949 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
    950 	ldrb	r12,[r1,#2]
    951 	ldrb	r0,[r1,#1]
    952 	orr	r2,r2,r12,lsl#8
    953 	ldrb	r12,[r1],#4
    954 	orr	r2,r2,r0,lsl#16
    955 # if 14==15
    956 	str	r1,[sp,#17*4]			@ make room for r1
    957 # endif
    958 	eor	r0,r10,r10,ror#5
    959 	orr	r2,r2,r12,lsl#24
    960 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
    961 #endif
    962 	ldr	r12,[r14],#4			@ *K256++
    963 	add	r5,r5,r2			@ h+=X[i]
    964 	str	r2,[sp,#14*4]
    965 	eor	r2,r11,r4
    966 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
    967 	and	r2,r2,r10
    968 	add	r5,r5,r12			@ h+=K256[i]
    969 	eor	r2,r2,r4			@ Ch(e,f,g)
    970 	eor	r0,r6,r6,ror#11
    971 	add	r5,r5,r2			@ h+=Ch(e,f,g)
    972 #if 14==31
    973 	and	r12,r12,#0xff
    974 	cmp	r12,#0xf2			@ done?
    975 #endif
    976 #if 14<15
    977 # if __ARM_ARCH__>=7
    978 	ldr	r2,[r1],#4			@ prefetch
    979 # else
    980 	ldrb	r2,[r1,#3]
    981 # endif
    982 	eor	r12,r6,r7			@ a^b, b^c in next round
    983 #else
    984 	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
    985 	eor	r12,r6,r7			@ a^b, b^c in next round
    986 	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
    987 #endif
    988 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
    989 	and	r3,r3,r12			@ (b^c)&=(a^b)
    990 	add	r9,r9,r5			@ d+=h
    991 	eor	r3,r3,r7			@ Maj(a,b,c)
    992 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
    993 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
    994 #if __ARM_ARCH__>=7
    995 	@ ldr	r2,[r1],#4			@ 15
    996 # if 15==15
    997 	str	r1,[sp,#17*4]			@ make room for r1
    998 # endif
    999 	eor	r0,r9,r9,ror#5
   1000 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
   1001 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
   1002 # ifndef __ARMEB__
   1003 	rev	r2,r2
   1004 # endif
   1005 #else
   1006 	@ ldrb	r2,[r1,#3]			@ 15
   1007 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
   1008 	ldrb	r3,[r1,#2]
   1009 	ldrb	r0,[r1,#1]
   1010 	orr	r2,r2,r3,lsl#8
   1011 	ldrb	r3,[r1],#4
   1012 	orr	r2,r2,r0,lsl#16
   1013 # if 15==15
   1014 	str	r1,[sp,#17*4]			@ make room for r1
   1015 # endif
   1016 	eor	r0,r9,r9,ror#5
   1017 	orr	r2,r2,r3,lsl#24
   1018 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
   1019 #endif
   1020 	ldr	r3,[r14],#4			@ *K256++
   1021 	add	r4,r4,r2			@ h+=X[i]
   1022 	str	r2,[sp,#15*4]
   1023 	eor	r2,r10,r11
   1024 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
   1025 	and	r2,r2,r9
   1026 	add	r4,r4,r3			@ h+=K256[i]
   1027 	eor	r2,r2,r11			@ Ch(e,f,g)
   1028 	eor	r0,r5,r5,ror#11
   1029 	add	r4,r4,r2			@ h+=Ch(e,f,g)
   1030 #if 15==31
   1031 	and	r3,r3,#0xff
   1032 	cmp	r3,#0xf2			@ done?
   1033 #endif
   1034 #if 15<15
   1035 # if __ARM_ARCH__>=7
   1036 	ldr	r2,[r1],#4			@ prefetch
   1037 # else
   1038 	ldrb	r2,[r1,#3]
   1039 # endif
   1040 	eor	r3,r5,r6			@ a^b, b^c in next round
   1041 #else
   1042 	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
   1043 	eor	r3,r5,r6			@ a^b, b^c in next round
   1044 	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
   1045 #endif
   1046 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
   1047 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1048 	add	r8,r8,r4			@ d+=h
   1049 	eor	r12,r12,r6			@ Maj(a,b,c)
   1050 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
   1051 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
   1052 Lrounds_16_xx:
   1053 	@ ldr	r2,[sp,#1*4]		@ 16
   1054 	@ ldr	r1,[sp,#14*4]
   1055 	mov	r0,r2,ror#7
   1056 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
   1057 	mov	r12,r1,ror#17
   1058 	eor	r0,r0,r2,ror#18
   1059 	eor	r12,r12,r1,ror#19
   1060 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1061 	ldr	r2,[sp,#0*4]
   1062 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1063 	ldr	r1,[sp,#9*4]
   1064 
   1065 	add	r12,r12,r0
   1066 	eor	r0,r8,r8,ror#5	@ from BODY_00_15
   1067 	add	r2,r2,r12
   1068 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
   1069 	add	r2,r2,r1			@ X[i]
   1070 	ldr	r12,[r14],#4			@ *K256++
   1071 	add	r11,r11,r2			@ h+=X[i]
   1072 	str	r2,[sp,#0*4]
   1073 	eor	r2,r9,r10
   1074 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
   1075 	and	r2,r2,r8
   1076 	add	r11,r11,r12			@ h+=K256[i]
   1077 	eor	r2,r2,r10			@ Ch(e,f,g)
   1078 	eor	r0,r4,r4,ror#11
   1079 	add	r11,r11,r2			@ h+=Ch(e,f,g)
   1080 #if 16==31
   1081 	and	r12,r12,#0xff
   1082 	cmp	r12,#0xf2			@ done?
   1083 #endif
   1084 #if 16<15
   1085 # if __ARM_ARCH__>=7
   1086 	ldr	r2,[r1],#4			@ prefetch
   1087 # else
   1088 	ldrb	r2,[r1,#3]
   1089 # endif
   1090 	eor	r12,r4,r5			@ a^b, b^c in next round
   1091 #else
   1092 	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
   1093 	eor	r12,r4,r5			@ a^b, b^c in next round
   1094 	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
   1095 #endif
   1096 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
   1097 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1098 	add	r7,r7,r11			@ d+=h
   1099 	eor	r3,r3,r5			@ Maj(a,b,c)
   1100 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
   1101 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
   1102 	@ ldr	r2,[sp,#2*4]		@ 17
   1103 	@ ldr	r1,[sp,#15*4]
   1104 	mov	r0,r2,ror#7
   1105 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
   1106 	mov	r3,r1,ror#17
   1107 	eor	r0,r0,r2,ror#18
   1108 	eor	r3,r3,r1,ror#19
   1109 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1110 	ldr	r2,[sp,#1*4]
   1111 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1112 	ldr	r1,[sp,#10*4]
   1113 
   1114 	add	r3,r3,r0
   1115 	eor	r0,r7,r7,ror#5	@ from BODY_00_15
   1116 	add	r2,r2,r3
   1117 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
   1118 	add	r2,r2,r1			@ X[i]
   1119 	ldr	r3,[r14],#4			@ *K256++
   1120 	add	r10,r10,r2			@ h+=X[i]
   1121 	str	r2,[sp,#1*4]
   1122 	eor	r2,r8,r9
   1123 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
   1124 	and	r2,r2,r7
   1125 	add	r10,r10,r3			@ h+=K256[i]
   1126 	eor	r2,r2,r9			@ Ch(e,f,g)
   1127 	eor	r0,r11,r11,ror#11
   1128 	add	r10,r10,r2			@ h+=Ch(e,f,g)
   1129 #if 17==31
   1130 	and	r3,r3,#0xff
   1131 	cmp	r3,#0xf2			@ done?
   1132 #endif
   1133 #if 17<15
   1134 # if __ARM_ARCH__>=7
   1135 	ldr	r2,[r1],#4			@ prefetch
   1136 # else
   1137 	ldrb	r2,[r1,#3]
   1138 # endif
   1139 	eor	r3,r11,r4			@ a^b, b^c in next round
   1140 #else
   1141 	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
   1142 	eor	r3,r11,r4			@ a^b, b^c in next round
   1143 	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
   1144 #endif
   1145 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
   1146 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1147 	add	r6,r6,r10			@ d+=h
   1148 	eor	r12,r12,r4			@ Maj(a,b,c)
   1149 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
   1150 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
   1151 	@ ldr	r2,[sp,#3*4]		@ 18
   1152 	@ ldr	r1,[sp,#0*4]
   1153 	mov	r0,r2,ror#7
   1154 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
   1155 	mov	r12,r1,ror#17
   1156 	eor	r0,r0,r2,ror#18
   1157 	eor	r12,r12,r1,ror#19
   1158 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1159 	ldr	r2,[sp,#2*4]
   1160 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1161 	ldr	r1,[sp,#11*4]
   1162 
   1163 	add	r12,r12,r0
   1164 	eor	r0,r6,r6,ror#5	@ from BODY_00_15
   1165 	add	r2,r2,r12
   1166 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
   1167 	add	r2,r2,r1			@ X[i]
   1168 	ldr	r12,[r14],#4			@ *K256++
   1169 	add	r9,r9,r2			@ h+=X[i]
   1170 	str	r2,[sp,#2*4]
   1171 	eor	r2,r7,r8
   1172 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
   1173 	and	r2,r2,r6
   1174 	add	r9,r9,r12			@ h+=K256[i]
   1175 	eor	r2,r2,r8			@ Ch(e,f,g)
   1176 	eor	r0,r10,r10,ror#11
   1177 	add	r9,r9,r2			@ h+=Ch(e,f,g)
   1178 #if 18==31
   1179 	and	r12,r12,#0xff
   1180 	cmp	r12,#0xf2			@ done?
   1181 #endif
   1182 #if 18<15
   1183 # if __ARM_ARCH__>=7
   1184 	ldr	r2,[r1],#4			@ prefetch
   1185 # else
   1186 	ldrb	r2,[r1,#3]
   1187 # endif
   1188 	eor	r12,r10,r11			@ a^b, b^c in next round
   1189 #else
   1190 	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
   1191 	eor	r12,r10,r11			@ a^b, b^c in next round
   1192 	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
   1193 #endif
   1194 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
   1195 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1196 	add	r5,r5,r9			@ d+=h
   1197 	eor	r3,r3,r11			@ Maj(a,b,c)
   1198 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
   1199 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
   1200 	@ ldr	r2,[sp,#4*4]		@ 19
   1201 	@ ldr	r1,[sp,#1*4]
   1202 	mov	r0,r2,ror#7
   1203 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
   1204 	mov	r3,r1,ror#17
   1205 	eor	r0,r0,r2,ror#18
   1206 	eor	r3,r3,r1,ror#19
   1207 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1208 	ldr	r2,[sp,#3*4]
   1209 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1210 	ldr	r1,[sp,#12*4]
   1211 
   1212 	add	r3,r3,r0
   1213 	eor	r0,r5,r5,ror#5	@ from BODY_00_15
   1214 	add	r2,r2,r3
   1215 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
   1216 	add	r2,r2,r1			@ X[i]
   1217 	ldr	r3,[r14],#4			@ *K256++
   1218 	add	r8,r8,r2			@ h+=X[i]
   1219 	str	r2,[sp,#3*4]
   1220 	eor	r2,r6,r7
   1221 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
   1222 	and	r2,r2,r5
   1223 	add	r8,r8,r3			@ h+=K256[i]
   1224 	eor	r2,r2,r7			@ Ch(e,f,g)
   1225 	eor	r0,r9,r9,ror#11
   1226 	add	r8,r8,r2			@ h+=Ch(e,f,g)
   1227 #if 19==31
   1228 	and	r3,r3,#0xff
   1229 	cmp	r3,#0xf2			@ done?
   1230 #endif
   1231 #if 19<15
   1232 # if __ARM_ARCH__>=7
   1233 	ldr	r2,[r1],#4			@ prefetch
   1234 # else
   1235 	ldrb	r2,[r1,#3]
   1236 # endif
   1237 	eor	r3,r9,r10			@ a^b, b^c in next round
   1238 #else
   1239 	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
   1240 	eor	r3,r9,r10			@ a^b, b^c in next round
   1241 	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
   1242 #endif
   1243 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
   1244 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1245 	add	r4,r4,r8			@ d+=h
   1246 	eor	r12,r12,r10			@ Maj(a,b,c)
   1247 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
   1248 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
   1249 	@ ldr	r2,[sp,#5*4]		@ 20
   1250 	@ ldr	r1,[sp,#2*4]
   1251 	mov	r0,r2,ror#7
   1252 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
   1253 	mov	r12,r1,ror#17
   1254 	eor	r0,r0,r2,ror#18
   1255 	eor	r12,r12,r1,ror#19
   1256 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1257 	ldr	r2,[sp,#4*4]
   1258 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1259 	ldr	r1,[sp,#13*4]
   1260 
   1261 	add	r12,r12,r0
   1262 	eor	r0,r4,r4,ror#5	@ from BODY_00_15
   1263 	add	r2,r2,r12
   1264 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
   1265 	add	r2,r2,r1			@ X[i]
   1266 	ldr	r12,[r14],#4			@ *K256++
   1267 	add	r7,r7,r2			@ h+=X[i]
   1268 	str	r2,[sp,#4*4]
   1269 	eor	r2,r5,r6
   1270 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
   1271 	and	r2,r2,r4
   1272 	add	r7,r7,r12			@ h+=K256[i]
   1273 	eor	r2,r2,r6			@ Ch(e,f,g)
   1274 	eor	r0,r8,r8,ror#11
   1275 	add	r7,r7,r2			@ h+=Ch(e,f,g)
   1276 #if 20==31
   1277 	and	r12,r12,#0xff
   1278 	cmp	r12,#0xf2			@ done?
   1279 #endif
   1280 #if 20<15
   1281 # if __ARM_ARCH__>=7
   1282 	ldr	r2,[r1],#4			@ prefetch
   1283 # else
   1284 	ldrb	r2,[r1,#3]
   1285 # endif
   1286 	eor	r12,r8,r9			@ a^b, b^c in next round
   1287 #else
   1288 	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
   1289 	eor	r12,r8,r9			@ a^b, b^c in next round
   1290 	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
   1291 #endif
   1292 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
   1293 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1294 	add	r11,r11,r7			@ d+=h
   1295 	eor	r3,r3,r9			@ Maj(a,b,c)
   1296 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
   1297 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
   1298 	@ ldr	r2,[sp,#6*4]		@ 21
   1299 	@ ldr	r1,[sp,#3*4]
   1300 	mov	r0,r2,ror#7
   1301 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
   1302 	mov	r3,r1,ror#17
   1303 	eor	r0,r0,r2,ror#18
   1304 	eor	r3,r3,r1,ror#19
   1305 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1306 	ldr	r2,[sp,#5*4]
   1307 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1308 	ldr	r1,[sp,#14*4]
   1309 
   1310 	add	r3,r3,r0
   1311 	eor	r0,r11,r11,ror#5	@ from BODY_00_15
   1312 	add	r2,r2,r3
   1313 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
   1314 	add	r2,r2,r1			@ X[i]
   1315 	ldr	r3,[r14],#4			@ *K256++
   1316 	add	r6,r6,r2			@ h+=X[i]
   1317 	str	r2,[sp,#5*4]
   1318 	eor	r2,r4,r5
   1319 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
   1320 	and	r2,r2,r11
   1321 	add	r6,r6,r3			@ h+=K256[i]
   1322 	eor	r2,r2,r5			@ Ch(e,f,g)
   1323 	eor	r0,r7,r7,ror#11
   1324 	add	r6,r6,r2			@ h+=Ch(e,f,g)
   1325 #if 21==31
   1326 	and	r3,r3,#0xff
   1327 	cmp	r3,#0xf2			@ done?
   1328 #endif
   1329 #if 21<15
   1330 # if __ARM_ARCH__>=7
   1331 	ldr	r2,[r1],#4			@ prefetch
   1332 # else
   1333 	ldrb	r2,[r1,#3]
   1334 # endif
   1335 	eor	r3,r7,r8			@ a^b, b^c in next round
   1336 #else
   1337 	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
   1338 	eor	r3,r7,r8			@ a^b, b^c in next round
   1339 	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
   1340 #endif
   1341 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
   1342 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1343 	add	r10,r10,r6			@ d+=h
   1344 	eor	r12,r12,r8			@ Maj(a,b,c)
   1345 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
   1346 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
   1347 	@ ldr	r2,[sp,#7*4]		@ 22
   1348 	@ ldr	r1,[sp,#4*4]
   1349 	mov	r0,r2,ror#7
   1350 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
   1351 	mov	r12,r1,ror#17
   1352 	eor	r0,r0,r2,ror#18
   1353 	eor	r12,r12,r1,ror#19
   1354 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1355 	ldr	r2,[sp,#6*4]
   1356 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1357 	ldr	r1,[sp,#15*4]
   1358 
   1359 	add	r12,r12,r0
   1360 	eor	r0,r10,r10,ror#5	@ from BODY_00_15
   1361 	add	r2,r2,r12
   1362 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
   1363 	add	r2,r2,r1			@ X[i]
   1364 	ldr	r12,[r14],#4			@ *K256++
   1365 	add	r5,r5,r2			@ h+=X[i]
   1366 	str	r2,[sp,#6*4]
   1367 	eor	r2,r11,r4
   1368 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
   1369 	and	r2,r2,r10
   1370 	add	r5,r5,r12			@ h+=K256[i]
   1371 	eor	r2,r2,r4			@ Ch(e,f,g)
   1372 	eor	r0,r6,r6,ror#11
   1373 	add	r5,r5,r2			@ h+=Ch(e,f,g)
   1374 #if 22==31
   1375 	and	r12,r12,#0xff
   1376 	cmp	r12,#0xf2			@ done?
   1377 #endif
   1378 #if 22<15
   1379 # if __ARM_ARCH__>=7
   1380 	ldr	r2,[r1],#4			@ prefetch
   1381 # else
   1382 	ldrb	r2,[r1,#3]
   1383 # endif
   1384 	eor	r12,r6,r7			@ a^b, b^c in next round
   1385 #else
   1386 	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
   1387 	eor	r12,r6,r7			@ a^b, b^c in next round
   1388 	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
   1389 #endif
   1390 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
   1391 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1392 	add	r9,r9,r5			@ d+=h
   1393 	eor	r3,r3,r7			@ Maj(a,b,c)
   1394 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
   1395 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
   1396 	@ ldr	r2,[sp,#8*4]		@ 23
   1397 	@ ldr	r1,[sp,#5*4]
   1398 	mov	r0,r2,ror#7
   1399 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
   1400 	mov	r3,r1,ror#17
   1401 	eor	r0,r0,r2,ror#18
   1402 	eor	r3,r3,r1,ror#19
   1403 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1404 	ldr	r2,[sp,#7*4]
   1405 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1406 	ldr	r1,[sp,#0*4]
   1407 
   1408 	add	r3,r3,r0
   1409 	eor	r0,r9,r9,ror#5	@ from BODY_00_15
   1410 	add	r2,r2,r3
   1411 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
   1412 	add	r2,r2,r1			@ X[i]
   1413 	ldr	r3,[r14],#4			@ *K256++
   1414 	add	r4,r4,r2			@ h+=X[i]
   1415 	str	r2,[sp,#7*4]
   1416 	eor	r2,r10,r11
   1417 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
   1418 	and	r2,r2,r9
   1419 	add	r4,r4,r3			@ h+=K256[i]
   1420 	eor	r2,r2,r11			@ Ch(e,f,g)
   1421 	eor	r0,r5,r5,ror#11
   1422 	add	r4,r4,r2			@ h+=Ch(e,f,g)
   1423 #if 23==31
   1424 	and	r3,r3,#0xff
   1425 	cmp	r3,#0xf2			@ done?
   1426 #endif
   1427 #if 23<15
   1428 # if __ARM_ARCH__>=7
   1429 	ldr	r2,[r1],#4			@ prefetch
   1430 # else
   1431 	ldrb	r2,[r1,#3]
   1432 # endif
   1433 	eor	r3,r5,r6			@ a^b, b^c in next round
   1434 #else
   1435 	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
   1436 	eor	r3,r5,r6			@ a^b, b^c in next round
   1437 	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
   1438 #endif
   1439 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
   1440 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1441 	add	r8,r8,r4			@ d+=h
   1442 	eor	r12,r12,r6			@ Maj(a,b,c)
   1443 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
   1444 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
   1445 	@ ldr	r2,[sp,#9*4]		@ 24
   1446 	@ ldr	r1,[sp,#6*4]
   1447 	mov	r0,r2,ror#7
   1448 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
   1449 	mov	r12,r1,ror#17
   1450 	eor	r0,r0,r2,ror#18
   1451 	eor	r12,r12,r1,ror#19
   1452 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1453 	ldr	r2,[sp,#8*4]
   1454 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1455 	ldr	r1,[sp,#1*4]
   1456 
   1457 	add	r12,r12,r0
   1458 	eor	r0,r8,r8,ror#5	@ from BODY_00_15
   1459 	add	r2,r2,r12
   1460 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
   1461 	add	r2,r2,r1			@ X[i]
   1462 	ldr	r12,[r14],#4			@ *K256++
   1463 	add	r11,r11,r2			@ h+=X[i]
   1464 	str	r2,[sp,#8*4]
   1465 	eor	r2,r9,r10
   1466 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
   1467 	and	r2,r2,r8
   1468 	add	r11,r11,r12			@ h+=K256[i]
   1469 	eor	r2,r2,r10			@ Ch(e,f,g)
   1470 	eor	r0,r4,r4,ror#11
   1471 	add	r11,r11,r2			@ h+=Ch(e,f,g)
   1472 #if 24==31
   1473 	and	r12,r12,#0xff
   1474 	cmp	r12,#0xf2			@ done?
   1475 #endif
   1476 #if 24<15
   1477 # if __ARM_ARCH__>=7
   1478 	ldr	r2,[r1],#4			@ prefetch
   1479 # else
   1480 	ldrb	r2,[r1,#3]
   1481 # endif
   1482 	eor	r12,r4,r5			@ a^b, b^c in next round
   1483 #else
   1484 	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
   1485 	eor	r12,r4,r5			@ a^b, b^c in next round
   1486 	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
   1487 #endif
   1488 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
   1489 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1490 	add	r7,r7,r11			@ d+=h
   1491 	eor	r3,r3,r5			@ Maj(a,b,c)
   1492 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
   1493 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
   1494 	@ ldr	r2,[sp,#10*4]		@ 25
   1495 	@ ldr	r1,[sp,#7*4]
   1496 	mov	r0,r2,ror#7
   1497 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
   1498 	mov	r3,r1,ror#17
   1499 	eor	r0,r0,r2,ror#18
   1500 	eor	r3,r3,r1,ror#19
   1501 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1502 	ldr	r2,[sp,#9*4]
   1503 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1504 	ldr	r1,[sp,#2*4]
   1505 
   1506 	add	r3,r3,r0
   1507 	eor	r0,r7,r7,ror#5	@ from BODY_00_15
   1508 	add	r2,r2,r3
   1509 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
   1510 	add	r2,r2,r1			@ X[i]
   1511 	ldr	r3,[r14],#4			@ *K256++
   1512 	add	r10,r10,r2			@ h+=X[i]
   1513 	str	r2,[sp,#9*4]
   1514 	eor	r2,r8,r9
   1515 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
   1516 	and	r2,r2,r7
   1517 	add	r10,r10,r3			@ h+=K256[i]
   1518 	eor	r2,r2,r9			@ Ch(e,f,g)
   1519 	eor	r0,r11,r11,ror#11
   1520 	add	r10,r10,r2			@ h+=Ch(e,f,g)
   1521 #if 25==31
   1522 	and	r3,r3,#0xff
   1523 	cmp	r3,#0xf2			@ done?
   1524 #endif
   1525 #if 25<15
   1526 # if __ARM_ARCH__>=7
   1527 	ldr	r2,[r1],#4			@ prefetch
   1528 # else
   1529 	ldrb	r2,[r1,#3]
   1530 # endif
   1531 	eor	r3,r11,r4			@ a^b, b^c in next round
   1532 #else
   1533 	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
   1534 	eor	r3,r11,r4			@ a^b, b^c in next round
   1535 	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
   1536 #endif
   1537 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
   1538 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1539 	add	r6,r6,r10			@ d+=h
   1540 	eor	r12,r12,r4			@ Maj(a,b,c)
   1541 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
   1542 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
   1543 	@ ldr	r2,[sp,#11*4]		@ 26
   1544 	@ ldr	r1,[sp,#8*4]
   1545 	mov	r0,r2,ror#7
   1546 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
   1547 	mov	r12,r1,ror#17
   1548 	eor	r0,r0,r2,ror#18
   1549 	eor	r12,r12,r1,ror#19
   1550 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1551 	ldr	r2,[sp,#10*4]
   1552 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1553 	ldr	r1,[sp,#3*4]
   1554 
   1555 	add	r12,r12,r0
   1556 	eor	r0,r6,r6,ror#5	@ from BODY_00_15
   1557 	add	r2,r2,r12
   1558 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
   1559 	add	r2,r2,r1			@ X[i]
   1560 	ldr	r12,[r14],#4			@ *K256++
   1561 	add	r9,r9,r2			@ h+=X[i]
   1562 	str	r2,[sp,#10*4]
   1563 	eor	r2,r7,r8
   1564 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
   1565 	and	r2,r2,r6
   1566 	add	r9,r9,r12			@ h+=K256[i]
   1567 	eor	r2,r2,r8			@ Ch(e,f,g)
   1568 	eor	r0,r10,r10,ror#11
   1569 	add	r9,r9,r2			@ h+=Ch(e,f,g)
   1570 #if 26==31
   1571 	and	r12,r12,#0xff
   1572 	cmp	r12,#0xf2			@ done?
   1573 #endif
   1574 #if 26<15
   1575 # if __ARM_ARCH__>=7
   1576 	ldr	r2,[r1],#4			@ prefetch
   1577 # else
   1578 	ldrb	r2,[r1,#3]
   1579 # endif
   1580 	eor	r12,r10,r11			@ a^b, b^c in next round
   1581 #else
   1582 	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
   1583 	eor	r12,r10,r11			@ a^b, b^c in next round
   1584 	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
   1585 #endif
   1586 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
   1587 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1588 	add	r5,r5,r9			@ d+=h
   1589 	eor	r3,r3,r11			@ Maj(a,b,c)
   1590 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
   1591 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
   1592 	@ ldr	r2,[sp,#12*4]		@ 27
   1593 	@ ldr	r1,[sp,#9*4]
   1594 	mov	r0,r2,ror#7
   1595 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
   1596 	mov	r3,r1,ror#17
   1597 	eor	r0,r0,r2,ror#18
   1598 	eor	r3,r3,r1,ror#19
   1599 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1600 	ldr	r2,[sp,#11*4]
   1601 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1602 	ldr	r1,[sp,#4*4]
   1603 
   1604 	add	r3,r3,r0
   1605 	eor	r0,r5,r5,ror#5	@ from BODY_00_15
   1606 	add	r2,r2,r3
   1607 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
   1608 	add	r2,r2,r1			@ X[i]
   1609 	ldr	r3,[r14],#4			@ *K256++
   1610 	add	r8,r8,r2			@ h+=X[i]
   1611 	str	r2,[sp,#11*4]
   1612 	eor	r2,r6,r7
   1613 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
   1614 	and	r2,r2,r5
   1615 	add	r8,r8,r3			@ h+=K256[i]
   1616 	eor	r2,r2,r7			@ Ch(e,f,g)
   1617 	eor	r0,r9,r9,ror#11
   1618 	add	r8,r8,r2			@ h+=Ch(e,f,g)
   1619 #if 27==31
   1620 	and	r3,r3,#0xff
   1621 	cmp	r3,#0xf2			@ done?
   1622 #endif
   1623 #if 27<15
   1624 # if __ARM_ARCH__>=7
   1625 	ldr	r2,[r1],#4			@ prefetch
   1626 # else
   1627 	ldrb	r2,[r1,#3]
   1628 # endif
   1629 	eor	r3,r9,r10			@ a^b, b^c in next round
   1630 #else
   1631 	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
   1632 	eor	r3,r9,r10			@ a^b, b^c in next round
   1633 	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
   1634 #endif
   1635 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
   1636 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1637 	add	r4,r4,r8			@ d+=h
   1638 	eor	r12,r12,r10			@ Maj(a,b,c)
   1639 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
   1640 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
   1641 	@ ldr	r2,[sp,#13*4]		@ 28
   1642 	@ ldr	r1,[sp,#10*4]
   1643 	mov	r0,r2,ror#7
   1644 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
   1645 	mov	r12,r1,ror#17
   1646 	eor	r0,r0,r2,ror#18
   1647 	eor	r12,r12,r1,ror#19
   1648 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1649 	ldr	r2,[sp,#12*4]
   1650 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1651 	ldr	r1,[sp,#5*4]
   1652 
   1653 	add	r12,r12,r0
   1654 	eor	r0,r4,r4,ror#5	@ from BODY_00_15
   1655 	add	r2,r2,r12
   1656 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
   1657 	add	r2,r2,r1			@ X[i]
   1658 	ldr	r12,[r14],#4			@ *K256++
   1659 	add	r7,r7,r2			@ h+=X[i]
   1660 	str	r2,[sp,#12*4]
   1661 	eor	r2,r5,r6
   1662 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
   1663 	and	r2,r2,r4
   1664 	add	r7,r7,r12			@ h+=K256[i]
   1665 	eor	r2,r2,r6			@ Ch(e,f,g)
   1666 	eor	r0,r8,r8,ror#11
   1667 	add	r7,r7,r2			@ h+=Ch(e,f,g)
   1668 #if 28==31
   1669 	and	r12,r12,#0xff
   1670 	cmp	r12,#0xf2			@ done?
   1671 #endif
   1672 #if 28<15
   1673 # if __ARM_ARCH__>=7
   1674 	ldr	r2,[r1],#4			@ prefetch
   1675 # else
   1676 	ldrb	r2,[r1,#3]
   1677 # endif
   1678 	eor	r12,r8,r9			@ a^b, b^c in next round
   1679 #else
   1680 	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
   1681 	eor	r12,r8,r9			@ a^b, b^c in next round
   1682 	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
   1683 #endif
   1684 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
   1685 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1686 	add	r11,r11,r7			@ d+=h
   1687 	eor	r3,r3,r9			@ Maj(a,b,c)
   1688 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
   1689 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
   1690 	@ ldr	r2,[sp,#14*4]		@ 29
   1691 	@ ldr	r1,[sp,#11*4]
   1692 	mov	r0,r2,ror#7
   1693 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
   1694 	mov	r3,r1,ror#17
   1695 	eor	r0,r0,r2,ror#18
   1696 	eor	r3,r3,r1,ror#19
   1697 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1698 	ldr	r2,[sp,#13*4]
   1699 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1700 	ldr	r1,[sp,#6*4]
   1701 
   1702 	add	r3,r3,r0
   1703 	eor	r0,r11,r11,ror#5	@ from BODY_00_15
   1704 	add	r2,r2,r3
   1705 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
   1706 	add	r2,r2,r1			@ X[i]
   1707 	ldr	r3,[r14],#4			@ *K256++
   1708 	add	r6,r6,r2			@ h+=X[i]
   1709 	str	r2,[sp,#13*4]
   1710 	eor	r2,r4,r5
   1711 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
   1712 	and	r2,r2,r11
   1713 	add	r6,r6,r3			@ h+=K256[i]
   1714 	eor	r2,r2,r5			@ Ch(e,f,g)
   1715 	eor	r0,r7,r7,ror#11
   1716 	add	r6,r6,r2			@ h+=Ch(e,f,g)
   1717 #if 29==31
   1718 	and	r3,r3,#0xff
   1719 	cmp	r3,#0xf2			@ done?
   1720 #endif
   1721 #if 29<15
   1722 # if __ARM_ARCH__>=7
   1723 	ldr	r2,[r1],#4			@ prefetch
   1724 # else
   1725 	ldrb	r2,[r1,#3]
   1726 # endif
   1727 	eor	r3,r7,r8			@ a^b, b^c in next round
   1728 #else
   1729 	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
   1730 	eor	r3,r7,r8			@ a^b, b^c in next round
   1731 	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
   1732 #endif
   1733 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
   1734 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1735 	add	r10,r10,r6			@ d+=h
   1736 	eor	r12,r12,r8			@ Maj(a,b,c)
   1737 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
   1738 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
   1739 	@ ldr	r2,[sp,#15*4]		@ 30
   1740 	@ ldr	r1,[sp,#12*4]
   1741 	mov	r0,r2,ror#7
   1742 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
   1743 	mov	r12,r1,ror#17
   1744 	eor	r0,r0,r2,ror#18
   1745 	eor	r12,r12,r1,ror#19
   1746 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1747 	ldr	r2,[sp,#14*4]
   1748 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1749 	ldr	r1,[sp,#7*4]
   1750 
   1751 	add	r12,r12,r0
   1752 	eor	r0,r10,r10,ror#5	@ from BODY_00_15
   1753 	add	r2,r2,r12
   1754 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
   1755 	add	r2,r2,r1			@ X[i]
   1756 	ldr	r12,[r14],#4			@ *K256++
   1757 	add	r5,r5,r2			@ h+=X[i]
   1758 	str	r2,[sp,#14*4]
   1759 	eor	r2,r11,r4
   1760 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
   1761 	and	r2,r2,r10
   1762 	add	r5,r5,r12			@ h+=K256[i]
   1763 	eor	r2,r2,r4			@ Ch(e,f,g)
   1764 	eor	r0,r6,r6,ror#11
   1765 	add	r5,r5,r2			@ h+=Ch(e,f,g)
   1766 #if 30==31
   1767 	and	r12,r12,#0xff
   1768 	cmp	r12,#0xf2			@ done?
   1769 #endif
   1770 #if 30<15
   1771 # if __ARM_ARCH__>=7
   1772 	ldr	r2,[r1],#4			@ prefetch
   1773 # else
   1774 	ldrb	r2,[r1,#3]
   1775 # endif
   1776 	eor	r12,r6,r7			@ a^b, b^c in next round
   1777 #else
   1778 	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
   1779 	eor	r12,r6,r7			@ a^b, b^c in next round
   1780 	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
   1781 #endif
   1782 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
   1783 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1784 	add	r9,r9,r5			@ d+=h
   1785 	eor	r3,r3,r7			@ Maj(a,b,c)
   1786 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
   1787 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
   1788 	@ ldr	r2,[sp,#0*4]		@ 31
   1789 	@ ldr	r1,[sp,#13*4]
   1790 	mov	r0,r2,ror#7
   1791 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
   1792 	mov	r3,r1,ror#17
   1793 	eor	r0,r0,r2,ror#18
   1794 	eor	r3,r3,r1,ror#19
   1795 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1796 	ldr	r2,[sp,#15*4]
   1797 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1798 	ldr	r1,[sp,#8*4]
   1799 
   1800 	add	r3,r3,r0
   1801 	eor	r0,r9,r9,ror#5	@ from BODY_00_15
   1802 	add	r2,r2,r3
   1803 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
   1804 	add	r2,r2,r1			@ X[i]
   1805 	ldr	r3,[r14],#4			@ *K256++
   1806 	add	r4,r4,r2			@ h+=X[i]
   1807 	str	r2,[sp,#15*4]
   1808 	eor	r2,r10,r11
   1809 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
   1810 	and	r2,r2,r9
   1811 	add	r4,r4,r3			@ h+=K256[i]
   1812 	eor	r2,r2,r11			@ Ch(e,f,g)
   1813 	eor	r0,r5,r5,ror#11
   1814 	add	r4,r4,r2			@ h+=Ch(e,f,g)
   1815 #if 31==31
   1816 	and	r3,r3,#0xff
   1817 	cmp	r3,#0xf2			@ done?
   1818 #endif
   1819 #if 31<15
   1820 # if __ARM_ARCH__>=7
   1821 	ldr	r2,[r1],#4			@ prefetch
   1822 # else
   1823 	ldrb	r2,[r1,#3]
   1824 # endif
   1825 	eor	r3,r5,r6			@ a^b, b^c in next round
   1826 #else
   1827 	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
   1828 	eor	r3,r5,r6			@ a^b, b^c in next round
   1829 	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
   1830 #endif
   1831 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
   1832 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1833 	add	r8,r8,r4			@ d+=h
   1834 	eor	r12,r12,r6			@ Maj(a,b,c)
   1835 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
   1836 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
   1837 #if __ARM_ARCH__>=7
   1838 	ite	eq			@ Thumb2 thing, sanity check in ARM
   1839 #endif
   1840 	ldreq	r3,[sp,#16*4]		@ pull ctx
   1841 	bne	Lrounds_16_xx
   1842 
   1843 	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
   1844 	ldr	r0,[r3,#0]
   1845 	ldr	r2,[r3,#4]
   1846 	ldr	r12,[r3,#8]
   1847 	add	r4,r4,r0
   1848 	ldr	r0,[r3,#12]
   1849 	add	r5,r5,r2
   1850 	ldr	r2,[r3,#16]
   1851 	add	r6,r6,r12
   1852 	ldr	r12,[r3,#20]
   1853 	add	r7,r7,r0
   1854 	ldr	r0,[r3,#24]
   1855 	add	r8,r8,r2
   1856 	ldr	r2,[r3,#28]
   1857 	add	r9,r9,r12
   1858 	ldr	r1,[sp,#17*4]		@ pull inp
   1859 	ldr	r12,[sp,#18*4]		@ pull inp+len
   1860 	add	r10,r10,r0
   1861 	add	r11,r11,r2
   1862 	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
   1863 	cmp	r1,r12
   1864 	sub	r14,r14,#256	@ rewind Ktbl
   1865 	bne	Loop
   1866 
   1867 	add	sp,sp,#19*4	@ destroy frame
   1868 #if __ARM_ARCH__>=5
   1869 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
   1870 #else
   1871 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
   1872 	tst	lr,#1
   1873 	moveq	pc,lr			@ be binary compatible with V4, yet
   1874 .word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   1875 #endif
   1876 
   1877 #if __ARM_MAX_ARCH__>=7
   1878 
   1879 
   1880 
   1881 .globl	_sha256_block_data_order_neon
   1882 .private_extern	_sha256_block_data_order_neon
   1883 #ifdef __thumb2__
   1884 .thumb_func	_sha256_block_data_order_neon
   1885 #endif
   1886 .align	5
   1887 .skip	16
   1888 _sha256_block_data_order_neon:
   1889 LNEON:
   1890 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
   1891 
   1892 	sub	r11,sp,#16*4+16
   1893 	adr	r14,K256
   1894 	bic	r11,r11,#15		@ align for 128-bit stores
   1895 	mov	r12,sp
   1896 	mov	sp,r11			@ alloca
   1897 	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
   1898 
   1899 	vld1.8	{q0},[r1]!
   1900 	vld1.8	{q1},[r1]!
   1901 	vld1.8	{q2},[r1]!
   1902 	vld1.8	{q3},[r1]!
   1903 	vld1.32	{q8},[r14,:128]!
   1904 	vld1.32	{q9},[r14,:128]!
   1905 	vld1.32	{q10},[r14,:128]!
   1906 	vld1.32	{q11},[r14,:128]!
   1907 	vrev32.8	q0,q0		@ yes, even on
   1908 	str	r0,[sp,#64]
   1909 	vrev32.8	q1,q1		@ big-endian
   1910 	str	r1,[sp,#68]
   1911 	mov	r1,sp
   1912 	vrev32.8	q2,q2
   1913 	str	r2,[sp,#72]
   1914 	vrev32.8	q3,q3
   1915 	str	r12,[sp,#76]		@ save original sp
   1916 	vadd.i32	q8,q8,q0
   1917 	vadd.i32	q9,q9,q1
   1918 	vst1.32	{q8},[r1,:128]!
   1919 	vadd.i32	q10,q10,q2
   1920 	vst1.32	{q9},[r1,:128]!
   1921 	vadd.i32	q11,q11,q3
   1922 	vst1.32	{q10},[r1,:128]!
   1923 	vst1.32	{q11},[r1,:128]!
   1924 
   1925 	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
   1926 	sub	r1,r1,#64
   1927 	ldr	r2,[sp,#0]
   1928 	eor	r12,r12,r12
   1929 	eor	r3,r5,r6
   1930 	b	L_00_48
   1931 
   1932 .align	4
   1933 L_00_48:
   1934 	vext.8	q8,q0,q1,#4
   1935 	add	r11,r11,r2
   1936 	eor	r2,r9,r10
   1937 	eor	r0,r8,r8,ror#5
   1938 	vext.8	q9,q2,q3,#4
   1939 	add	r4,r4,r12
   1940 	and	r2,r2,r8
   1941 	eor	r12,r0,r8,ror#19
   1942 	vshr.u32	q10,q8,#7
   1943 	eor	r0,r4,r4,ror#11
   1944 	eor	r2,r2,r10
   1945 	vadd.i32	q0,q0,q9
   1946 	add	r11,r11,r12,ror#6
   1947 	eor	r12,r4,r5
   1948 	vshr.u32	q9,q8,#3
   1949 	eor	r0,r0,r4,ror#20
   1950 	add	r11,r11,r2
   1951 	vsli.32	q10,q8,#25
   1952 	ldr	r2,[sp,#4]
   1953 	and	r3,r3,r12
   1954 	vshr.u32	q11,q8,#18
   1955 	add	r7,r7,r11
   1956 	add	r11,r11,r0,ror#2
   1957 	eor	r3,r3,r5
   1958 	veor	q9,q9,q10
   1959 	add	r10,r10,r2
   1960 	vsli.32	q11,q8,#14
   1961 	eor	r2,r8,r9
   1962 	eor	r0,r7,r7,ror#5
   1963 	vshr.u32	d24,d7,#17
   1964 	add	r11,r11,r3
   1965 	and	r2,r2,r7
   1966 	veor	q9,q9,q11
   1967 	eor	r3,r0,r7,ror#19
   1968 	eor	r0,r11,r11,ror#11
   1969 	vsli.32	d24,d7,#15
   1970 	eor	r2,r2,r9
   1971 	add	r10,r10,r3,ror#6
   1972 	vshr.u32	d25,d7,#10
   1973 	eor	r3,r11,r4
   1974 	eor	r0,r0,r11,ror#20
   1975 	vadd.i32	q0,q0,q9
   1976 	add	r10,r10,r2
   1977 	ldr	r2,[sp,#8]
   1978 	veor	d25,d25,d24
   1979 	and	r12,r12,r3
   1980 	add	r6,r6,r10
   1981 	vshr.u32	d24,d7,#19
   1982 	add	r10,r10,r0,ror#2
   1983 	eor	r12,r12,r4
   1984 	vsli.32	d24,d7,#13
   1985 	add	r9,r9,r2
   1986 	eor	r2,r7,r8
   1987 	veor	d25,d25,d24
   1988 	eor	r0,r6,r6,ror#5
   1989 	add	r10,r10,r12
   1990 	vadd.i32	d0,d0,d25
   1991 	and	r2,r2,r6
   1992 	eor	r12,r0,r6,ror#19
   1993 	vshr.u32	d24,d0,#17
   1994 	eor	r0,r10,r10,ror#11
   1995 	eor	r2,r2,r8
   1996 	vsli.32	d24,d0,#15
   1997 	add	r9,r9,r12,ror#6
   1998 	eor	r12,r10,r11
   1999 	vshr.u32	d25,d0,#10
   2000 	eor	r0,r0,r10,ror#20
   2001 	add	r9,r9,r2
   2002 	veor	d25,d25,d24
   2003 	ldr	r2,[sp,#12]
   2004 	and	r3,r3,r12
   2005 	vshr.u32	d24,d0,#19
   2006 	add	r5,r5,r9
   2007 	add	r9,r9,r0,ror#2
   2008 	eor	r3,r3,r11
   2009 	vld1.32	{q8},[r14,:128]!
   2010 	add	r8,r8,r2
   2011 	vsli.32	d24,d0,#13
   2012 	eor	r2,r6,r7
   2013 	eor	r0,r5,r5,ror#5
   2014 	veor	d25,d25,d24
   2015 	add	r9,r9,r3
   2016 	and	r2,r2,r5
   2017 	vadd.i32	d1,d1,d25
   2018 	eor	r3,r0,r5,ror#19
   2019 	eor	r0,r9,r9,ror#11
   2020 	vadd.i32	q8,q8,q0
   2021 	eor	r2,r2,r7
   2022 	add	r8,r8,r3,ror#6
   2023 	eor	r3,r9,r10
   2024 	eor	r0,r0,r9,ror#20
   2025 	add	r8,r8,r2
   2026 	ldr	r2,[sp,#16]
   2027 	and	r12,r12,r3
   2028 	add	r4,r4,r8
   2029 	vst1.32	{q8},[r1,:128]!
   2030 	add	r8,r8,r0,ror#2
   2031 	eor	r12,r12,r10
   2032 	vext.8	q8,q1,q2,#4
   2033 	add	r7,r7,r2
   2034 	eor	r2,r5,r6
   2035 	eor	r0,r4,r4,ror#5
   2036 	vext.8	q9,q3,q0,#4
   2037 	add	r8,r8,r12
   2038 	and	r2,r2,r4
   2039 	eor	r12,r0,r4,ror#19
   2040 	vshr.u32	q10,q8,#7
   2041 	eor	r0,r8,r8,ror#11
   2042 	eor	r2,r2,r6
   2043 	vadd.i32	q1,q1,q9
   2044 	add	r7,r7,r12,ror#6
   2045 	eor	r12,r8,r9
   2046 	vshr.u32	q9,q8,#3
   2047 	eor	r0,r0,r8,ror#20
   2048 	add	r7,r7,r2
   2049 	vsli.32	q10,q8,#25
   2050 	ldr	r2,[sp,#20]
   2051 	and	r3,r3,r12
   2052 	vshr.u32	q11,q8,#18
   2053 	add	r11,r11,r7
   2054 	add	r7,r7,r0,ror#2
   2055 	eor	r3,r3,r9
   2056 	veor	q9,q9,q10
   2057 	add	r6,r6,r2
   2058 	vsli.32	q11,q8,#14
   2059 	eor	r2,r4,r5
   2060 	eor	r0,r11,r11,ror#5
   2061 	vshr.u32	d24,d1,#17
   2062 	add	r7,r7,r3
   2063 	and	r2,r2,r11
   2064 	veor	q9,q9,q11
   2065 	eor	r3,r0,r11,ror#19
   2066 	eor	r0,r7,r7,ror#11
   2067 	vsli.32	d24,d1,#15
   2068 	eor	r2,r2,r5
   2069 	add	r6,r6,r3,ror#6
   2070 	vshr.u32	d25,d1,#10
   2071 	eor	r3,r7,r8
   2072 	eor	r0,r0,r7,ror#20
   2073 	vadd.i32	q1,q1,q9
   2074 	add	r6,r6,r2
   2075 	ldr	r2,[sp,#24]
   2076 	veor	d25,d25,d24
   2077 	and	r12,r12,r3
   2078 	add	r10,r10,r6
   2079 	vshr.u32	d24,d1,#19
   2080 	add	r6,r6,r0,ror#2
   2081 	eor	r12,r12,r8
   2082 	vsli.32	d24,d1,#13
   2083 	add	r5,r5,r2
   2084 	eor	r2,r11,r4
   2085 	veor	d25,d25,d24
   2086 	eor	r0,r10,r10,ror#5
   2087 	add	r6,r6,r12
   2088 	vadd.i32	d2,d2,d25
   2089 	and	r2,r2,r10
   2090 	eor	r12,r0,r10,ror#19
   2091 	vshr.u32	d24,d2,#17
   2092 	eor	r0,r6,r6,ror#11
   2093 	eor	r2,r2,r4
   2094 	vsli.32	d24,d2,#15
   2095 	add	r5,r5,r12,ror#6
   2096 	eor	r12,r6,r7
   2097 	vshr.u32	d25,d2,#10
   2098 	eor	r0,r0,r6,ror#20
   2099 	add	r5,r5,r2
   2100 	veor	d25,d25,d24
   2101 	ldr	r2,[sp,#28]
   2102 	and	r3,r3,r12
   2103 	vshr.u32	d24,d2,#19
   2104 	add	r9,r9,r5
   2105 	add	r5,r5,r0,ror#2
   2106 	eor	r3,r3,r7
   2107 	vld1.32	{q8},[r14,:128]!
   2108 	add	r4,r4,r2
   2109 	vsli.32	d24,d2,#13
   2110 	eor	r2,r10,r11
   2111 	eor	r0,r9,r9,ror#5
   2112 	veor	d25,d25,d24
   2113 	add	r5,r5,r3
   2114 	and	r2,r2,r9
   2115 	vadd.i32	d3,d3,d25
   2116 	eor	r3,r0,r9,ror#19
   2117 	eor	r0,r5,r5,ror#11
   2118 	vadd.i32	q8,q8,q1
   2119 	eor	r2,r2,r11
   2120 	add	r4,r4,r3,ror#6
   2121 	eor	r3,r5,r6
   2122 	eor	r0,r0,r5,ror#20
   2123 	add	r4,r4,r2
   2124 	ldr	r2,[sp,#32]
   2125 	and	r12,r12,r3
   2126 	add	r8,r8,r4
   2127 	vst1.32	{q8},[r1,:128]!
   2128 	add	r4,r4,r0,ror#2
   2129 	eor	r12,r12,r6
   2130 	vext.8	q8,q2,q3,#4
   2131 	add	r11,r11,r2
   2132 	eor	r2,r9,r10
   2133 	eor	r0,r8,r8,ror#5
   2134 	vext.8	q9,q0,q1,#4
   2135 	add	r4,r4,r12
   2136 	and	r2,r2,r8
   2137 	eor	r12,r0,r8,ror#19
   2138 	vshr.u32	q10,q8,#7
   2139 	eor	r0,r4,r4,ror#11
   2140 	eor	r2,r2,r10
   2141 	vadd.i32	q2,q2,q9
   2142 	add	r11,r11,r12,ror#6
   2143 	eor	r12,r4,r5
   2144 	vshr.u32	q9,q8,#3
   2145 	eor	r0,r0,r4,ror#20
   2146 	add	r11,r11,r2
   2147 	vsli.32	q10,q8,#25
   2148 	ldr	r2,[sp,#36]
   2149 	and	r3,r3,r12
   2150 	vshr.u32	q11,q8,#18
   2151 	add	r7,r7,r11
   2152 	add	r11,r11,r0,ror#2
   2153 	eor	r3,r3,r5
   2154 	veor	q9,q9,q10
   2155 	add	r10,r10,r2
   2156 	vsli.32	q11,q8,#14
   2157 	eor	r2,r8,r9
   2158 	eor	r0,r7,r7,ror#5
   2159 	vshr.u32	d24,d3,#17
   2160 	add	r11,r11,r3
   2161 	and	r2,r2,r7
   2162 	veor	q9,q9,q11
   2163 	eor	r3,r0,r7,ror#19
   2164 	eor	r0,r11,r11,ror#11
   2165 	vsli.32	d24,d3,#15
   2166 	eor	r2,r2,r9
   2167 	add	r10,r10,r3,ror#6
   2168 	vshr.u32	d25,d3,#10
   2169 	eor	r3,r11,r4
   2170 	eor	r0,r0,r11,ror#20
   2171 	vadd.i32	q2,q2,q9
   2172 	add	r10,r10,r2
   2173 	ldr	r2,[sp,#40]
   2174 	veor	d25,d25,d24
   2175 	and	r12,r12,r3
   2176 	add	r6,r6,r10
   2177 	vshr.u32	d24,d3,#19
   2178 	add	r10,r10,r0,ror#2
   2179 	eor	r12,r12,r4
   2180 	vsli.32	d24,d3,#13
   2181 	add	r9,r9,r2
   2182 	eor	r2,r7,r8
   2183 	veor	d25,d25,d24
   2184 	eor	r0,r6,r6,ror#5
   2185 	add	r10,r10,r12
   2186 	vadd.i32	d4,d4,d25
   2187 	and	r2,r2,r6
   2188 	eor	r12,r0,r6,ror#19
   2189 	vshr.u32	d24,d4,#17
   2190 	eor	r0,r10,r10,ror#11
   2191 	eor	r2,r2,r8
   2192 	vsli.32	d24,d4,#15
   2193 	add	r9,r9,r12,ror#6
   2194 	eor	r12,r10,r11
   2195 	vshr.u32	d25,d4,#10
   2196 	eor	r0,r0,r10,ror#20
   2197 	add	r9,r9,r2
   2198 	veor	d25,d25,d24
   2199 	ldr	r2,[sp,#44]
   2200 	and	r3,r3,r12
   2201 	vshr.u32	d24,d4,#19
   2202 	add	r5,r5,r9
   2203 	add	r9,r9,r0,ror#2
   2204 	eor	r3,r3,r11
   2205 	vld1.32	{q8},[r14,:128]!
   2206 	add	r8,r8,r2
   2207 	vsli.32	d24,d4,#13
   2208 	eor	r2,r6,r7
   2209 	eor	r0,r5,r5,ror#5
   2210 	veor	d25,d25,d24
   2211 	add	r9,r9,r3
   2212 	and	r2,r2,r5
   2213 	vadd.i32	d5,d5,d25
   2214 	eor	r3,r0,r5,ror#19
   2215 	eor	r0,r9,r9,ror#11
   2216 	vadd.i32	q8,q8,q2
   2217 	eor	r2,r2,r7
   2218 	add	r8,r8,r3,ror#6
   2219 	eor	r3,r9,r10
   2220 	eor	r0,r0,r9,ror#20
   2221 	add	r8,r8,r2
   2222 	ldr	r2,[sp,#48]
   2223 	and	r12,r12,r3
   2224 	add	r4,r4,r8
   2225 	vst1.32	{q8},[r1,:128]!
   2226 	add	r8,r8,r0,ror#2
   2227 	eor	r12,r12,r10
   2228 	vext.8	q8,q3,q0,#4
   2229 	add	r7,r7,r2
   2230 	eor	r2,r5,r6
   2231 	eor	r0,r4,r4,ror#5
   2232 	vext.8	q9,q1,q2,#4
   2233 	add	r8,r8,r12
   2234 	and	r2,r2,r4
   2235 	eor	r12,r0,r4,ror#19
   2236 	vshr.u32	q10,q8,#7
   2237 	eor	r0,r8,r8,ror#11
   2238 	eor	r2,r2,r6
   2239 	vadd.i32	q3,q3,q9
   2240 	add	r7,r7,r12,ror#6
   2241 	eor	r12,r8,r9
   2242 	vshr.u32	q9,q8,#3
   2243 	eor	r0,r0,r8,ror#20
   2244 	add	r7,r7,r2
   2245 	vsli.32	q10,q8,#25
   2246 	ldr	r2,[sp,#52]
   2247 	and	r3,r3,r12
   2248 	vshr.u32	q11,q8,#18
   2249 	add	r11,r11,r7
   2250 	add	r7,r7,r0,ror#2
   2251 	eor	r3,r3,r9
   2252 	veor	q9,q9,q10
   2253 	add	r6,r6,r2
   2254 	vsli.32	q11,q8,#14
   2255 	eor	r2,r4,r5
   2256 	eor	r0,r11,r11,ror#5
   2257 	vshr.u32	d24,d5,#17
   2258 	add	r7,r7,r3
   2259 	and	r2,r2,r11
   2260 	veor	q9,q9,q11
   2261 	eor	r3,r0,r11,ror#19
   2262 	eor	r0,r7,r7,ror#11
   2263 	vsli.32	d24,d5,#15
   2264 	eor	r2,r2,r5
   2265 	add	r6,r6,r3,ror#6
   2266 	vshr.u32	d25,d5,#10
   2267 	eor	r3,r7,r8
   2268 	eor	r0,r0,r7,ror#20
   2269 	vadd.i32	q3,q3,q9
   2270 	add	r6,r6,r2
   2271 	ldr	r2,[sp,#56]
   2272 	veor	d25,d25,d24
   2273 	and	r12,r12,r3
   2274 	add	r10,r10,r6
   2275 	vshr.u32	d24,d5,#19
   2276 	add	r6,r6,r0,ror#2
   2277 	eor	r12,r12,r8
   2278 	vsli.32	d24,d5,#13
   2279 	add	r5,r5,r2
   2280 	eor	r2,r11,r4
   2281 	veor	d25,d25,d24
   2282 	eor	r0,r10,r10,ror#5
   2283 	add	r6,r6,r12
   2284 	vadd.i32	d6,d6,d25
   2285 	and	r2,r2,r10
   2286 	eor	r12,r0,r10,ror#19
   2287 	vshr.u32	d24,d6,#17
   2288 	eor	r0,r6,r6,ror#11
   2289 	eor	r2,r2,r4
   2290 	vsli.32	d24,d6,#15
   2291 	add	r5,r5,r12,ror#6
   2292 	eor	r12,r6,r7
   2293 	vshr.u32	d25,d6,#10
   2294 	eor	r0,r0,r6,ror#20
   2295 	add	r5,r5,r2
   2296 	veor	d25,d25,d24
   2297 	ldr	r2,[sp,#60]
   2298 	and	r3,r3,r12
   2299 	vshr.u32	d24,d6,#19
   2300 	add	r9,r9,r5
   2301 	add	r5,r5,r0,ror#2
   2302 	eor	r3,r3,r7
   2303 	vld1.32	{q8},[r14,:128]!
   2304 	add	r4,r4,r2
   2305 	vsli.32	d24,d6,#13
   2306 	eor	r2,r10,r11
   2307 	eor	r0,r9,r9,ror#5
   2308 	veor	d25,d25,d24
   2309 	add	r5,r5,r3
   2310 	and	r2,r2,r9
   2311 	vadd.i32	d7,d7,d25
   2312 	eor	r3,r0,r9,ror#19
   2313 	eor	r0,r5,r5,ror#11
   2314 	vadd.i32	q8,q8,q3
   2315 	eor	r2,r2,r11
   2316 	add	r4,r4,r3,ror#6
   2317 	eor	r3,r5,r6
   2318 	eor	r0,r0,r5,ror#20
   2319 	add	r4,r4,r2
   2320 	ldr	r2,[r14]
   2321 	and	r12,r12,r3
   2322 	add	r8,r8,r4
   2323 	vst1.32	{q8},[r1,:128]!
   2324 	add	r4,r4,r0,ror#2
   2325 	eor	r12,r12,r6
   2326 	teq	r2,#0				@ check for K256 terminator
   2327 	ldr	r2,[sp,#0]
   2328 	sub	r1,r1,#64
   2329 	bne	L_00_48
   2330 
   2331 	ldr	r1,[sp,#68]
   2332 	ldr	r0,[sp,#72]
   2333 	sub	r14,r14,#256	@ rewind r14
   2334 	teq	r1,r0
   2335 	it	eq
   2336 	subeq	r1,r1,#64		@ avoid SEGV
   2337 	vld1.8	{q0},[r1]!		@ load next input block
   2338 	vld1.8	{q1},[r1]!
   2339 	vld1.8	{q2},[r1]!
   2340 	vld1.8	{q3},[r1]!
   2341 	it	ne
   2342 	strne	r1,[sp,#68]
   2343 	mov	r1,sp
   2344 	add	r11,r11,r2
   2345 	eor	r2,r9,r10
   2346 	eor	r0,r8,r8,ror#5
   2347 	add	r4,r4,r12
   2348 	vld1.32	{q8},[r14,:128]!
   2349 	and	r2,r2,r8
   2350 	eor	r12,r0,r8,ror#19
   2351 	eor	r0,r4,r4,ror#11
   2352 	eor	r2,r2,r10
   2353 	vrev32.8	q0,q0
   2354 	add	r11,r11,r12,ror#6
   2355 	eor	r12,r4,r5
   2356 	eor	r0,r0,r4,ror#20
   2357 	add	r11,r11,r2
   2358 	vadd.i32	q8,q8,q0
   2359 	ldr	r2,[sp,#4]
   2360 	and	r3,r3,r12
   2361 	add	r7,r7,r11
   2362 	add	r11,r11,r0,ror#2
   2363 	eor	r3,r3,r5
   2364 	add	r10,r10,r2
   2365 	eor	r2,r8,r9
   2366 	eor	r0,r7,r7,ror#5
   2367 	add	r11,r11,r3
   2368 	and	r2,r2,r7
   2369 	eor	r3,r0,r7,ror#19
   2370 	eor	r0,r11,r11,ror#11
   2371 	eor	r2,r2,r9
   2372 	add	r10,r10,r3,ror#6
   2373 	eor	r3,r11,r4
   2374 	eor	r0,r0,r11,ror#20
   2375 	add	r10,r10,r2
   2376 	ldr	r2,[sp,#8]
   2377 	and	r12,r12,r3
   2378 	add	r6,r6,r10
   2379 	add	r10,r10,r0,ror#2
   2380 	eor	r12,r12,r4
   2381 	add	r9,r9,r2
   2382 	eor	r2,r7,r8
   2383 	eor	r0,r6,r6,ror#5
   2384 	add	r10,r10,r12
   2385 	and	r2,r2,r6
   2386 	eor	r12,r0,r6,ror#19
   2387 	eor	r0,r10,r10,ror#11
   2388 	eor	r2,r2,r8
   2389 	add	r9,r9,r12,ror#6
   2390 	eor	r12,r10,r11
   2391 	eor	r0,r0,r10,ror#20
   2392 	add	r9,r9,r2
   2393 	ldr	r2,[sp,#12]
   2394 	and	r3,r3,r12
   2395 	add	r5,r5,r9
   2396 	add	r9,r9,r0,ror#2
   2397 	eor	r3,r3,r11
   2398 	add	r8,r8,r2
   2399 	eor	r2,r6,r7
   2400 	eor	r0,r5,r5,ror#5
   2401 	add	r9,r9,r3
   2402 	and	r2,r2,r5
   2403 	eor	r3,r0,r5,ror#19
   2404 	eor	r0,r9,r9,ror#11
   2405 	eor	r2,r2,r7
   2406 	add	r8,r8,r3,ror#6
   2407 	eor	r3,r9,r10
   2408 	eor	r0,r0,r9,ror#20
   2409 	add	r8,r8,r2
   2410 	ldr	r2,[sp,#16]
   2411 	and	r12,r12,r3
   2412 	add	r4,r4,r8
   2413 	add	r8,r8,r0,ror#2
   2414 	eor	r12,r12,r10
   2415 	vst1.32	{q8},[r1,:128]!
   2416 	add	r7,r7,r2
   2417 	eor	r2,r5,r6
   2418 	eor	r0,r4,r4,ror#5
   2419 	add	r8,r8,r12
   2420 	vld1.32	{q8},[r14,:128]!
   2421 	and	r2,r2,r4
   2422 	eor	r12,r0,r4,ror#19
   2423 	eor	r0,r8,r8,ror#11
   2424 	eor	r2,r2,r6
   2425 	vrev32.8	q1,q1
   2426 	add	r7,r7,r12,ror#6
   2427 	eor	r12,r8,r9
   2428 	eor	r0,r0,r8,ror#20
   2429 	add	r7,r7,r2
   2430 	vadd.i32	q8,q8,q1
   2431 	ldr	r2,[sp,#20]
   2432 	and	r3,r3,r12
   2433 	add	r11,r11,r7
   2434 	add	r7,r7,r0,ror#2
   2435 	eor	r3,r3,r9
   2436 	add	r6,r6,r2
   2437 	eor	r2,r4,r5
   2438 	eor	r0,r11,r11,ror#5
   2439 	add	r7,r7,r3
   2440 	and	r2,r2,r11
   2441 	eor	r3,r0,r11,ror#19
   2442 	eor	r0,r7,r7,ror#11
   2443 	eor	r2,r2,r5
   2444 	add	r6,r6,r3,ror#6
   2445 	eor	r3,r7,r8
   2446 	eor	r0,r0,r7,ror#20
   2447 	add	r6,r6,r2
   2448 	ldr	r2,[sp,#24]
   2449 	and	r12,r12,r3
   2450 	add	r10,r10,r6
   2451 	add	r6,r6,r0,ror#2
   2452 	eor	r12,r12,r8
   2453 	add	r5,r5,r2
   2454 	eor	r2,r11,r4
   2455 	eor	r0,r10,r10,ror#5
   2456 	add	r6,r6,r12
   2457 	and	r2,r2,r10
   2458 	eor	r12,r0,r10,ror#19
   2459 	eor	r0,r6,r6,ror#11
   2460 	eor	r2,r2,r4
   2461 	add	r5,r5,r12,ror#6
   2462 	eor	r12,r6,r7
   2463 	eor	r0,r0,r6,ror#20
   2464 	add	r5,r5,r2
   2465 	ldr	r2,[sp,#28]
   2466 	and	r3,r3,r12
   2467 	add	r9,r9,r5
   2468 	add	r5,r5,r0,ror#2
   2469 	eor	r3,r3,r7
   2470 	add	r4,r4,r2
   2471 	eor	r2,r10,r11
   2472 	eor	r0,r9,r9,ror#5
   2473 	add	r5,r5,r3
   2474 	and	r2,r2,r9
   2475 	eor	r3,r0,r9,ror#19
   2476 	eor	r0,r5,r5,ror#11
   2477 	eor	r2,r2,r11
   2478 	add	r4,r4,r3,ror#6
   2479 	eor	r3,r5,r6
   2480 	eor	r0,r0,r5,ror#20
   2481 	add	r4,r4,r2
   2482 	ldr	r2,[sp,#32]
   2483 	and	r12,r12,r3
   2484 	add	r8,r8,r4
   2485 	add	r4,r4,r0,ror#2
   2486 	eor	r12,r12,r6
   2487 	vst1.32	{q8},[r1,:128]!
   2488 	add	r11,r11,r2
   2489 	eor	r2,r9,r10
   2490 	eor	r0,r8,r8,ror#5
   2491 	add	r4,r4,r12
   2492 	vld1.32	{q8},[r14,:128]!
   2493 	and	r2,r2,r8
   2494 	eor	r12,r0,r8,ror#19
   2495 	eor	r0,r4,r4,ror#11
   2496 	eor	r2,r2,r10
   2497 	vrev32.8	q2,q2
   2498 	add	r11,r11,r12,ror#6
   2499 	eor	r12,r4,r5
   2500 	eor	r0,r0,r4,ror#20
   2501 	add	r11,r11,r2
   2502 	vadd.i32	q8,q8,q2
   2503 	ldr	r2,[sp,#36]
   2504 	and	r3,r3,r12
   2505 	add	r7,r7,r11
   2506 	add	r11,r11,r0,ror#2
   2507 	eor	r3,r3,r5
   2508 	add	r10,r10,r2
   2509 	eor	r2,r8,r9
   2510 	eor	r0,r7,r7,ror#5
   2511 	add	r11,r11,r3
   2512 	and	r2,r2,r7
   2513 	eor	r3,r0,r7,ror#19
   2514 	eor	r0,r11,r11,ror#11
   2515 	eor	r2,r2,r9
   2516 	add	r10,r10,r3,ror#6
   2517 	eor	r3,r11,r4
   2518 	eor	r0,r0,r11,ror#20
   2519 	add	r10,r10,r2
   2520 	ldr	r2,[sp,#40]
   2521 	and	r12,r12,r3
   2522 	add	r6,r6,r10
   2523 	add	r10,r10,r0,ror#2
   2524 	eor	r12,r12,r4
   2525 	add	r9,r9,r2
   2526 	eor	r2,r7,r8
   2527 	eor	r0,r6,r6,ror#5
   2528 	add	r10,r10,r12
   2529 	and	r2,r2,r6
   2530 	eor	r12,r0,r6,ror#19
   2531 	eor	r0,r10,r10,ror#11
   2532 	eor	r2,r2,r8
   2533 	add	r9,r9,r12,ror#6
   2534 	eor	r12,r10,r11
   2535 	eor	r0,r0,r10,ror#20
   2536 	add	r9,r9,r2
   2537 	ldr	r2,[sp,#44]
   2538 	and	r3,r3,r12
   2539 	add	r5,r5,r9
   2540 	add	r9,r9,r0,ror#2
   2541 	eor	r3,r3,r11
   2542 	add	r8,r8,r2
   2543 	eor	r2,r6,r7
   2544 	eor	r0,r5,r5,ror#5
   2545 	add	r9,r9,r3
   2546 	and	r2,r2,r5
   2547 	eor	r3,r0,r5,ror#19
   2548 	eor	r0,r9,r9,ror#11
   2549 	eor	r2,r2,r7
   2550 	add	r8,r8,r3,ror#6
   2551 	eor	r3,r9,r10
   2552 	eor	r0,r0,r9,ror#20
   2553 	add	r8,r8,r2
   2554 	ldr	r2,[sp,#48]
   2555 	and	r12,r12,r3
   2556 	add	r4,r4,r8
   2557 	add	r8,r8,r0,ror#2
   2558 	eor	r12,r12,r10
   2559 	vst1.32	{q8},[r1,:128]!
   2560 	add	r7,r7,r2
   2561 	eor	r2,r5,r6
   2562 	eor	r0,r4,r4,ror#5
   2563 	add	r8,r8,r12
   2564 	vld1.32	{q8},[r14,:128]!
   2565 	and	r2,r2,r4
   2566 	eor	r12,r0,r4,ror#19
   2567 	eor	r0,r8,r8,ror#11
   2568 	eor	r2,r2,r6
   2569 	vrev32.8	q3,q3
   2570 	add	r7,r7,r12,ror#6
   2571 	eor	r12,r8,r9
   2572 	eor	r0,r0,r8,ror#20
   2573 	add	r7,r7,r2
   2574 	vadd.i32	q8,q8,q3
   2575 	ldr	r2,[sp,#52]
   2576 	and	r3,r3,r12
   2577 	add	r11,r11,r7
   2578 	add	r7,r7,r0,ror#2
   2579 	eor	r3,r3,r9
   2580 	add	r6,r6,r2
   2581 	eor	r2,r4,r5
   2582 	eor	r0,r11,r11,ror#5
   2583 	add	r7,r7,r3
   2584 	and	r2,r2,r11
   2585 	eor	r3,r0,r11,ror#19
   2586 	eor	r0,r7,r7,ror#11
   2587 	eor	r2,r2,r5
   2588 	add	r6,r6,r3,ror#6
   2589 	eor	r3,r7,r8
   2590 	eor	r0,r0,r7,ror#20
   2591 	add	r6,r6,r2
   2592 	ldr	r2,[sp,#56]
   2593 	and	r12,r12,r3
   2594 	add	r10,r10,r6
   2595 	add	r6,r6,r0,ror#2
   2596 	eor	r12,r12,r8
   2597 	add	r5,r5,r2
   2598 	eor	r2,r11,r4
   2599 	eor	r0,r10,r10,ror#5
   2600 	add	r6,r6,r12
   2601 	and	r2,r2,r10
   2602 	eor	r12,r0,r10,ror#19
   2603 	eor	r0,r6,r6,ror#11
   2604 	eor	r2,r2,r4
   2605 	add	r5,r5,r12,ror#6
   2606 	eor	r12,r6,r7
   2607 	eor	r0,r0,r6,ror#20
   2608 	add	r5,r5,r2
   2609 	ldr	r2,[sp,#60]
   2610 	and	r3,r3,r12
   2611 	add	r9,r9,r5
   2612 	add	r5,r5,r0,ror#2
   2613 	eor	r3,r3,r7
   2614 	add	r4,r4,r2
   2615 	eor	r2,r10,r11
   2616 	eor	r0,r9,r9,ror#5
   2617 	add	r5,r5,r3
   2618 	and	r2,r2,r9
   2619 	eor	r3,r0,r9,ror#19
   2620 	eor	r0,r5,r5,ror#11
   2621 	eor	r2,r2,r11
   2622 	add	r4,r4,r3,ror#6
   2623 	eor	r3,r5,r6
   2624 	eor	r0,r0,r5,ror#20
   2625 	add	r4,r4,r2
   2626 	ldr	r2,[sp,#64]
   2627 	and	r12,r12,r3
   2628 	add	r8,r8,r4
   2629 	add	r4,r4,r0,ror#2
   2630 	eor	r12,r12,r6
   2631 	vst1.32	{q8},[r1,:128]!
   2632 	ldr	r0,[r2,#0]
   2633 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
   2634 	ldr	r12,[r2,#4]
   2635 	ldr	r3,[r2,#8]
   2636 	ldr	r1,[r2,#12]
   2637 	add	r4,r4,r0			@ accumulate
   2638 	ldr	r0,[r2,#16]
   2639 	add	r5,r5,r12
   2640 	ldr	r12,[r2,#20]
   2641 	add	r6,r6,r3
   2642 	ldr	r3,[r2,#24]
   2643 	add	r7,r7,r1
   2644 	ldr	r1,[r2,#28]
   2645 	add	r8,r8,r0
   2646 	str	r4,[r2],#4
   2647 	add	r9,r9,r12
   2648 	str	r5,[r2],#4
   2649 	add	r10,r10,r3
   2650 	str	r6,[r2],#4
   2651 	add	r11,r11,r1
   2652 	str	r7,[r2],#4
   2653 	stmia	r2,{r8,r9,r10,r11}
   2654 
   2655 	ittte	ne
   2656 	movne	r1,sp
   2657 	ldrne	r2,[sp,#0]
   2658 	eorne	r12,r12,r12
   2659 	ldreq	sp,[sp,#76]			@ restore original sp
   2660 	itt	ne
   2661 	eorne	r3,r5,r6
   2662 	bne	L_00_48
   2663 
   2664 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
   2665 
   2666 #endif
   2667 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
   2668 
   2669 # if defined(__thumb2__)
   2670 #  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
   2671 # else
   2672 #  define INST(a,b,c,d)	.byte	a,b,c,d
   2673 # endif
   2674 
   2675 #ifdef __thumb2__
   2676 .thumb_func	sha256_block_data_order_armv8
   2677 #endif
   2678 .align	5
@ -----------------------------------------------------------------------
@ sha256_block_data_order_armv8 -- SHA-256 compression using the ARMv8
@ Cryptography Extensions.  The SHA256H / SHA256H2 / SHA256SU0 /
@ SHA256SU1 instructions are emitted as raw opcode bytes through the
@ INST() macro defined above, so pre-ARMv8 assemblers can still build
@ this file.
@ In:    r0 = uint32_t state[8] (a..h), r1 = input, r2 = number of
@        64-byte blocks.  r3 arrives pointing past the K256 constant
@        table and is rewound below (NOTE(review): inferred from the
@        "sub r3,r3,#256+32" -- confirm against the dispatch stub
@        earlier in this file).
@ Out:   state[8] at [r0] updated in place.
@ Clobbers: r1-r3, q0-q3, q8-q15, flags.
@ -----------------------------------------------------------------------
sha256_block_data_order_armv8:
LARMv8:
	vld1.32	{q0,q1},[r0]		@ q0 = state[0..3] (ABCD), q1 = state[4..7] (EFGH)
	sub	r3,r3,#256+32		@ rewind r3 to the start of K256
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	Loop_v8

.align	4
Loop_v8:
	@ Load one 64-byte block as four 4-word message vectors q8..q11
	@ and byte-swap the big-endian input words to host order.
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!		@ first four round constants from K256
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload current state for the final feed-forward add
	vmov	q15,q1
	teq	r1,r2			@ last block?  flags survive to the "bne"
					@ below (NEON data ops do not touch APSR)
	@ Rounds 0..47: twelve groups of four rounds each.  Every group
	@ adds four K256 constants to one message vector, runs SHA256H /
	@ SHA256H2 on the (q0,q1) state, and extends the message schedule
	@ with SHA256SU0/SHA256SU1 for a later group.  Constant loads for
	@ the next group are interleaved one group ahead.
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ Rounds 48..63: final four groups -- no further schedule updates.
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	@ Feed-forward: add the state saved in q14/q15 before the rounds.
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne			@ condition from "teq r1,r2" above
	bne	Loop_v8			@ more input blocks remain

	vst1.32	{q0,q1},[r0]		@ store updated state[8]

	bx	lr		@ bx lr
   2814 
   2815 #endif
@ NUL-terminated identification string: "SHA256 block transform for
@ ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Mach-O declaration of the 4-byte OPENSSL_armcap_P capability word,
@ plus a non-lazy symbol pointer so position-independent code can
@ reach it indirectly.
.comm	_OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol	_OPENSSL_armcap_P
.long	0
.private_extern	_OPENSSL_armcap_P
#endif
   2827