@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in
@ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
@ per byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for the dual-issue pipeline resulted in a 22% improvement
@ on the Cortex-A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in a 16%
@ improvement on the Cortex-A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex-A8 it was measured to process one
@ byte in 12.5 cycles, 23% faster than the integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but there it is 50% faster than the
@ integer-only code (meaning that the latter performs sub-optimally;
@ nothing was done about it).

@ May 2014.
@
@ Add an ARMv8 code path performing at 2.0 cycles per processed byte on
@ Apple A7.

#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)


.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif


.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

.word	0				@ terminator
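
@ The table above is the standard FIPS 180-4 round-constant table: entry
@ i is the first 32 bits of the fractional part of the cube root of the
@ i-th prime. A minimal C sketch that reproduces an entry (illustrative
@ only, not part of the generated code; assumes IEEE-754 doubles):
@
@	#include <math.h>
@	#include <stdint.h>
@	/* k256_const(2) == 0x428a2f98, k256_const(311) == 0xc67178f2 */
@	static uint32_t k256_const(unsigned prime) {
@		double r = cbrt((double)prime);
@		return (uint32_t)((r - floor(r)) * 4294967296.0);
@	}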
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
LOPENSSL_armcap:
.word	OPENSSL_armcap_P-Lsha256_block_data_order
#endif
.align	5

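@ C-level view of the entry point below (a sketch inferred from this
@ code, not a declaration copied from any header): r0 = state,
@ r1 = input, r2 = number of 64-byte blocks, i.e. roughly
@
@	void sha256_block_data_order(uint32_t state[8],
@	                             const uint8_t *inp, size_t num);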
.globl	_sha256_block_data_order
.private_extern	_sha256_block_data_order
#ifdef __thumb2__
.thumb_func	_sha256_block_data_order
#endif
_sha256_block_data_order:
Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ _sha256_block_data_order
#else
	adr	r3,Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	LARMv8
	tst	r12,#ARMV7_NEON
	bne	LNEON
#endif
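@ Dispatch note: LOPENSSL_armcap stores the PC-relative offset of
@ OPENSSL_armcap_P, so the loads above fetch the runtime capability
@ word; the ARMV8_SHA256 and ARMV7_NEON bits route execution to the
@ hardware-SHA (LARMv8) or NEON (LNEON) paths, and clear bits fall
@ through to the integer-only code below.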
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
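@ Stack frame from here on, as laid out by the stmdb/sub above:
@	sp+# 0*4..#15*4	X[0..15], 16-word message schedule window
@	sp+#16*4	saved r0 (ctx), pulled after the last round
@	sp+#17*4	saved r1 (inp)
@	sp+#18*4	saved r2 (end-of-input pointer)
@ r4-r11 hold the working variables a-h; r14 walks the K256 table.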
Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
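@ Each of the 64 unrolled rounds below implements the FIPS 180-4 step;
@ roughly, in C (a sketch of the logic, with W[i] kept in X[i&15] on
@ the stack and the a-h rotation done by register renaming, not moves):
@
@	T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + W[i];
@	T2 = Sigma0(a) + Maj(a,b,c);
@	h = g; g = f; f = e; e = d + T1;
@	d = c; c = b; b = a; a = T1 + T2;
@
@ where Sigma0(x) = ror(x,2)^ror(x,13)^ror(x,22) and
@       Sigma1(x) = ror(x,6)^ror(x,11)^ror(x,25).
@ Ch and Maj use the forms Ch(e,f,g) = ((f^g)&e)^g and
@ Maj(a,b,c) = ((a^b)&(b^c))^b, which is why b^c is precomputed
@ ("magic") and a^b is forwarded between rounds; the Maj addend itself
@ is folded in one round late ("h+=Maj(a,b,c) from the past").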
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
Lrounds_16_xx:
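@ Rounds 16-63 (three passes over the 16 unrolled round bodies below):
@ the 16-word window is extended in place per FIPS 180-4, interleaved
@ with the round itself. In C, the schedule step is roughly
@
@	W[i&15] += sigma1(W[(i-2)&15]) + W[(i-7)&15] + sigma0(W[(i-15)&15]);
@
@ with sigma0(x) = ror(x,7)^ror(x,18)^(x>>3) and
@      sigma1(x) = ror(x,17)^ror(x,19)^(x>>10).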
   1058 	@ ldr	r2,[sp,#1*4]		@ 16
   1059 	@ ldr	r1,[sp,#14*4]
   1060 	mov	r0,r2,ror#7
   1061 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
   1062 	mov	r12,r1,ror#17
   1063 	eor	r0,r0,r2,ror#18
   1064 	eor	r12,r12,r1,ror#19
   1065 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1066 	ldr	r2,[sp,#0*4]
   1067 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1068 	ldr	r1,[sp,#9*4]
   1069 
   1070 	add	r12,r12,r0
   1071 	eor	r0,r8,r8,ror#5	@ from BODY_00_15
   1072 	add	r2,r2,r12
   1073 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
   1074 	add	r2,r2,r1			@ X[i]
   1075 	ldr	r12,[r14],#4			@ *K256++
   1076 	add	r11,r11,r2			@ h+=X[i]
   1077 	str	r2,[sp,#0*4]
   1078 	eor	r2,r9,r10
   1079 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
   1080 	and	r2,r2,r8
   1081 	add	r11,r11,r12			@ h+=K256[i]
   1082 	eor	r2,r2,r10			@ Ch(e,f,g)
   1083 	eor	r0,r4,r4,ror#11
   1084 	add	r11,r11,r2			@ h+=Ch(e,f,g)
   1085 #if 16==31
   1086 	and	r12,r12,#0xff
   1087 	cmp	r12,#0xf2			@ done?
   1088 #endif
   1089 #if 16<15
   1090 # if __ARM_ARCH__>=7
   1091 	ldr	r2,[r1],#4			@ prefetch
   1092 # else
   1093 	ldrb	r2,[r1,#3]
   1094 # endif
   1095 	eor	r12,r4,r5			@ a^b, b^c in next round
   1096 #else
   1097 	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
   1098 	eor	r12,r4,r5			@ a^b, b^c in next round
   1099 	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
   1100 #endif
   1101 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
   1102 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1103 	add	r7,r7,r11			@ d+=h
   1104 	eor	r3,r3,r5			@ Maj(a,b,c)
   1105 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
   1106 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
   1107 	@ ldr	r2,[sp,#2*4]		@ 17
   1108 	@ ldr	r1,[sp,#15*4]
   1109 	mov	r0,r2,ror#7
   1110 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
   1111 	mov	r3,r1,ror#17
   1112 	eor	r0,r0,r2,ror#18
   1113 	eor	r3,r3,r1,ror#19
   1114 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1115 	ldr	r2,[sp,#1*4]
   1116 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1117 	ldr	r1,[sp,#10*4]
   1118 
   1119 	add	r3,r3,r0
   1120 	eor	r0,r7,r7,ror#5	@ from BODY_00_15
   1121 	add	r2,r2,r3
   1122 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
   1123 	add	r2,r2,r1			@ X[i]
   1124 	ldr	r3,[r14],#4			@ *K256++
   1125 	add	r10,r10,r2			@ h+=X[i]
   1126 	str	r2,[sp,#1*4]
   1127 	eor	r2,r8,r9
   1128 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
   1129 	and	r2,r2,r7
   1130 	add	r10,r10,r3			@ h+=K256[i]
   1131 	eor	r2,r2,r9			@ Ch(e,f,g)
   1132 	eor	r0,r11,r11,ror#11
   1133 	add	r10,r10,r2			@ h+=Ch(e,f,g)
   1134 #if 17==31
   1135 	and	r3,r3,#0xff
   1136 	cmp	r3,#0xf2			@ done?
   1137 #endif
   1138 #if 17<15
   1139 # if __ARM_ARCH__>=7
   1140 	ldr	r2,[r1],#4			@ prefetch
   1141 # else
   1142 	ldrb	r2,[r1,#3]
   1143 # endif
   1144 	eor	r3,r11,r4			@ a^b, b^c in next round
   1145 #else
   1146 	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
   1147 	eor	r3,r11,r4			@ a^b, b^c in next round
   1148 	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
   1149 #endif
   1150 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
   1151 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1152 	add	r6,r6,r10			@ d+=h
   1153 	eor	r12,r12,r4			@ Maj(a,b,c)
   1154 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
   1155 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
   1156 	@ ldr	r2,[sp,#3*4]		@ 18
   1157 	@ ldr	r1,[sp,#0*4]
   1158 	mov	r0,r2,ror#7
   1159 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
   1160 	mov	r12,r1,ror#17
   1161 	eor	r0,r0,r2,ror#18
   1162 	eor	r12,r12,r1,ror#19
   1163 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1164 	ldr	r2,[sp,#2*4]
   1165 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1166 	ldr	r1,[sp,#11*4]
   1167 
   1168 	add	r12,r12,r0
   1169 	eor	r0,r6,r6,ror#5	@ from BODY_00_15
   1170 	add	r2,r2,r12
   1171 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
   1172 	add	r2,r2,r1			@ X[i]
   1173 	ldr	r12,[r14],#4			@ *K256++
   1174 	add	r9,r9,r2			@ h+=X[i]
   1175 	str	r2,[sp,#2*4]
   1176 	eor	r2,r7,r8
   1177 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
   1178 	and	r2,r2,r6
   1179 	add	r9,r9,r12			@ h+=K256[i]
   1180 	eor	r2,r2,r8			@ Ch(e,f,g)
   1181 	eor	r0,r10,r10,ror#11
   1182 	add	r9,r9,r2			@ h+=Ch(e,f,g)
   1183 #if 18==31
   1184 	and	r12,r12,#0xff
   1185 	cmp	r12,#0xf2			@ done?
   1186 #endif
   1187 #if 18<15
   1188 # if __ARM_ARCH__>=7
   1189 	ldr	r2,[r1],#4			@ prefetch
   1190 # else
   1191 	ldrb	r2,[r1,#3]
   1192 # endif
   1193 	eor	r12,r10,r11			@ a^b, b^c in next round
   1194 #else
   1195 	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
   1196 	eor	r12,r10,r11			@ a^b, b^c in next round
   1197 	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
   1198 #endif
   1199 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
   1200 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1201 	add	r5,r5,r9			@ d+=h
   1202 	eor	r3,r3,r11			@ Maj(a,b,c)
   1203 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
   1204 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
   1205 	@ ldr	r2,[sp,#4*4]		@ 19
   1206 	@ ldr	r1,[sp,#1*4]
   1207 	mov	r0,r2,ror#7
   1208 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
   1209 	mov	r3,r1,ror#17
   1210 	eor	r0,r0,r2,ror#18
   1211 	eor	r3,r3,r1,ror#19
   1212 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1213 	ldr	r2,[sp,#3*4]
   1214 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1215 	ldr	r1,[sp,#12*4]
   1216 
   1217 	add	r3,r3,r0
   1218 	eor	r0,r5,r5,ror#5	@ from BODY_00_15
   1219 	add	r2,r2,r3
   1220 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
   1221 	add	r2,r2,r1			@ X[i]
   1222 	ldr	r3,[r14],#4			@ *K256++
   1223 	add	r8,r8,r2			@ h+=X[i]
   1224 	str	r2,[sp,#3*4]
   1225 	eor	r2,r6,r7
   1226 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
   1227 	and	r2,r2,r5
   1228 	add	r8,r8,r3			@ h+=K256[i]
   1229 	eor	r2,r2,r7			@ Ch(e,f,g)
   1230 	eor	r0,r9,r9,ror#11
   1231 	add	r8,r8,r2			@ h+=Ch(e,f,g)
   1232 #if 19==31
   1233 	and	r3,r3,#0xff
   1234 	cmp	r3,#0xf2			@ done?
   1235 #endif
   1236 #if 19<15
   1237 # if __ARM_ARCH__>=7
   1238 	ldr	r2,[r1],#4			@ prefetch
   1239 # else
   1240 	ldrb	r2,[r1,#3]
   1241 # endif
   1242 	eor	r3,r9,r10			@ a^b, b^c in next round
   1243 #else
   1244 	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
   1245 	eor	r3,r9,r10			@ a^b, b^c in next round
   1246 	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
   1247 #endif
   1248 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
   1249 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1250 	add	r4,r4,r8			@ d+=h
   1251 	eor	r12,r12,r10			@ Maj(a,b,c)
   1252 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
   1253 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
   1254 	@ ldr	r2,[sp,#5*4]		@ 20
   1255 	@ ldr	r1,[sp,#2*4]
   1256 	mov	r0,r2,ror#7
   1257 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
   1258 	mov	r12,r1,ror#17
   1259 	eor	r0,r0,r2,ror#18
   1260 	eor	r12,r12,r1,ror#19
   1261 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1262 	ldr	r2,[sp,#4*4]
   1263 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1264 	ldr	r1,[sp,#13*4]
   1265 
   1266 	add	r12,r12,r0
   1267 	eor	r0,r4,r4,ror#5	@ from BODY_00_15
   1268 	add	r2,r2,r12
   1269 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
   1270 	add	r2,r2,r1			@ X[i]
   1271 	ldr	r12,[r14],#4			@ *K256++
   1272 	add	r7,r7,r2			@ h+=X[i]
   1273 	str	r2,[sp,#4*4]
   1274 	eor	r2,r5,r6
   1275 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
   1276 	and	r2,r2,r4
   1277 	add	r7,r7,r12			@ h+=K256[i]
   1278 	eor	r2,r2,r6			@ Ch(e,f,g)
   1279 	eor	r0,r8,r8,ror#11
   1280 	add	r7,r7,r2			@ h+=Ch(e,f,g)
   1281 #if 20==31
   1282 	and	r12,r12,#0xff
   1283 	cmp	r12,#0xf2			@ done?
   1284 #endif
   1285 #if 20<15
   1286 # if __ARM_ARCH__>=7
   1287 	ldr	r2,[r1],#4			@ prefetch
   1288 # else
   1289 	ldrb	r2,[r1,#3]
   1290 # endif
   1291 	eor	r12,r8,r9			@ a^b, b^c in next round
   1292 #else
   1293 	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
   1294 	eor	r12,r8,r9			@ a^b, b^c in next round
   1295 	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
   1296 #endif
   1297 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
   1298 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1299 	add	r11,r11,r7			@ d+=h
   1300 	eor	r3,r3,r9			@ Maj(a,b,c)
   1301 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
   1302 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
   1303 	@ ldr	r2,[sp,#6*4]		@ 21
   1304 	@ ldr	r1,[sp,#3*4]
   1305 	mov	r0,r2,ror#7
   1306 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
   1307 	mov	r3,r1,ror#17
   1308 	eor	r0,r0,r2,ror#18
   1309 	eor	r3,r3,r1,ror#19
   1310 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1311 	ldr	r2,[sp,#5*4]
   1312 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1313 	ldr	r1,[sp,#14*4]
   1314 
   1315 	add	r3,r3,r0
   1316 	eor	r0,r11,r11,ror#5	@ from BODY_00_15
   1317 	add	r2,r2,r3
   1318 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
   1319 	add	r2,r2,r1			@ X[i]
   1320 	ldr	r3,[r14],#4			@ *K256++
   1321 	add	r6,r6,r2			@ h+=X[i]
   1322 	str	r2,[sp,#5*4]
   1323 	eor	r2,r4,r5
   1324 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
   1325 	and	r2,r2,r11
   1326 	add	r6,r6,r3			@ h+=K256[i]
   1327 	eor	r2,r2,r5			@ Ch(e,f,g)
   1328 	eor	r0,r7,r7,ror#11
   1329 	add	r6,r6,r2			@ h+=Ch(e,f,g)
   1330 #if 21==31
   1331 	and	r3,r3,#0xff
   1332 	cmp	r3,#0xf2			@ done?
   1333 #endif
   1334 #if 21<15
   1335 # if __ARM_ARCH__>=7
   1336 	ldr	r2,[r1],#4			@ prefetch
   1337 # else
   1338 	ldrb	r2,[r1,#3]
   1339 # endif
   1340 	eor	r3,r7,r8			@ a^b, b^c in next round
   1341 #else
   1342 	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
   1343 	eor	r3,r7,r8			@ a^b, b^c in next round
   1344 	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
   1345 #endif
   1346 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
   1347 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1348 	add	r10,r10,r6			@ d+=h
   1349 	eor	r12,r12,r8			@ Maj(a,b,c)
   1350 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
   1351 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
   1352 	@ ldr	r2,[sp,#7*4]		@ 22
   1353 	@ ldr	r1,[sp,#4*4]
   1354 	mov	r0,r2,ror#7
   1355 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
   1356 	mov	r12,r1,ror#17
   1357 	eor	r0,r0,r2,ror#18
   1358 	eor	r12,r12,r1,ror#19
   1359 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1360 	ldr	r2,[sp,#6*4]
   1361 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1362 	ldr	r1,[sp,#15*4]
   1363 
   1364 	add	r12,r12,r0
   1365 	eor	r0,r10,r10,ror#5	@ from BODY_00_15
   1366 	add	r2,r2,r12
   1367 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
   1368 	add	r2,r2,r1			@ X[i]
   1369 	ldr	r12,[r14],#4			@ *K256++
   1370 	add	r5,r5,r2			@ h+=X[i]
   1371 	str	r2,[sp,#6*4]
   1372 	eor	r2,r11,r4
   1373 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
   1374 	and	r2,r2,r10
   1375 	add	r5,r5,r12			@ h+=K256[i]
   1376 	eor	r2,r2,r4			@ Ch(e,f,g)
   1377 	eor	r0,r6,r6,ror#11
   1378 	add	r5,r5,r2			@ h+=Ch(e,f,g)
   1379 #if 22==31
   1380 	and	r12,r12,#0xff
   1381 	cmp	r12,#0xf2			@ done?
   1382 #endif
   1383 #if 22<15
   1384 # if __ARM_ARCH__>=7
   1385 	ldr	r2,[r1],#4			@ prefetch
   1386 # else
   1387 	ldrb	r2,[r1,#3]
   1388 # endif
   1389 	eor	r12,r6,r7			@ a^b, b^c in next round
   1390 #else
   1391 	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
   1392 	eor	r12,r6,r7			@ a^b, b^c in next round
   1393 	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
   1394 #endif
   1395 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
   1396 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1397 	add	r9,r9,r5			@ d+=h
   1398 	eor	r3,r3,r7			@ Maj(a,b,c)
   1399 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
   1400 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
   1401 	@ ldr	r2,[sp,#8*4]		@ 23
   1402 	@ ldr	r1,[sp,#5*4]
   1403 	mov	r0,r2,ror#7
   1404 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
   1405 	mov	r3,r1,ror#17
   1406 	eor	r0,r0,r2,ror#18
   1407 	eor	r3,r3,r1,ror#19
   1408 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1409 	ldr	r2,[sp,#7*4]
   1410 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1411 	ldr	r1,[sp,#0*4]
   1412 
   1413 	add	r3,r3,r0
   1414 	eor	r0,r9,r9,ror#5	@ from BODY_00_15
   1415 	add	r2,r2,r3
   1416 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
   1417 	add	r2,r2,r1			@ X[i]
   1418 	ldr	r3,[r14],#4			@ *K256++
   1419 	add	r4,r4,r2			@ h+=X[i]
   1420 	str	r2,[sp,#7*4]
   1421 	eor	r2,r10,r11
   1422 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
   1423 	and	r2,r2,r9
   1424 	add	r4,r4,r3			@ h+=K256[i]
   1425 	eor	r2,r2,r11			@ Ch(e,f,g)
   1426 	eor	r0,r5,r5,ror#11
   1427 	add	r4,r4,r2			@ h+=Ch(e,f,g)
   1428 #if 23==31
   1429 	and	r3,r3,#0xff
   1430 	cmp	r3,#0xf2			@ done?
   1431 #endif
   1432 #if 23<15
   1433 # if __ARM_ARCH__>=7
   1434 	ldr	r2,[r1],#4			@ prefetch
   1435 # else
   1436 	ldrb	r2,[r1,#3]
   1437 # endif
   1438 	eor	r3,r5,r6			@ a^b, b^c in next round
   1439 #else
   1440 	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
   1441 	eor	r3,r5,r6			@ a^b, b^c in next round
   1442 	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
   1443 #endif
   1444 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
   1445 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1446 	add	r8,r8,r4			@ d+=h
   1447 	eor	r12,r12,r6			@ Maj(a,b,c)
   1448 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
   1449 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
   1450 	@ ldr	r2,[sp,#9*4]		@ 24
   1451 	@ ldr	r1,[sp,#6*4]
   1452 	mov	r0,r2,ror#7
   1453 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
   1454 	mov	r12,r1,ror#17
   1455 	eor	r0,r0,r2,ror#18
   1456 	eor	r12,r12,r1,ror#19
   1457 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1458 	ldr	r2,[sp,#8*4]
   1459 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1460 	ldr	r1,[sp,#1*4]
   1461 
   1462 	add	r12,r12,r0
   1463 	eor	r0,r8,r8,ror#5	@ from BODY_00_15
   1464 	add	r2,r2,r12
   1465 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
   1466 	add	r2,r2,r1			@ X[i]
   1467 	ldr	r12,[r14],#4			@ *K256++
   1468 	add	r11,r11,r2			@ h+=X[i]
   1469 	str	r2,[sp,#8*4]
   1470 	eor	r2,r9,r10
   1471 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
   1472 	and	r2,r2,r8
   1473 	add	r11,r11,r12			@ h+=K256[i]
   1474 	eor	r2,r2,r10			@ Ch(e,f,g)
   1475 	eor	r0,r4,r4,ror#11
   1476 	add	r11,r11,r2			@ h+=Ch(e,f,g)
   1477 #if 24==31
   1478 	and	r12,r12,#0xff
   1479 	cmp	r12,#0xf2			@ done?
   1480 #endif
   1481 #if 24<15
   1482 # if __ARM_ARCH__>=7
   1483 	ldr	r2,[r1],#4			@ prefetch
   1484 # else
   1485 	ldrb	r2,[r1,#3]
   1486 # endif
   1487 	eor	r12,r4,r5			@ a^b, b^c in next round
   1488 #else
   1489 	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
   1490 	eor	r12,r4,r5			@ a^b, b^c in next round
   1491 	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
   1492 #endif
   1493 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
   1494 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1495 	add	r7,r7,r11			@ d+=h
   1496 	eor	r3,r3,r5			@ Maj(a,b,c)
   1497 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
   1498 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
   1499 	@ ldr	r2,[sp,#10*4]		@ 25
   1500 	@ ldr	r1,[sp,#7*4]
   1501 	mov	r0,r2,ror#7
   1502 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
   1503 	mov	r3,r1,ror#17
   1504 	eor	r0,r0,r2,ror#18
   1505 	eor	r3,r3,r1,ror#19
   1506 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1507 	ldr	r2,[sp,#9*4]
   1508 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1509 	ldr	r1,[sp,#2*4]
   1510 
   1511 	add	r3,r3,r0
   1512 	eor	r0,r7,r7,ror#5	@ from BODY_00_15
   1513 	add	r2,r2,r3
   1514 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
   1515 	add	r2,r2,r1			@ X[i]
   1516 	ldr	r3,[r14],#4			@ *K256++
   1517 	add	r10,r10,r2			@ h+=X[i]
   1518 	str	r2,[sp,#9*4]
   1519 	eor	r2,r8,r9
   1520 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
   1521 	and	r2,r2,r7
   1522 	add	r10,r10,r3			@ h+=K256[i]
   1523 	eor	r2,r2,r9			@ Ch(e,f,g)
   1524 	eor	r0,r11,r11,ror#11
   1525 	add	r10,r10,r2			@ h+=Ch(e,f,g)
   1526 #if 25==31
   1527 	and	r3,r3,#0xff
   1528 	cmp	r3,#0xf2			@ done?
   1529 #endif
   1530 #if 25<15
   1531 # if __ARM_ARCH__>=7
   1532 	ldr	r2,[r1],#4			@ prefetch
   1533 # else
   1534 	ldrb	r2,[r1,#3]
   1535 # endif
   1536 	eor	r3,r11,r4			@ a^b, b^c in next round
   1537 #else
   1538 	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
   1539 	eor	r3,r11,r4			@ a^b, b^c in next round
   1540 	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
   1541 #endif
   1542 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
   1543 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1544 	add	r6,r6,r10			@ d+=h
   1545 	eor	r12,r12,r4			@ Maj(a,b,c)
   1546 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
   1547 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
   1548 	@ ldr	r2,[sp,#11*4]		@ 26
   1549 	@ ldr	r1,[sp,#8*4]
   1550 	mov	r0,r2,ror#7
   1551 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
   1552 	mov	r12,r1,ror#17
   1553 	eor	r0,r0,r2,ror#18
   1554 	eor	r12,r12,r1,ror#19
   1555 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1556 	ldr	r2,[sp,#10*4]
   1557 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1558 	ldr	r1,[sp,#3*4]
   1559 
   1560 	add	r12,r12,r0
   1561 	eor	r0,r6,r6,ror#5	@ from BODY_00_15
   1562 	add	r2,r2,r12
   1563 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
   1564 	add	r2,r2,r1			@ X[i]
   1565 	ldr	r12,[r14],#4			@ *K256++
   1566 	add	r9,r9,r2			@ h+=X[i]
   1567 	str	r2,[sp,#10*4]
   1568 	eor	r2,r7,r8
   1569 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
   1570 	and	r2,r2,r6
   1571 	add	r9,r9,r12			@ h+=K256[i]
   1572 	eor	r2,r2,r8			@ Ch(e,f,g)
   1573 	eor	r0,r10,r10,ror#11
   1574 	add	r9,r9,r2			@ h+=Ch(e,f,g)
   1575 #if 26==31
   1576 	and	r12,r12,#0xff
   1577 	cmp	r12,#0xf2			@ done?
   1578 #endif
   1579 #if 26<15
   1580 # if __ARM_ARCH__>=7
   1581 	ldr	r2,[r1],#4			@ prefetch
   1582 # else
   1583 	ldrb	r2,[r1,#3]
   1584 # endif
   1585 	eor	r12,r10,r11			@ a^b, b^c in next round
   1586 #else
   1587 	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
   1588 	eor	r12,r10,r11			@ a^b, b^c in next round
   1589 	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
   1590 #endif
   1591 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
   1592 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1593 	add	r5,r5,r9			@ d+=h
   1594 	eor	r3,r3,r11			@ Maj(a,b,c)
   1595 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
   1596 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
   1597 	@ ldr	r2,[sp,#12*4]		@ 27
   1598 	@ ldr	r1,[sp,#9*4]
   1599 	mov	r0,r2,ror#7
   1600 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
   1601 	mov	r3,r1,ror#17
   1602 	eor	r0,r0,r2,ror#18
   1603 	eor	r3,r3,r1,ror#19
   1604 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1605 	ldr	r2,[sp,#11*4]
   1606 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1607 	ldr	r1,[sp,#4*4]
   1608 
   1609 	add	r3,r3,r0
   1610 	eor	r0,r5,r5,ror#5	@ from BODY_00_15
   1611 	add	r2,r2,r3
   1612 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
   1613 	add	r2,r2,r1			@ X[i]
   1614 	ldr	r3,[r14],#4			@ *K256++
   1615 	add	r8,r8,r2			@ h+=X[i]
   1616 	str	r2,[sp,#11*4]
   1617 	eor	r2,r6,r7
   1618 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
   1619 	and	r2,r2,r5
   1620 	add	r8,r8,r3			@ h+=K256[i]
   1621 	eor	r2,r2,r7			@ Ch(e,f,g)
   1622 	eor	r0,r9,r9,ror#11
   1623 	add	r8,r8,r2			@ h+=Ch(e,f,g)
   1624 #if 27==31
   1625 	and	r3,r3,#0xff
   1626 	cmp	r3,#0xf2			@ done?
   1627 #endif
   1628 #if 27<15
   1629 # if __ARM_ARCH__>=7
   1630 	ldr	r2,[r1],#4			@ prefetch
   1631 # else
   1632 	ldrb	r2,[r1,#3]
   1633 # endif
   1634 	eor	r3,r9,r10			@ a^b, b^c in next round
   1635 #else
   1636 	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
   1637 	eor	r3,r9,r10			@ a^b, b^c in next round
   1638 	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
   1639 #endif
   1640 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
   1641 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1642 	add	r4,r4,r8			@ d+=h
   1643 	eor	r12,r12,r10			@ Maj(a,b,c)
   1644 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
   1645 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
   1646 	@ ldr	r2,[sp,#13*4]		@ 28
   1647 	@ ldr	r1,[sp,#10*4]
   1648 	mov	r0,r2,ror#7
   1649 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
   1650 	mov	r12,r1,ror#17
   1651 	eor	r0,r0,r2,ror#18
   1652 	eor	r12,r12,r1,ror#19
   1653 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1654 	ldr	r2,[sp,#12*4]
   1655 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1656 	ldr	r1,[sp,#5*4]
   1657 
   1658 	add	r12,r12,r0
   1659 	eor	r0,r4,r4,ror#5	@ from BODY_00_15
   1660 	add	r2,r2,r12
   1661 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
   1662 	add	r2,r2,r1			@ X[i]
   1663 	ldr	r12,[r14],#4			@ *K256++
   1664 	add	r7,r7,r2			@ h+=X[i]
   1665 	str	r2,[sp,#12*4]
   1666 	eor	r2,r5,r6
   1667 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
   1668 	and	r2,r2,r4
   1669 	add	r7,r7,r12			@ h+=K256[i]
   1670 	eor	r2,r2,r6			@ Ch(e,f,g)
   1671 	eor	r0,r8,r8,ror#11
   1672 	add	r7,r7,r2			@ h+=Ch(e,f,g)
   1673 #if 28==31
   1674 	and	r12,r12,#0xff
   1675 	cmp	r12,#0xf2			@ done?
   1676 #endif
   1677 #if 28<15
   1678 # if __ARM_ARCH__>=7
   1679 	ldr	r2,[r1],#4			@ prefetch
   1680 # else
   1681 	ldrb	r2,[r1,#3]
   1682 # endif
   1683 	eor	r12,r8,r9			@ a^b, b^c in next round
   1684 #else
   1685 	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
   1686 	eor	r12,r8,r9			@ a^b, b^c in next round
   1687 	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
   1688 #endif
   1689 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
   1690 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1691 	add	r11,r11,r7			@ d+=h
   1692 	eor	r3,r3,r9			@ Maj(a,b,c)
   1693 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
   1694 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
   1695 	@ ldr	r2,[sp,#14*4]		@ 29
   1696 	@ ldr	r1,[sp,#11*4]
   1697 	mov	r0,r2,ror#7
   1698 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
   1699 	mov	r3,r1,ror#17
   1700 	eor	r0,r0,r2,ror#18
   1701 	eor	r3,r3,r1,ror#19
   1702 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1703 	ldr	r2,[sp,#13*4]
   1704 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1705 	ldr	r1,[sp,#6*4]
   1706 
   1707 	add	r3,r3,r0
   1708 	eor	r0,r11,r11,ror#5	@ from BODY_00_15
   1709 	add	r2,r2,r3
   1710 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
   1711 	add	r2,r2,r1			@ X[i]
   1712 	ldr	r3,[r14],#4			@ *K256++
   1713 	add	r6,r6,r2			@ h+=X[i]
   1714 	str	r2,[sp,#13*4]
   1715 	eor	r2,r4,r5
   1716 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
   1717 	and	r2,r2,r11
   1718 	add	r6,r6,r3			@ h+=K256[i]
   1719 	eor	r2,r2,r5			@ Ch(e,f,g)
   1720 	eor	r0,r7,r7,ror#11
   1721 	add	r6,r6,r2			@ h+=Ch(e,f,g)
   1722 #if 29==31
   1723 	and	r3,r3,#0xff
   1724 	cmp	r3,#0xf2			@ done?
   1725 #endif
   1726 #if 29<15
   1727 # if __ARM_ARCH__>=7
   1728 	ldr	r2,[r1],#4			@ prefetch
   1729 # else
   1730 	ldrb	r2,[r1,#3]
   1731 # endif
   1732 	eor	r3,r7,r8			@ a^b, b^c in next round
   1733 #else
   1734 	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
   1735 	eor	r3,r7,r8			@ a^b, b^c in next round
   1736 	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
   1737 #endif
   1738 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
   1739 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1740 	add	r10,r10,r6			@ d+=h
   1741 	eor	r12,r12,r8			@ Maj(a,b,c)
   1742 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
   1743 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
   1744 	@ ldr	r2,[sp,#15*4]		@ 30
   1745 	@ ldr	r1,[sp,#12*4]
   1746 	mov	r0,r2,ror#7
   1747 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
   1748 	mov	r12,r1,ror#17
   1749 	eor	r0,r0,r2,ror#18
   1750 	eor	r12,r12,r1,ror#19
   1751 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1752 	ldr	r2,[sp,#14*4]
   1753 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1754 	ldr	r1,[sp,#7*4]
   1755 
   1756 	add	r12,r12,r0
   1757 	eor	r0,r10,r10,ror#5	@ from BODY_00_15
   1758 	add	r2,r2,r12
   1759 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
   1760 	add	r2,r2,r1			@ X[i]
   1761 	ldr	r12,[r14],#4			@ *K256++
   1762 	add	r5,r5,r2			@ h+=X[i]
   1763 	str	r2,[sp,#14*4]
   1764 	eor	r2,r11,r4
   1765 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
   1766 	and	r2,r2,r10
   1767 	add	r5,r5,r12			@ h+=K256[i]
   1768 	eor	r2,r2,r4			@ Ch(e,f,g)
   1769 	eor	r0,r6,r6,ror#11
   1770 	add	r5,r5,r2			@ h+=Ch(e,f,g)
   1771 #if 30==31
   1772 	and	r12,r12,#0xff
   1773 	cmp	r12,#0xf2			@ done?
   1774 #endif
   1775 #if 30<15
   1776 # if __ARM_ARCH__>=7
   1777 	ldr	r2,[r1],#4			@ prefetch
   1778 # else
   1779 	ldrb	r2,[r1,#3]
   1780 # endif
   1781 	eor	r12,r6,r7			@ a^b, b^c in next round
   1782 #else
   1783 	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
   1784 	eor	r12,r6,r7			@ a^b, b^c in next round
   1785 	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
   1786 #endif
   1787 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
   1788 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1789 	add	r9,r9,r5			@ d+=h
   1790 	eor	r3,r3,r7			@ Maj(a,b,c)
   1791 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
   1792 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
   1793 	@ ldr	r2,[sp,#0*4]		@ 31
   1794 	@ ldr	r1,[sp,#13*4]
   1795 	mov	r0,r2,ror#7
   1796 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
   1797 	mov	r3,r1,ror#17
   1798 	eor	r0,r0,r2,ror#18
   1799 	eor	r3,r3,r1,ror#19
   1800 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1801 	ldr	r2,[sp,#15*4]
   1802 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1803 	ldr	r1,[sp,#8*4]
   1804 
   1805 	add	r3,r3,r0
   1806 	eor	r0,r9,r9,ror#5	@ from BODY_00_15
   1807 	add	r2,r2,r3
   1808 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
   1809 	add	r2,r2,r1			@ X[i]
   1810 	ldr	r3,[r14],#4			@ *K256++
   1811 	add	r4,r4,r2			@ h+=X[i]
   1812 	str	r2,[sp,#15*4]
   1813 	eor	r2,r10,r11
   1814 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
   1815 	and	r2,r2,r9
   1816 	add	r4,r4,r3			@ h+=K256[i]
   1817 	eor	r2,r2,r11			@ Ch(e,f,g)
   1818 	eor	r0,r5,r5,ror#11
   1819 	add	r4,r4,r2			@ h+=Ch(e,f,g)
   1820 #if 31==31
   1821 	and	r3,r3,#0xff
   1822 	cmp	r3,#0xf2			@ done?
   1823 #endif
   1824 #if 31<15
   1825 # if __ARM_ARCH__>=7
   1826 	ldr	r2,[r1],#4			@ prefetch
   1827 # else
   1828 	ldrb	r2,[r1,#3]
   1829 # endif
   1830 	eor	r3,r5,r6			@ a^b, b^c in next round
   1831 #else
   1832 	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
   1833 	eor	r3,r5,r6			@ a^b, b^c in next round
   1834 	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
   1835 #endif
   1836 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
   1837 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1838 	add	r8,r8,r4			@ d+=h
   1839 	eor	r12,r12,r6			@ Maj(a,b,c)
   1840 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
   1841 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
   1842 #if __ARM_ARCH__>=7
	ite	eq			@ Thumb-2 IT block; a checked no-op when assembled as ARM
   1844 #endif
   1845 	ldreq	r3,[sp,#16*4]		@ pull ctx
   1846 	bne	Lrounds_16_xx
   1847 
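@ All 64 rounds done: fold the working variables a..h (r4-r11) back
@ into the hash context pulled above, then loop while input remains.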
   1848 	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
   1849 	ldr	r0,[r3,#0]
   1850 	ldr	r2,[r3,#4]
   1851 	ldr	r12,[r3,#8]
   1852 	add	r4,r4,r0
   1853 	ldr	r0,[r3,#12]
   1854 	add	r5,r5,r2
   1855 	ldr	r2,[r3,#16]
   1856 	add	r6,r6,r12
   1857 	ldr	r12,[r3,#20]
   1858 	add	r7,r7,r0
   1859 	ldr	r0,[r3,#24]
   1860 	add	r8,r8,r2
   1861 	ldr	r2,[r3,#28]
   1862 	add	r9,r9,r12
   1863 	ldr	r1,[sp,#17*4]		@ pull inp
   1864 	ldr	r12,[sp,#18*4]		@ pull inp+len
   1865 	add	r10,r10,r0
   1866 	add	r11,r11,r2
   1867 	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
   1868 	cmp	r1,r12
   1869 	sub	r14,r14,#256	@ rewind Ktbl
   1870 	bne	Loop
   1871 
   1872 	add	sp,sp,#19*4	@ destroy frame
   1873 #if __ARM_ARCH__>=5
   1874 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
   1875 #else
   1876 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
   1877 	tst	lr,#1
   1878 	moveq	pc,lr			@ be binary compatible with V4, yet
   1879 .word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   1880 #endif
   1881 
   1882 #if __ARM_MAX_ARCH__>=7
   1883 
   1884 
   1885 
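@ NEON code path. The 16 message words live in q0-q3; one pass of
@ L_00_48 runs 16 integer rounds interleaved with the NEON schedule
@ update for the next four quads, and the X[i]+K256[i] sums are staged
@ on the 16-byte-aligned stack frame for the scalar rounds to consume.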
   1886 .globl	_sha256_block_data_order_neon
   1887 .private_extern	_sha256_block_data_order_neon
   1888 #ifdef __thumb2__
   1889 .thumb_func	_sha256_block_data_order_neon
   1890 #endif
   1891 .align	5
   1892 .skip	16
   1893 _sha256_block_data_order_neon:
   1894 LNEON:
   1895 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
   1896 
   1897 	sub	r11,sp,#16*4+16
   1898 	adr	r14,K256
   1899 	bic	r11,r11,#15		@ align for 128-bit stores
   1900 	mov	r12,sp
   1901 	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ convert len to a pointer to the end of inp
   1903 
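@ Frame layout after alloca: [sp,#0..63] holds X[i]+K256[i] for the
@ current 16 rounds, [sp,#64] ctx, [sp,#68] inp, [sp,#72] inp+len,
@ [sp,#76] the caller's sp.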
   1904 	vld1.8	{q0},[r1]!
   1905 	vld1.8	{q1},[r1]!
   1906 	vld1.8	{q2},[r1]!
   1907 	vld1.8	{q3},[r1]!
   1908 	vld1.32	{q8},[r14,:128]!
   1909 	vld1.32	{q9},[r14,:128]!
   1910 	vld1.32	{q10},[r14,:128]!
   1911 	vld1.32	{q11},[r14,:128]!
   1912 	vrev32.8	q0,q0		@ yes, even on
   1913 	str	r0,[sp,#64]
   1914 	vrev32.8	q1,q1		@ big-endian
   1915 	str	r1,[sp,#68]
   1916 	mov	r1,sp
   1917 	vrev32.8	q2,q2
   1918 	str	r2,[sp,#72]
   1919 	vrev32.8	q3,q3
   1920 	str	r12,[sp,#76]		@ save original sp
   1921 	vadd.i32	q8,q8,q0
   1922 	vadd.i32	q9,q9,q1
   1923 	vst1.32	{q8},[r1,:128]!
   1924 	vadd.i32	q10,q10,q2
   1925 	vst1.32	{q9},[r1,:128]!
   1926 	vadd.i32	q11,q11,q3
   1927 	vst1.32	{q10},[r1,:128]!
   1928 	vst1.32	{q11},[r1,:128]!
   1929 
   1930 	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
   1931 	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12		@ no Maj deferred from a previous round
	eor	r3,r5,r6		@ b^c for the first round's Maj
   1935 	b	L_00_48
   1936 
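@ NEON has no rotate instruction, so the sigma0/sigma1 rotations are
@ synthesised as a right shift (vshr) merged with a left-shift-insert
@ (vsli): e.g. vshr.u32 #7 followed by vsli.32 #25 yields ror#7.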
   1937 .align	4
   1938 L_00_48:
   1939 	vext.8	q8,q0,q1,#4
   1940 	add	r11,r11,r2
   1941 	eor	r2,r9,r10
   1942 	eor	r0,r8,r8,ror#5
   1943 	vext.8	q9,q2,q3,#4
   1944 	add	r4,r4,r12
   1945 	and	r2,r2,r8
   1946 	eor	r12,r0,r8,ror#19
   1947 	vshr.u32	q10,q8,#7
   1948 	eor	r0,r4,r4,ror#11
   1949 	eor	r2,r2,r10
   1950 	vadd.i32	q0,q0,q9
   1951 	add	r11,r11,r12,ror#6
   1952 	eor	r12,r4,r5
   1953 	vshr.u32	q9,q8,#3
   1954 	eor	r0,r0,r4,ror#20
   1955 	add	r11,r11,r2
   1956 	vsli.32	q10,q8,#25
   1957 	ldr	r2,[sp,#4]
   1958 	and	r3,r3,r12
   1959 	vshr.u32	q11,q8,#18
   1960 	add	r7,r7,r11
   1961 	add	r11,r11,r0,ror#2
   1962 	eor	r3,r3,r5
   1963 	veor	q9,q9,q10
   1964 	add	r10,r10,r2
   1965 	vsli.32	q11,q8,#14
   1966 	eor	r2,r8,r9
   1967 	eor	r0,r7,r7,ror#5
   1968 	vshr.u32	d24,d7,#17
   1969 	add	r11,r11,r3
   1970 	and	r2,r2,r7
   1971 	veor	q9,q9,q11
   1972 	eor	r3,r0,r7,ror#19
   1973 	eor	r0,r11,r11,ror#11
   1974 	vsli.32	d24,d7,#15
   1975 	eor	r2,r2,r9
   1976 	add	r10,r10,r3,ror#6
   1977 	vshr.u32	d25,d7,#10
   1978 	eor	r3,r11,r4
   1979 	eor	r0,r0,r11,ror#20
   1980 	vadd.i32	q0,q0,q9
   1981 	add	r10,r10,r2
   1982 	ldr	r2,[sp,#8]
   1983 	veor	d25,d25,d24
   1984 	and	r12,r12,r3
   1985 	add	r6,r6,r10
   1986 	vshr.u32	d24,d7,#19
   1987 	add	r10,r10,r0,ror#2
   1988 	eor	r12,r12,r4
   1989 	vsli.32	d24,d7,#13
   1990 	add	r9,r9,r2
   1991 	eor	r2,r7,r8
   1992 	veor	d25,d25,d24
   1993 	eor	r0,r6,r6,ror#5
   1994 	add	r10,r10,r12
   1995 	vadd.i32	d0,d0,d25
   1996 	and	r2,r2,r6
   1997 	eor	r12,r0,r6,ror#19
   1998 	vshr.u32	d24,d0,#17
   1999 	eor	r0,r10,r10,ror#11
   2000 	eor	r2,r2,r8
   2001 	vsli.32	d24,d0,#15
   2002 	add	r9,r9,r12,ror#6
   2003 	eor	r12,r10,r11
   2004 	vshr.u32	d25,d0,#10
   2005 	eor	r0,r0,r10,ror#20
   2006 	add	r9,r9,r2
   2007 	veor	d25,d25,d24
   2008 	ldr	r2,[sp,#12]
   2009 	and	r3,r3,r12
   2010 	vshr.u32	d24,d0,#19
   2011 	add	r5,r5,r9
   2012 	add	r9,r9,r0,ror#2
   2013 	eor	r3,r3,r11
   2014 	vld1.32	{q8},[r14,:128]!
   2015 	add	r8,r8,r2
   2016 	vsli.32	d24,d0,#13
   2017 	eor	r2,r6,r7
   2018 	eor	r0,r5,r5,ror#5
   2019 	veor	d25,d25,d24
   2020 	add	r9,r9,r3
   2021 	and	r2,r2,r5
   2022 	vadd.i32	d1,d1,d25
   2023 	eor	r3,r0,r5,ror#19
   2024 	eor	r0,r9,r9,ror#11
   2025 	vadd.i32	q8,q8,q0
   2026 	eor	r2,r2,r7
   2027 	add	r8,r8,r3,ror#6
   2028 	eor	r3,r9,r10
   2029 	eor	r0,r0,r9,ror#20
   2030 	add	r8,r8,r2
   2031 	ldr	r2,[sp,#16]
   2032 	and	r12,r12,r3
   2033 	add	r4,r4,r8
   2034 	vst1.32	{q8},[r1,:128]!
   2035 	add	r8,r8,r0,ror#2
   2036 	eor	r12,r12,r10
   2037 	vext.8	q8,q1,q2,#4
   2038 	add	r7,r7,r2
   2039 	eor	r2,r5,r6
   2040 	eor	r0,r4,r4,ror#5
   2041 	vext.8	q9,q3,q0,#4
   2042 	add	r8,r8,r12
   2043 	and	r2,r2,r4
   2044 	eor	r12,r0,r4,ror#19
   2045 	vshr.u32	q10,q8,#7
   2046 	eor	r0,r8,r8,ror#11
   2047 	eor	r2,r2,r6
   2048 	vadd.i32	q1,q1,q9
   2049 	add	r7,r7,r12,ror#6
   2050 	eor	r12,r8,r9
   2051 	vshr.u32	q9,q8,#3
   2052 	eor	r0,r0,r8,ror#20
   2053 	add	r7,r7,r2
   2054 	vsli.32	q10,q8,#25
   2055 	ldr	r2,[sp,#20]
   2056 	and	r3,r3,r12
   2057 	vshr.u32	q11,q8,#18
   2058 	add	r11,r11,r7
   2059 	add	r7,r7,r0,ror#2
   2060 	eor	r3,r3,r9
   2061 	veor	q9,q9,q10
   2062 	add	r6,r6,r2
   2063 	vsli.32	q11,q8,#14
   2064 	eor	r2,r4,r5
   2065 	eor	r0,r11,r11,ror#5
   2066 	vshr.u32	d24,d1,#17
   2067 	add	r7,r7,r3
   2068 	and	r2,r2,r11
   2069 	veor	q9,q9,q11
   2070 	eor	r3,r0,r11,ror#19
   2071 	eor	r0,r7,r7,ror#11
   2072 	vsli.32	d24,d1,#15
   2073 	eor	r2,r2,r5
   2074 	add	r6,r6,r3,ror#6
   2075 	vshr.u32	d25,d1,#10
   2076 	eor	r3,r7,r8
   2077 	eor	r0,r0,r7,ror#20
   2078 	vadd.i32	q1,q1,q9
   2079 	add	r6,r6,r2
   2080 	ldr	r2,[sp,#24]
   2081 	veor	d25,d25,d24
   2082 	and	r12,r12,r3
   2083 	add	r10,r10,r6
   2084 	vshr.u32	d24,d1,#19
   2085 	add	r6,r6,r0,ror#2
   2086 	eor	r12,r12,r8
   2087 	vsli.32	d24,d1,#13
   2088 	add	r5,r5,r2
   2089 	eor	r2,r11,r4
   2090 	veor	d25,d25,d24
   2091 	eor	r0,r10,r10,ror#5
   2092 	add	r6,r6,r12
   2093 	vadd.i32	d2,d2,d25
   2094 	and	r2,r2,r10
   2095 	eor	r12,r0,r10,ror#19
   2096 	vshr.u32	d24,d2,#17
   2097 	eor	r0,r6,r6,ror#11
   2098 	eor	r2,r2,r4
   2099 	vsli.32	d24,d2,#15
   2100 	add	r5,r5,r12,ror#6
   2101 	eor	r12,r6,r7
   2102 	vshr.u32	d25,d2,#10
   2103 	eor	r0,r0,r6,ror#20
   2104 	add	r5,r5,r2
   2105 	veor	d25,d25,d24
   2106 	ldr	r2,[sp,#28]
   2107 	and	r3,r3,r12
   2108 	vshr.u32	d24,d2,#19
   2109 	add	r9,r9,r5
   2110 	add	r5,r5,r0,ror#2
   2111 	eor	r3,r3,r7
   2112 	vld1.32	{q8},[r14,:128]!
   2113 	add	r4,r4,r2
   2114 	vsli.32	d24,d2,#13
   2115 	eor	r2,r10,r11
   2116 	eor	r0,r9,r9,ror#5
   2117 	veor	d25,d25,d24
   2118 	add	r5,r5,r3
   2119 	and	r2,r2,r9
   2120 	vadd.i32	d3,d3,d25
   2121 	eor	r3,r0,r9,ror#19
   2122 	eor	r0,r5,r5,ror#11
   2123 	vadd.i32	q8,q8,q1
   2124 	eor	r2,r2,r11
   2125 	add	r4,r4,r3,ror#6
   2126 	eor	r3,r5,r6
   2127 	eor	r0,r0,r5,ror#20
   2128 	add	r4,r4,r2
   2129 	ldr	r2,[sp,#32]
   2130 	and	r12,r12,r3
   2131 	add	r8,r8,r4
   2132 	vst1.32	{q8},[r1,:128]!
   2133 	add	r4,r4,r0,ror#2
   2134 	eor	r12,r12,r6
   2135 	vext.8	q8,q2,q3,#4
   2136 	add	r11,r11,r2
   2137 	eor	r2,r9,r10
   2138 	eor	r0,r8,r8,ror#5
   2139 	vext.8	q9,q0,q1,#4
   2140 	add	r4,r4,r12
   2141 	and	r2,r2,r8
   2142 	eor	r12,r0,r8,ror#19
   2143 	vshr.u32	q10,q8,#7
   2144 	eor	r0,r4,r4,ror#11
   2145 	eor	r2,r2,r10
   2146 	vadd.i32	q2,q2,q9
   2147 	add	r11,r11,r12,ror#6
   2148 	eor	r12,r4,r5
   2149 	vshr.u32	q9,q8,#3
   2150 	eor	r0,r0,r4,ror#20
   2151 	add	r11,r11,r2
   2152 	vsli.32	q10,q8,#25
   2153 	ldr	r2,[sp,#36]
   2154 	and	r3,r3,r12
   2155 	vshr.u32	q11,q8,#18
   2156 	add	r7,r7,r11
   2157 	add	r11,r11,r0,ror#2
   2158 	eor	r3,r3,r5
   2159 	veor	q9,q9,q10
   2160 	add	r10,r10,r2
   2161 	vsli.32	q11,q8,#14
   2162 	eor	r2,r8,r9
   2163 	eor	r0,r7,r7,ror#5
   2164 	vshr.u32	d24,d3,#17
   2165 	add	r11,r11,r3
   2166 	and	r2,r2,r7
   2167 	veor	q9,q9,q11
   2168 	eor	r3,r0,r7,ror#19
   2169 	eor	r0,r11,r11,ror#11
   2170 	vsli.32	d24,d3,#15
   2171 	eor	r2,r2,r9
   2172 	add	r10,r10,r3,ror#6
   2173 	vshr.u32	d25,d3,#10
   2174 	eor	r3,r11,r4
   2175 	eor	r0,r0,r11,ror#20
   2176 	vadd.i32	q2,q2,q9
   2177 	add	r10,r10,r2
   2178 	ldr	r2,[sp,#40]
   2179 	veor	d25,d25,d24
   2180 	and	r12,r12,r3
   2181 	add	r6,r6,r10
   2182 	vshr.u32	d24,d3,#19
   2183 	add	r10,r10,r0,ror#2
   2184 	eor	r12,r12,r4
   2185 	vsli.32	d24,d3,#13
   2186 	add	r9,r9,r2
   2187 	eor	r2,r7,r8
   2188 	veor	d25,d25,d24
   2189 	eor	r0,r6,r6,ror#5
   2190 	add	r10,r10,r12
   2191 	vadd.i32	d4,d4,d25
   2192 	and	r2,r2,r6
   2193 	eor	r12,r0,r6,ror#19
   2194 	vshr.u32	d24,d4,#17
   2195 	eor	r0,r10,r10,ror#11
   2196 	eor	r2,r2,r8
   2197 	vsli.32	d24,d4,#15
   2198 	add	r9,r9,r12,ror#6
   2199 	eor	r12,r10,r11
   2200 	vshr.u32	d25,d4,#10
   2201 	eor	r0,r0,r10,ror#20
   2202 	add	r9,r9,r2
   2203 	veor	d25,d25,d24
   2204 	ldr	r2,[sp,#44]
   2205 	and	r3,r3,r12
   2206 	vshr.u32	d24,d4,#19
   2207 	add	r5,r5,r9
   2208 	add	r9,r9,r0,ror#2
   2209 	eor	r3,r3,r11
   2210 	vld1.32	{q8},[r14,:128]!
   2211 	add	r8,r8,r2
   2212 	vsli.32	d24,d4,#13
   2213 	eor	r2,r6,r7
   2214 	eor	r0,r5,r5,ror#5
   2215 	veor	d25,d25,d24
   2216 	add	r9,r9,r3
   2217 	and	r2,r2,r5
   2218 	vadd.i32	d5,d5,d25
   2219 	eor	r3,r0,r5,ror#19
   2220 	eor	r0,r9,r9,ror#11
   2221 	vadd.i32	q8,q8,q2
   2222 	eor	r2,r2,r7
   2223 	add	r8,r8,r3,ror#6
   2224 	eor	r3,r9,r10
   2225 	eor	r0,r0,r9,ror#20
   2226 	add	r8,r8,r2
   2227 	ldr	r2,[sp,#48]
   2228 	and	r12,r12,r3
   2229 	add	r4,r4,r8
   2230 	vst1.32	{q8},[r1,:128]!
   2231 	add	r8,r8,r0,ror#2
   2232 	eor	r12,r12,r10
   2233 	vext.8	q8,q3,q0,#4
   2234 	add	r7,r7,r2
   2235 	eor	r2,r5,r6
   2236 	eor	r0,r4,r4,ror#5
   2237 	vext.8	q9,q1,q2,#4
   2238 	add	r8,r8,r12
   2239 	and	r2,r2,r4
   2240 	eor	r12,r0,r4,ror#19
   2241 	vshr.u32	q10,q8,#7
   2242 	eor	r0,r8,r8,ror#11
   2243 	eor	r2,r2,r6
   2244 	vadd.i32	q3,q3,q9
   2245 	add	r7,r7,r12,ror#6
   2246 	eor	r12,r8,r9
   2247 	vshr.u32	q9,q8,#3
   2248 	eor	r0,r0,r8,ror#20
   2249 	add	r7,r7,r2
   2250 	vsli.32	q10,q8,#25
   2251 	ldr	r2,[sp,#52]
   2252 	and	r3,r3,r12
   2253 	vshr.u32	q11,q8,#18
   2254 	add	r11,r11,r7
   2255 	add	r7,r7,r0,ror#2
   2256 	eor	r3,r3,r9
   2257 	veor	q9,q9,q10
   2258 	add	r6,r6,r2
   2259 	vsli.32	q11,q8,#14
   2260 	eor	r2,r4,r5
   2261 	eor	r0,r11,r11,ror#5
   2262 	vshr.u32	d24,d5,#17
   2263 	add	r7,r7,r3
   2264 	and	r2,r2,r11
   2265 	veor	q9,q9,q11
   2266 	eor	r3,r0,r11,ror#19
   2267 	eor	r0,r7,r7,ror#11
   2268 	vsli.32	d24,d5,#15
   2269 	eor	r2,r2,r5
   2270 	add	r6,r6,r3,ror#6
   2271 	vshr.u32	d25,d5,#10
   2272 	eor	r3,r7,r8
   2273 	eor	r0,r0,r7,ror#20
   2274 	vadd.i32	q3,q3,q9
   2275 	add	r6,r6,r2
   2276 	ldr	r2,[sp,#56]
   2277 	veor	d25,d25,d24
   2278 	and	r12,r12,r3
   2279 	add	r10,r10,r6
   2280 	vshr.u32	d24,d5,#19
   2281 	add	r6,r6,r0,ror#2
   2282 	eor	r12,r12,r8
   2283 	vsli.32	d24,d5,#13
   2284 	add	r5,r5,r2
   2285 	eor	r2,r11,r4
   2286 	veor	d25,d25,d24
   2287 	eor	r0,r10,r10,ror#5
   2288 	add	r6,r6,r12
   2289 	vadd.i32	d6,d6,d25
   2290 	and	r2,r2,r10
   2291 	eor	r12,r0,r10,ror#19
   2292 	vshr.u32	d24,d6,#17
   2293 	eor	r0,r6,r6,ror#11
   2294 	eor	r2,r2,r4
   2295 	vsli.32	d24,d6,#15
   2296 	add	r5,r5,r12,ror#6
   2297 	eor	r12,r6,r7
   2298 	vshr.u32	d25,d6,#10
   2299 	eor	r0,r0,r6,ror#20
   2300 	add	r5,r5,r2
   2301 	veor	d25,d25,d24
   2302 	ldr	r2,[sp,#60]
   2303 	and	r3,r3,r12
   2304 	vshr.u32	d24,d6,#19
   2305 	add	r9,r9,r5
   2306 	add	r5,r5,r0,ror#2
   2307 	eor	r3,r3,r7
   2308 	vld1.32	{q8},[r14,:128]!
   2309 	add	r4,r4,r2
   2310 	vsli.32	d24,d6,#13
   2311 	eor	r2,r10,r11
   2312 	eor	r0,r9,r9,ror#5
   2313 	veor	d25,d25,d24
   2314 	add	r5,r5,r3
   2315 	and	r2,r2,r9
   2316 	vadd.i32	d7,d7,d25
   2317 	eor	r3,r0,r9,ror#19
   2318 	eor	r0,r5,r5,ror#11
   2319 	vadd.i32	q8,q8,q3
   2320 	eor	r2,r2,r11
   2321 	add	r4,r4,r3,ror#6
   2322 	eor	r3,r5,r6
   2323 	eor	r0,r0,r5,ror#20
   2324 	add	r4,r4,r2
   2325 	ldr	r2,[r14]
   2326 	and	r12,r12,r3
   2327 	add	r8,r8,r4
   2328 	vst1.32	{q8},[r1,:128]!
   2329 	add	r4,r4,r0,ror#2
   2330 	eor	r12,r12,r6
   2331 	teq	r2,#0				@ check for K256 terminator
   2332 	ldr	r2,[sp,#0]
   2333 	sub	r1,r1,#64
   2334 	bne	L_00_48
   2335 
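@ Final 16 rounds: the X[i]+K256[i] values are already on the stack.
@ If this was the last block, rewind r1 (subeq below, "avoid SEGV") so
@ the prefetch reloads the current block instead of reading past the
@ end of the input; the redundant schedule is computed but never used.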
   2336 	ldr	r1,[sp,#68]
   2337 	ldr	r0,[sp,#72]
   2338 	sub	r14,r14,#256	@ rewind r14
   2339 	teq	r1,r0
   2340 	it	eq
   2341 	subeq	r1,r1,#64		@ avoid SEGV
   2342 	vld1.8	{q0},[r1]!		@ load next input block
   2343 	vld1.8	{q1},[r1]!
   2344 	vld1.8	{q2},[r1]!
   2345 	vld1.8	{q3},[r1]!
   2346 	it	ne
   2347 	strne	r1,[sp,#68]
   2348 	mov	r1,sp
   2349 	add	r11,r11,r2
   2350 	eor	r2,r9,r10
   2351 	eor	r0,r8,r8,ror#5
   2352 	add	r4,r4,r12
   2353 	vld1.32	{q8},[r14,:128]!
   2354 	and	r2,r2,r8
   2355 	eor	r12,r0,r8,ror#19
   2356 	eor	r0,r4,r4,ror#11
   2357 	eor	r2,r2,r10
   2358 	vrev32.8	q0,q0
   2359 	add	r11,r11,r12,ror#6
   2360 	eor	r12,r4,r5
   2361 	eor	r0,r0,r4,ror#20
   2362 	add	r11,r11,r2
   2363 	vadd.i32	q8,q8,q0
   2364 	ldr	r2,[sp,#4]
   2365 	and	r3,r3,r12
   2366 	add	r7,r7,r11
   2367 	add	r11,r11,r0,ror#2
   2368 	eor	r3,r3,r5
   2369 	add	r10,r10,r2
   2370 	eor	r2,r8,r9
   2371 	eor	r0,r7,r7,ror#5
   2372 	add	r11,r11,r3
   2373 	and	r2,r2,r7
   2374 	eor	r3,r0,r7,ror#19
   2375 	eor	r0,r11,r11,ror#11
   2376 	eor	r2,r2,r9
   2377 	add	r10,r10,r3,ror#6
   2378 	eor	r3,r11,r4
   2379 	eor	r0,r0,r11,ror#20
   2380 	add	r10,r10,r2
   2381 	ldr	r2,[sp,#8]
   2382 	and	r12,r12,r3
   2383 	add	r6,r6,r10
   2384 	add	r10,r10,r0,ror#2
   2385 	eor	r12,r12,r4
   2386 	add	r9,r9,r2
   2387 	eor	r2,r7,r8
   2388 	eor	r0,r6,r6,ror#5
   2389 	add	r10,r10,r12
   2390 	and	r2,r2,r6
   2391 	eor	r12,r0,r6,ror#19
   2392 	eor	r0,r10,r10,ror#11
   2393 	eor	r2,r2,r8
   2394 	add	r9,r9,r12,ror#6
   2395 	eor	r12,r10,r11
   2396 	eor	r0,r0,r10,ror#20
   2397 	add	r9,r9,r2
   2398 	ldr	r2,[sp,#12]
   2399 	and	r3,r3,r12
   2400 	add	r5,r5,r9
   2401 	add	r9,r9,r0,ror#2
   2402 	eor	r3,r3,r11
   2403 	add	r8,r8,r2
   2404 	eor	r2,r6,r7
   2405 	eor	r0,r5,r5,ror#5
   2406 	add	r9,r9,r3
   2407 	and	r2,r2,r5
   2408 	eor	r3,r0,r5,ror#19
   2409 	eor	r0,r9,r9,ror#11
   2410 	eor	r2,r2,r7
   2411 	add	r8,r8,r3,ror#6
   2412 	eor	r3,r9,r10
   2413 	eor	r0,r0,r9,ror#20
   2414 	add	r8,r8,r2
   2415 	ldr	r2,[sp,#16]
   2416 	and	r12,r12,r3
   2417 	add	r4,r4,r8
   2418 	add	r8,r8,r0,ror#2
   2419 	eor	r12,r12,r10
   2420 	vst1.32	{q8},[r1,:128]!
   2421 	add	r7,r7,r2
   2422 	eor	r2,r5,r6
   2423 	eor	r0,r4,r4,ror#5
   2424 	add	r8,r8,r12
   2425 	vld1.32	{q8},[r14,:128]!
   2426 	and	r2,r2,r4
   2427 	eor	r12,r0,r4,ror#19
   2428 	eor	r0,r8,r8,ror#11
   2429 	eor	r2,r2,r6
   2430 	vrev32.8	q1,q1
   2431 	add	r7,r7,r12,ror#6
   2432 	eor	r12,r8,r9
   2433 	eor	r0,r0,r8,ror#20
   2434 	add	r7,r7,r2
   2435 	vadd.i32	q8,q8,q1
   2436 	ldr	r2,[sp,#20]
   2437 	and	r3,r3,r12
   2438 	add	r11,r11,r7
   2439 	add	r7,r7,r0,ror#2
   2440 	eor	r3,r3,r9
   2441 	add	r6,r6,r2
   2442 	eor	r2,r4,r5
   2443 	eor	r0,r11,r11,ror#5
   2444 	add	r7,r7,r3
   2445 	and	r2,r2,r11
   2446 	eor	r3,r0,r11,ror#19
   2447 	eor	r0,r7,r7,ror#11
   2448 	eor	r2,r2,r5
   2449 	add	r6,r6,r3,ror#6
   2450 	eor	r3,r7,r8
   2451 	eor	r0,r0,r7,ror#20
   2452 	add	r6,r6,r2
   2453 	ldr	r2,[sp,#24]
   2454 	and	r12,r12,r3
   2455 	add	r10,r10,r6
   2456 	add	r6,r6,r0,ror#2
   2457 	eor	r12,r12,r8
   2458 	add	r5,r5,r2
   2459 	eor	r2,r11,r4
   2460 	eor	r0,r10,r10,ror#5
   2461 	add	r6,r6,r12
   2462 	and	r2,r2,r10
   2463 	eor	r12,r0,r10,ror#19
   2464 	eor	r0,r6,r6,ror#11
   2465 	eor	r2,r2,r4
   2466 	add	r5,r5,r12,ror#6
   2467 	eor	r12,r6,r7
   2468 	eor	r0,r0,r6,ror#20
   2469 	add	r5,r5,r2
   2470 	ldr	r2,[sp,#28]
   2471 	and	r3,r3,r12
   2472 	add	r9,r9,r5
   2473 	add	r5,r5,r0,ror#2
   2474 	eor	r3,r3,r7
   2475 	add	r4,r4,r2
   2476 	eor	r2,r10,r11
   2477 	eor	r0,r9,r9,ror#5
   2478 	add	r5,r5,r3
   2479 	and	r2,r2,r9
   2480 	eor	r3,r0,r9,ror#19
   2481 	eor	r0,r5,r5,ror#11
   2482 	eor	r2,r2,r11
   2483 	add	r4,r4,r3,ror#6
   2484 	eor	r3,r5,r6
   2485 	eor	r0,r0,r5,ror#20
   2486 	add	r4,r4,r2
   2487 	ldr	r2,[sp,#32]
   2488 	and	r12,r12,r3
   2489 	add	r8,r8,r4
   2490 	add	r4,r4,r0,ror#2
   2491 	eor	r12,r12,r6
   2492 	vst1.32	{q8},[r1,:128]!
   2493 	add	r11,r11,r2
   2494 	eor	r2,r9,r10
   2495 	eor	r0,r8,r8,ror#5
   2496 	add	r4,r4,r12
   2497 	vld1.32	{q8},[r14,:128]!
   2498 	and	r2,r2,r8
   2499 	eor	r12,r0,r8,ror#19
   2500 	eor	r0,r4,r4,ror#11
   2501 	eor	r2,r2,r10
   2502 	vrev32.8	q2,q2
   2503 	add	r11,r11,r12,ror#6
   2504 	eor	r12,r4,r5
   2505 	eor	r0,r0,r4,ror#20
   2506 	add	r11,r11,r2
   2507 	vadd.i32	q8,q8,q2
   2508 	ldr	r2,[sp,#36]
   2509 	and	r3,r3,r12
   2510 	add	r7,r7,r11
   2511 	add	r11,r11,r0,ror#2
   2512 	eor	r3,r3,r5
   2513 	add	r10,r10,r2
   2514 	eor	r2,r8,r9
   2515 	eor	r0,r7,r7,ror#5
   2516 	add	r11,r11,r3
   2517 	and	r2,r2,r7
   2518 	eor	r3,r0,r7,ror#19
   2519 	eor	r0,r11,r11,ror#11
   2520 	eor	r2,r2,r9
   2521 	add	r10,r10,r3,ror#6
   2522 	eor	r3,r11,r4
   2523 	eor	r0,r0,r11,ror#20
   2524 	add	r10,r10,r2
   2525 	ldr	r2,[sp,#40]
   2526 	and	r12,r12,r3
   2527 	add	r6,r6,r10
   2528 	add	r10,r10,r0,ror#2
   2529 	eor	r12,r12,r4
   2530 	add	r9,r9,r2
   2531 	eor	r2,r7,r8
   2532 	eor	r0,r6,r6,ror#5
   2533 	add	r10,r10,r12
   2534 	and	r2,r2,r6
   2535 	eor	r12,r0,r6,ror#19
   2536 	eor	r0,r10,r10,ror#11
   2537 	eor	r2,r2,r8
   2538 	add	r9,r9,r12,ror#6
   2539 	eor	r12,r10,r11
   2540 	eor	r0,r0,r10,ror#20
   2541 	add	r9,r9,r2
   2542 	ldr	r2,[sp,#44]
   2543 	and	r3,r3,r12
   2544 	add	r5,r5,r9
   2545 	add	r9,r9,r0,ror#2
   2546 	eor	r3,r3,r11
   2547 	add	r8,r8,r2
   2548 	eor	r2,r6,r7
   2549 	eor	r0,r5,r5,ror#5
   2550 	add	r9,r9,r3
   2551 	and	r2,r2,r5
   2552 	eor	r3,r0,r5,ror#19
   2553 	eor	r0,r9,r9,ror#11
   2554 	eor	r2,r2,r7
   2555 	add	r8,r8,r3,ror#6
   2556 	eor	r3,r9,r10
   2557 	eor	r0,r0,r9,ror#20
   2558 	add	r8,r8,r2
   2559 	ldr	r2,[sp,#48]
   2560 	and	r12,r12,r3
   2561 	add	r4,r4,r8
   2562 	add	r8,r8,r0,ror#2
   2563 	eor	r12,r12,r10
   2564 	vst1.32	{q8},[r1,:128]!
   2565 	add	r7,r7,r2
   2566 	eor	r2,r5,r6
   2567 	eor	r0,r4,r4,ror#5
   2568 	add	r8,r8,r12
   2569 	vld1.32	{q8},[r14,:128]!
   2570 	and	r2,r2,r4
   2571 	eor	r12,r0,r4,ror#19
   2572 	eor	r0,r8,r8,ror#11
   2573 	eor	r2,r2,r6
   2574 	vrev32.8	q3,q3
   2575 	add	r7,r7,r12,ror#6
   2576 	eor	r12,r8,r9
   2577 	eor	r0,r0,r8,ror#20
   2578 	add	r7,r7,r2
   2579 	vadd.i32	q8,q8,q3
   2580 	ldr	r2,[sp,#52]
   2581 	and	r3,r3,r12
   2582 	add	r11,r11,r7
   2583 	add	r7,r7,r0,ror#2
   2584 	eor	r3,r3,r9
   2585 	add	r6,r6,r2
   2586 	eor	r2,r4,r5
   2587 	eor	r0,r11,r11,ror#5
   2588 	add	r7,r7,r3
   2589 	and	r2,r2,r11
   2590 	eor	r3,r0,r11,ror#19
   2591 	eor	r0,r7,r7,ror#11
   2592 	eor	r2,r2,r5
   2593 	add	r6,r6,r3,ror#6
   2594 	eor	r3,r7,r8
   2595 	eor	r0,r0,r7,ror#20
   2596 	add	r6,r6,r2
   2597 	ldr	r2,[sp,#56]
   2598 	and	r12,r12,r3
   2599 	add	r10,r10,r6
   2600 	add	r6,r6,r0,ror#2
   2601 	eor	r12,r12,r8
   2602 	add	r5,r5,r2
   2603 	eor	r2,r11,r4
   2604 	eor	r0,r10,r10,ror#5
   2605 	add	r6,r6,r12
   2606 	and	r2,r2,r10
   2607 	eor	r12,r0,r10,ror#19
   2608 	eor	r0,r6,r6,ror#11
   2609 	eor	r2,r2,r4
   2610 	add	r5,r5,r12,ror#6
   2611 	eor	r12,r6,r7
   2612 	eor	r0,r0,r6,ror#20
   2613 	add	r5,r5,r2
   2614 	ldr	r2,[sp,#60]
   2615 	and	r3,r3,r12
   2616 	add	r9,r9,r5
   2617 	add	r5,r5,r0,ror#2
   2618 	eor	r3,r3,r7
   2619 	add	r4,r4,r2
   2620 	eor	r2,r10,r11
   2621 	eor	r0,r9,r9,ror#5
   2622 	add	r5,r5,r3
   2623 	and	r2,r2,r9
   2624 	eor	r3,r0,r9,ror#19
   2625 	eor	r0,r5,r5,ror#11
   2626 	eor	r2,r2,r11
   2627 	add	r4,r4,r3,ror#6
   2628 	eor	r3,r5,r6
   2629 	eor	r0,r0,r5,ror#20
   2630 	add	r4,r4,r2
   2631 	ldr	r2,[sp,#64]
   2632 	and	r12,r12,r3
   2633 	add	r8,r8,r4
   2634 	add	r4,r4,r0,ror#2
   2635 	eor	r12,r12,r6
   2636 	vst1.32	{q8},[r1,:128]!
   2637 	ldr	r0,[r2,#0]
   2638 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
   2639 	ldr	r12,[r2,#4]
   2640 	ldr	r3,[r2,#8]
   2641 	ldr	r1,[r2,#12]
   2642 	add	r4,r4,r0			@ accumulate
   2643 	ldr	r0,[r2,#16]
   2644 	add	r5,r5,r12
   2645 	ldr	r12,[r2,#20]
   2646 	add	r6,r6,r3
   2647 	ldr	r3,[r2,#24]
   2648 	add	r7,r7,r1
   2649 	ldr	r1,[r2,#28]
   2650 	add	r8,r8,r0
   2651 	str	r4,[r2],#4
   2652 	add	r9,r9,r12
   2653 	str	r5,[r2],#4
   2654 	add	r10,r10,r3
   2655 	str	r6,[r2],#4
   2656 	add	r11,r11,r1
   2657 	str	r7,[r2],#4
   2658 	stmia	r2,{r8,r9,r10,r11}
   2659 
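@ More input? Restart L_00_48 with the schedule for the next block
@ already staged; otherwise restore the caller's sp and return.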
   2660 	ittte	ne
   2661 	movne	r1,sp
   2662 	ldrne	r2,[sp,#0]
   2663 	eorne	r12,r12,r12
   2664 	ldreq	sp,[sp,#76]			@ restore original sp
   2665 	itt	ne
   2666 	eorne	r3,r5,r6
   2667 	bne	L_00_48
   2668 
   2669 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
   2670 
   2671 #endif
   2672 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
   2673 
   2674 # if defined(__thumb2__)
   2675 #  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
   2676 # else
   2677 #  define INST(a,b,c,d)	.byte	a,b,c,d
   2678 # endif
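
@ The sha256* instructions below are emitted as raw bytes via INST so
@ that pre-ARMv8 assemblers still accept this file; the Thumb-2 variant
@ reorders the bytes into halfword order and ORs in the bits that
@ distinguish the Thumb-2 NEON encoding.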
   2679 
   2680 #ifdef __thumb2__
   2681 .thumb_func	sha256_block_data_order_armv8
   2682 #endif
   2683 .align	5
   2684 sha256_block_data_order_armv8:
   2685 LARMv8:
   2686 	vld1.32	{q0,q1},[r0]
   2687 	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ convert len to a pointer to the end of inp
   2689 	b	Loop_v8
   2690 
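@ q0/q1 carry the ABCD/EFGH halves of the state; q14/q15 keep a copy
@ (see "offload") for the final accumulation. Each group below is one
@ quad-round: sha256su0/sha256su1 extend the message schedule while
@ sha256h/sha256h2 update the state with the K256[i]+X[i] sums staged
@ in q12/q13.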
   2691 .align	4
   2692 Loop_v8:
   2693 	vld1.8	{q8,q9},[r1]!
   2694 	vld1.8	{q10,q11},[r1]!
   2695 	vld1.32	{q12},[r3]!
   2696 	vrev32.8	q8,q8
   2697 	vrev32.8	q9,q9
   2698 	vrev32.8	q10,q10
   2699 	vrev32.8	q11,q11
   2700 	vmov	q14,q0	@ offload
   2701 	vmov	q15,q1
   2702 	teq	r1,r2
   2703 	vld1.32	{q13},[r3]!
   2704 	vadd.i32	q12,q12,q8
   2705 	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
   2706 	vmov	q2,q0
   2707 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
   2708 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
   2709 	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
   2710 	vld1.32	{q12},[r3]!
   2711 	vadd.i32	q13,q13,q9
   2712 	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
   2713 	vmov	q2,q0
   2714 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
   2715 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
   2716 	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
   2717 	vld1.32	{q13},[r3]!
   2718 	vadd.i32	q12,q12,q10
   2719 	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
   2720 	vmov	q2,q0
   2721 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
   2722 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
   2723 	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
   2724 	vld1.32	{q12},[r3]!
   2725 	vadd.i32	q13,q13,q11
   2726 	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
   2727 	vmov	q2,q0
   2728 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
   2729 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
   2730 	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
   2731 	vld1.32	{q13},[r3]!
   2732 	vadd.i32	q12,q12,q8
   2733 	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
   2734 	vmov	q2,q0
   2735 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
   2736 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
   2737 	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
   2738 	vld1.32	{q12},[r3]!
   2739 	vadd.i32	q13,q13,q9
   2740 	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
   2741 	vmov	q2,q0
   2742 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
   2743 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
   2744 	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
   2745 	vld1.32	{q13},[r3]!
   2746 	vadd.i32	q12,q12,q10
   2747 	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
   2748 	vmov	q2,q0
   2749 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
   2750 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
   2751 	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
   2752 	vld1.32	{q12},[r3]!
   2753 	vadd.i32	q13,q13,q11
   2754 	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
   2755 	vmov	q2,q0
   2756 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
   2757 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
   2758 	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
   2759 	vld1.32	{q13},[r3]!
   2760 	vadd.i32	q12,q12,q8
   2761 	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
   2762 	vmov	q2,q0
   2763 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
   2764 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
   2765 	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
   2766 	vld1.32	{q12},[r3]!
   2767 	vadd.i32	q13,q13,q9
   2768 	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
   2769 	vmov	q2,q0
   2770 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
   2771 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
   2772 	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
   2773 	vld1.32	{q13},[r3]!
   2774 	vadd.i32	q12,q12,q10
   2775 	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
   2776 	vmov	q2,q0
   2777 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
   2778 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
   2779 	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
   2780 	vld1.32	{q12},[r3]!
   2781 	vadd.i32	q13,q13,q11
   2782 	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
   2783 	vmov	q2,q0
   2784 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
   2785 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
   2786 	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
   2787 	vld1.32	{q13},[r3]!
   2788 	vadd.i32	q12,q12,q8
   2789 	vmov	q2,q0
   2790 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
   2791 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
   2792 
   2793 	vld1.32	{q12},[r3]!
   2794 	vadd.i32	q13,q13,q9
   2795 	vmov	q2,q0
   2796 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
   2797 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
   2798 
   2799 	vld1.32	{q13},[r3]
   2800 	vadd.i32	q12,q12,q10
   2801 	sub	r3,r3,#256-16	@ rewind
   2802 	vmov	q2,q0
   2803 	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
   2804 	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
   2805 
   2806 	vadd.i32	q13,q13,q11
   2807 	vmov	q2,q0
   2808 	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
   2809 	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
   2810 
   2811 	vadd.i32	q0,q0,q14
   2812 	vadd.i32	q1,q1,q15
   2813 	it	ne
   2814 	bne	Loop_v8
   2815 
   2816 	vst1.32	{q0,q1},[r0]
   2817 
	bx	lr		@ return
   2819 
   2820 #endif
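@ ASCII: "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by
@ <appro@openssl.org>"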
   2821 .byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
   2824 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
   2825 .comm	_OPENSSL_armcap_P,4
   2826 .non_lazy_symbol_pointer
   2827 OPENSSL_armcap_P:
   2828 .indirect_symbol	_OPENSSL_armcap_P
   2829 .long	0
   2830 .private_extern	_OPENSSL_armcap_P
   2831 #endif
   2832