@ Home | History | Annotate | Download | only in asm  (code-browser navigation header; not part of the original source)
      1 #include "arm_arch.h"
      2 
      3 .text
      4 .code	32
      5 
      6 .type	K256,%object
      7 .align	5
      8 K256:
      9 .word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
     10 .word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
     11 .word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
     12 .word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
     13 .word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
     14 .word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
     15 .word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
     16 .word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
     17 .word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
     18 .word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
     19 .word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
     20 .word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
     21 .word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
     22 .word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
     23 .word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
     24 .word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
     25 .size	K256,.-K256
     26 .word	0				@ terminator
     27 .LOPENSSL_armcap:
     28 .word	OPENSSL_armcap_P-sha256_block_data_order
     29 .align	5
     30 
     31 .global	sha256_block_data_order
     32 .type	sha256_block_data_order,%function
     33 sha256_block_data_order:
     34 	sub	r3,pc,#8		@ sha256_block_data_order
     35 	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
     36 #if __ARM_ARCH__>=7
     37 	ldr	r12,.LOPENSSL_armcap
     38 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
     39 	tst	r12,#ARMV8_SHA256
     40 	bne	.LARMv8
     41 	tst	r12,#ARMV7_NEON
     42 	bne	.LNEON
     43 #endif
     44 	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
     45 	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
     46 	sub	r14,r3,#256+32	@ K256
     47 	sub	sp,sp,#16*4		@ alloca(X[16])
     48 .Loop:
     49 # if __ARM_ARCH__>=7
     50 	ldr	r2,[r1],#4
     51 # else
     52 	ldrb	r2,[r1,#3]
     53 # endif
     54 	eor	r3,r5,r6		@ magic
     55 	eor	r12,r12,r12
     56 #if __ARM_ARCH__>=7
     57 	@ ldr	r2,[r1],#4			@ 0
     58 # if 0==15
     59 	str	r1,[sp,#17*4]			@ make room for r1
     60 # endif
     61 	eor	r0,r8,r8,ror#5
     62 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
     63 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
     64 	rev	r2,r2
     65 #else
     66 	@ ldrb	r2,[r1,#3]			@ 0
     67 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
     68 	ldrb	r12,[r1,#2]
     69 	ldrb	r0,[r1,#1]
     70 	orr	r2,r2,r12,lsl#8
     71 	ldrb	r12,[r1],#4
     72 	orr	r2,r2,r0,lsl#16
     73 # if 0==15
     74 	str	r1,[sp,#17*4]			@ make room for r1
     75 # endif
     76 	eor	r0,r8,r8,ror#5
     77 	orr	r2,r2,r12,lsl#24
     78 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
     79 #endif
     80 	ldr	r12,[r14],#4			@ *K256++
     81 	add	r11,r11,r2			@ h+=X[i]
     82 	str	r2,[sp,#0*4]
     83 	eor	r2,r9,r10
     84 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
     85 	and	r2,r2,r8
     86 	add	r11,r11,r12			@ h+=K256[i]
     87 	eor	r2,r2,r10			@ Ch(e,f,g)
     88 	eor	r0,r4,r4,ror#11
     89 	add	r11,r11,r2			@ h+=Ch(e,f,g)
     90 #if 0==31
     91 	and	r12,r12,#0xff
     92 	cmp	r12,#0xf2			@ done?
     93 #endif
     94 #if 0<15
     95 # if __ARM_ARCH__>=7
     96 	ldr	r2,[r1],#4			@ prefetch
     97 # else
     98 	ldrb	r2,[r1,#3]
     99 # endif
    100 	eor	r12,r4,r5			@ a^b, b^c in next round
    101 #else
    102 	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
    103 	eor	r12,r4,r5			@ a^b, b^c in next round
    104 	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
    105 #endif
    106 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
    107 	and	r3,r3,r12			@ (b^c)&=(a^b)
    108 	add	r7,r7,r11			@ d+=h
    109 	eor	r3,r3,r5			@ Maj(a,b,c)
    110 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
    111 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
    112 #if __ARM_ARCH__>=7
    113 	@ ldr	r2,[r1],#4			@ 1
    114 # if 1==15
    115 	str	r1,[sp,#17*4]			@ make room for r1
    116 # endif
    117 	eor	r0,r7,r7,ror#5
    118 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
    119 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
    120 	rev	r2,r2
    121 #else
    122 	@ ldrb	r2,[r1,#3]			@ 1
    123 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
    124 	ldrb	r3,[r1,#2]
    125 	ldrb	r0,[r1,#1]
    126 	orr	r2,r2,r3,lsl#8
    127 	ldrb	r3,[r1],#4
    128 	orr	r2,r2,r0,lsl#16
    129 # if 1==15
    130 	str	r1,[sp,#17*4]			@ make room for r1
    131 # endif
    132 	eor	r0,r7,r7,ror#5
    133 	orr	r2,r2,r3,lsl#24
    134 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
    135 #endif
    136 	ldr	r3,[r14],#4			@ *K256++
    137 	add	r10,r10,r2			@ h+=X[i]
    138 	str	r2,[sp,#1*4]
    139 	eor	r2,r8,r9
    140 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
    141 	and	r2,r2,r7
    142 	add	r10,r10,r3			@ h+=K256[i]
    143 	eor	r2,r2,r9			@ Ch(e,f,g)
    144 	eor	r0,r11,r11,ror#11
    145 	add	r10,r10,r2			@ h+=Ch(e,f,g)
    146 #if 1==31
    147 	and	r3,r3,#0xff
    148 	cmp	r3,#0xf2			@ done?
    149 #endif
    150 #if 1<15
    151 # if __ARM_ARCH__>=7
    152 	ldr	r2,[r1],#4			@ prefetch
    153 # else
    154 	ldrb	r2,[r1,#3]
    155 # endif
    156 	eor	r3,r11,r4			@ a^b, b^c in next round
    157 #else
    158 	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
    159 	eor	r3,r11,r4			@ a^b, b^c in next round
    160 	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
    161 #endif
    162 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
    163 	and	r12,r12,r3			@ (b^c)&=(a^b)
    164 	add	r6,r6,r10			@ d+=h
    165 	eor	r12,r12,r4			@ Maj(a,b,c)
    166 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
    167 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
    168 #if __ARM_ARCH__>=7
    169 	@ ldr	r2,[r1],#4			@ 2
    170 # if 2==15
    171 	str	r1,[sp,#17*4]			@ make room for r1
    172 # endif
    173 	eor	r0,r6,r6,ror#5
    174 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
    175 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
    176 	rev	r2,r2
    177 #else
    178 	@ ldrb	r2,[r1,#3]			@ 2
    179 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
    180 	ldrb	r12,[r1,#2]
    181 	ldrb	r0,[r1,#1]
    182 	orr	r2,r2,r12,lsl#8
    183 	ldrb	r12,[r1],#4
    184 	orr	r2,r2,r0,lsl#16
    185 # if 2==15
    186 	str	r1,[sp,#17*4]			@ make room for r1
    187 # endif
    188 	eor	r0,r6,r6,ror#5
    189 	orr	r2,r2,r12,lsl#24
    190 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
    191 #endif
    192 	ldr	r12,[r14],#4			@ *K256++
    193 	add	r9,r9,r2			@ h+=X[i]
    194 	str	r2,[sp,#2*4]
    195 	eor	r2,r7,r8
    196 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
    197 	and	r2,r2,r6
    198 	add	r9,r9,r12			@ h+=K256[i]
    199 	eor	r2,r2,r8			@ Ch(e,f,g)
    200 	eor	r0,r10,r10,ror#11
    201 	add	r9,r9,r2			@ h+=Ch(e,f,g)
    202 #if 2==31
    203 	and	r12,r12,#0xff
    204 	cmp	r12,#0xf2			@ done?
    205 #endif
    206 #if 2<15
    207 # if __ARM_ARCH__>=7
    208 	ldr	r2,[r1],#4			@ prefetch
    209 # else
    210 	ldrb	r2,[r1,#3]
    211 # endif
    212 	eor	r12,r10,r11			@ a^b, b^c in next round
    213 #else
    214 	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
    215 	eor	r12,r10,r11			@ a^b, b^c in next round
    216 	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
    217 #endif
    218 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
    219 	and	r3,r3,r12			@ (b^c)&=(a^b)
    220 	add	r5,r5,r9			@ d+=h
    221 	eor	r3,r3,r11			@ Maj(a,b,c)
    222 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
    223 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
    224 #if __ARM_ARCH__>=7
    225 	@ ldr	r2,[r1],#4			@ 3
    226 # if 3==15
    227 	str	r1,[sp,#17*4]			@ make room for r1
    228 # endif
    229 	eor	r0,r5,r5,ror#5
    230 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
    231 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
    232 	rev	r2,r2
    233 #else
    234 	@ ldrb	r2,[r1,#3]			@ 3
    235 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
    236 	ldrb	r3,[r1,#2]
    237 	ldrb	r0,[r1,#1]
    238 	orr	r2,r2,r3,lsl#8
    239 	ldrb	r3,[r1],#4
    240 	orr	r2,r2,r0,lsl#16
    241 # if 3==15
    242 	str	r1,[sp,#17*4]			@ make room for r1
    243 # endif
    244 	eor	r0,r5,r5,ror#5
    245 	orr	r2,r2,r3,lsl#24
    246 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
    247 #endif
    248 	ldr	r3,[r14],#4			@ *K256++
    249 	add	r8,r8,r2			@ h+=X[i]
    250 	str	r2,[sp,#3*4]
    251 	eor	r2,r6,r7
    252 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
    253 	and	r2,r2,r5
    254 	add	r8,r8,r3			@ h+=K256[i]
    255 	eor	r2,r2,r7			@ Ch(e,f,g)
    256 	eor	r0,r9,r9,ror#11
    257 	add	r8,r8,r2			@ h+=Ch(e,f,g)
    258 #if 3==31
    259 	and	r3,r3,#0xff
    260 	cmp	r3,#0xf2			@ done?
    261 #endif
    262 #if 3<15
    263 # if __ARM_ARCH__>=7
    264 	ldr	r2,[r1],#4			@ prefetch
    265 # else
    266 	ldrb	r2,[r1,#3]
    267 # endif
    268 	eor	r3,r9,r10			@ a^b, b^c in next round
    269 #else
    270 	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
    271 	eor	r3,r9,r10			@ a^b, b^c in next round
    272 	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
    273 #endif
    274 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
    275 	and	r12,r12,r3			@ (b^c)&=(a^b)
    276 	add	r4,r4,r8			@ d+=h
    277 	eor	r12,r12,r10			@ Maj(a,b,c)
    278 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
    279 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
    280 #if __ARM_ARCH__>=7
    281 	@ ldr	r2,[r1],#4			@ 4
    282 # if 4==15
    283 	str	r1,[sp,#17*4]			@ make room for r1
    284 # endif
    285 	eor	r0,r4,r4,ror#5
    286 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
    287 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
    288 	rev	r2,r2
    289 #else
    290 	@ ldrb	r2,[r1,#3]			@ 4
    291 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
    292 	ldrb	r12,[r1,#2]
    293 	ldrb	r0,[r1,#1]
    294 	orr	r2,r2,r12,lsl#8
    295 	ldrb	r12,[r1],#4
    296 	orr	r2,r2,r0,lsl#16
    297 # if 4==15
    298 	str	r1,[sp,#17*4]			@ make room for r1
    299 # endif
    300 	eor	r0,r4,r4,ror#5
    301 	orr	r2,r2,r12,lsl#24
    302 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
    303 #endif
    304 	ldr	r12,[r14],#4			@ *K256++
    305 	add	r7,r7,r2			@ h+=X[i]
    306 	str	r2,[sp,#4*4]
    307 	eor	r2,r5,r6
    308 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
    309 	and	r2,r2,r4
    310 	add	r7,r7,r12			@ h+=K256[i]
    311 	eor	r2,r2,r6			@ Ch(e,f,g)
    312 	eor	r0,r8,r8,ror#11
    313 	add	r7,r7,r2			@ h+=Ch(e,f,g)
    314 #if 4==31
    315 	and	r12,r12,#0xff
    316 	cmp	r12,#0xf2			@ done?
    317 #endif
    318 #if 4<15
    319 # if __ARM_ARCH__>=7
    320 	ldr	r2,[r1],#4			@ prefetch
    321 # else
    322 	ldrb	r2,[r1,#3]
    323 # endif
    324 	eor	r12,r8,r9			@ a^b, b^c in next round
    325 #else
    326 	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
    327 	eor	r12,r8,r9			@ a^b, b^c in next round
    328 	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
    329 #endif
    330 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
    331 	and	r3,r3,r12			@ (b^c)&=(a^b)
    332 	add	r11,r11,r7			@ d+=h
    333 	eor	r3,r3,r9			@ Maj(a,b,c)
    334 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
    335 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
    336 #if __ARM_ARCH__>=7
    337 	@ ldr	r2,[r1],#4			@ 5
    338 # if 5==15
    339 	str	r1,[sp,#17*4]			@ make room for r1
    340 # endif
    341 	eor	r0,r11,r11,ror#5
    342 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
    343 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
    344 	rev	r2,r2
    345 #else
    346 	@ ldrb	r2,[r1,#3]			@ 5
    347 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
    348 	ldrb	r3,[r1,#2]
    349 	ldrb	r0,[r1,#1]
    350 	orr	r2,r2,r3,lsl#8
    351 	ldrb	r3,[r1],#4
    352 	orr	r2,r2,r0,lsl#16
    353 # if 5==15
    354 	str	r1,[sp,#17*4]			@ make room for r1
    355 # endif
    356 	eor	r0,r11,r11,ror#5
    357 	orr	r2,r2,r3,lsl#24
    358 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
    359 #endif
    360 	ldr	r3,[r14],#4			@ *K256++
    361 	add	r6,r6,r2			@ h+=X[i]
    362 	str	r2,[sp,#5*4]
    363 	eor	r2,r4,r5
    364 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
    365 	and	r2,r2,r11
    366 	add	r6,r6,r3			@ h+=K256[i]
    367 	eor	r2,r2,r5			@ Ch(e,f,g)
    368 	eor	r0,r7,r7,ror#11
    369 	add	r6,r6,r2			@ h+=Ch(e,f,g)
    370 #if 5==31
    371 	and	r3,r3,#0xff
    372 	cmp	r3,#0xf2			@ done?
    373 #endif
    374 #if 5<15
    375 # if __ARM_ARCH__>=7
    376 	ldr	r2,[r1],#4			@ prefetch
    377 # else
    378 	ldrb	r2,[r1,#3]
    379 # endif
    380 	eor	r3,r7,r8			@ a^b, b^c in next round
    381 #else
    382 	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
    383 	eor	r3,r7,r8			@ a^b, b^c in next round
    384 	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
    385 #endif
    386 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
    387 	and	r12,r12,r3			@ (b^c)&=(a^b)
    388 	add	r10,r10,r6			@ d+=h
    389 	eor	r12,r12,r8			@ Maj(a,b,c)
    390 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
    391 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
    392 #if __ARM_ARCH__>=7
    393 	@ ldr	r2,[r1],#4			@ 6
    394 # if 6==15
    395 	str	r1,[sp,#17*4]			@ make room for r1
    396 # endif
    397 	eor	r0,r10,r10,ror#5
    398 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
    399 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
    400 	rev	r2,r2
    401 #else
    402 	@ ldrb	r2,[r1,#3]			@ 6
    403 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
    404 	ldrb	r12,[r1,#2]
    405 	ldrb	r0,[r1,#1]
    406 	orr	r2,r2,r12,lsl#8
    407 	ldrb	r12,[r1],#4
    408 	orr	r2,r2,r0,lsl#16
    409 # if 6==15
    410 	str	r1,[sp,#17*4]			@ make room for r1
    411 # endif
    412 	eor	r0,r10,r10,ror#5
    413 	orr	r2,r2,r12,lsl#24
    414 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
    415 #endif
    416 	ldr	r12,[r14],#4			@ *K256++
    417 	add	r5,r5,r2			@ h+=X[i]
    418 	str	r2,[sp,#6*4]
    419 	eor	r2,r11,r4
    420 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
    421 	and	r2,r2,r10
    422 	add	r5,r5,r12			@ h+=K256[i]
    423 	eor	r2,r2,r4			@ Ch(e,f,g)
    424 	eor	r0,r6,r6,ror#11
    425 	add	r5,r5,r2			@ h+=Ch(e,f,g)
    426 #if 6==31
    427 	and	r12,r12,#0xff
    428 	cmp	r12,#0xf2			@ done?
    429 #endif
    430 #if 6<15
    431 # if __ARM_ARCH__>=7
    432 	ldr	r2,[r1],#4			@ prefetch
    433 # else
    434 	ldrb	r2,[r1,#3]
    435 # endif
    436 	eor	r12,r6,r7			@ a^b, b^c in next round
    437 #else
    438 	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
    439 	eor	r12,r6,r7			@ a^b, b^c in next round
    440 	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
    441 #endif
    442 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
    443 	and	r3,r3,r12			@ (b^c)&=(a^b)
    444 	add	r9,r9,r5			@ d+=h
    445 	eor	r3,r3,r7			@ Maj(a,b,c)
    446 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
    447 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
    448 #if __ARM_ARCH__>=7
    449 	@ ldr	r2,[r1],#4			@ 7
    450 # if 7==15
    451 	str	r1,[sp,#17*4]			@ make room for r1
    452 # endif
    453 	eor	r0,r9,r9,ror#5
    454 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
    455 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
    456 	rev	r2,r2
    457 #else
    458 	@ ldrb	r2,[r1,#3]			@ 7
    459 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
    460 	ldrb	r3,[r1,#2]
    461 	ldrb	r0,[r1,#1]
    462 	orr	r2,r2,r3,lsl#8
    463 	ldrb	r3,[r1],#4
    464 	orr	r2,r2,r0,lsl#16
    465 # if 7==15
    466 	str	r1,[sp,#17*4]			@ make room for r1
    467 # endif
    468 	eor	r0,r9,r9,ror#5
    469 	orr	r2,r2,r3,lsl#24
    470 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
    471 #endif
    472 	ldr	r3,[r14],#4			@ *K256++
    473 	add	r4,r4,r2			@ h+=X[i]
    474 	str	r2,[sp,#7*4]
    475 	eor	r2,r10,r11
    476 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
    477 	and	r2,r2,r9
    478 	add	r4,r4,r3			@ h+=K256[i]
    479 	eor	r2,r2,r11			@ Ch(e,f,g)
    480 	eor	r0,r5,r5,ror#11
    481 	add	r4,r4,r2			@ h+=Ch(e,f,g)
    482 #if 7==31
    483 	and	r3,r3,#0xff
    484 	cmp	r3,#0xf2			@ done?
    485 #endif
    486 #if 7<15
    487 # if __ARM_ARCH__>=7
    488 	ldr	r2,[r1],#4			@ prefetch
    489 # else
    490 	ldrb	r2,[r1,#3]
    491 # endif
    492 	eor	r3,r5,r6			@ a^b, b^c in next round
    493 #else
    494 	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
    495 	eor	r3,r5,r6			@ a^b, b^c in next round
    496 	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
    497 #endif
    498 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
    499 	and	r12,r12,r3			@ (b^c)&=(a^b)
    500 	add	r8,r8,r4			@ d+=h
    501 	eor	r12,r12,r6			@ Maj(a,b,c)
    502 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
    503 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
    504 #if __ARM_ARCH__>=7
    505 	@ ldr	r2,[r1],#4			@ 8
    506 # if 8==15
    507 	str	r1,[sp,#17*4]			@ make room for r1
    508 # endif
    509 	eor	r0,r8,r8,ror#5
    510 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
    511 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
    512 	rev	r2,r2
    513 #else
    514 	@ ldrb	r2,[r1,#3]			@ 8
    515 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
    516 	ldrb	r12,[r1,#2]
    517 	ldrb	r0,[r1,#1]
    518 	orr	r2,r2,r12,lsl#8
    519 	ldrb	r12,[r1],#4
    520 	orr	r2,r2,r0,lsl#16
    521 # if 8==15
    522 	str	r1,[sp,#17*4]			@ make room for r1
    523 # endif
    524 	eor	r0,r8,r8,ror#5
    525 	orr	r2,r2,r12,lsl#24
    526 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
    527 #endif
    528 	ldr	r12,[r14],#4			@ *K256++
    529 	add	r11,r11,r2			@ h+=X[i]
    530 	str	r2,[sp,#8*4]
    531 	eor	r2,r9,r10
    532 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
    533 	and	r2,r2,r8
    534 	add	r11,r11,r12			@ h+=K256[i]
    535 	eor	r2,r2,r10			@ Ch(e,f,g)
    536 	eor	r0,r4,r4,ror#11
    537 	add	r11,r11,r2			@ h+=Ch(e,f,g)
    538 #if 8==31
    539 	and	r12,r12,#0xff
    540 	cmp	r12,#0xf2			@ done?
    541 #endif
    542 #if 8<15
    543 # if __ARM_ARCH__>=7
    544 	ldr	r2,[r1],#4			@ prefetch
    545 # else
    546 	ldrb	r2,[r1,#3]
    547 # endif
    548 	eor	r12,r4,r5			@ a^b, b^c in next round
    549 #else
    550 	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
    551 	eor	r12,r4,r5			@ a^b, b^c in next round
    552 	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
    553 #endif
    554 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
    555 	and	r3,r3,r12			@ (b^c)&=(a^b)
    556 	add	r7,r7,r11			@ d+=h
    557 	eor	r3,r3,r5			@ Maj(a,b,c)
    558 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
    559 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
    560 #if __ARM_ARCH__>=7
    561 	@ ldr	r2,[r1],#4			@ 9
    562 # if 9==15
    563 	str	r1,[sp,#17*4]			@ make room for r1
    564 # endif
    565 	eor	r0,r7,r7,ror#5
    566 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
    567 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
    568 	rev	r2,r2
    569 #else
    570 	@ ldrb	r2,[r1,#3]			@ 9
    571 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
    572 	ldrb	r3,[r1,#2]
    573 	ldrb	r0,[r1,#1]
    574 	orr	r2,r2,r3,lsl#8
    575 	ldrb	r3,[r1],#4
    576 	orr	r2,r2,r0,lsl#16
    577 # if 9==15
    578 	str	r1,[sp,#17*4]			@ make room for r1
    579 # endif
    580 	eor	r0,r7,r7,ror#5
    581 	orr	r2,r2,r3,lsl#24
    582 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
    583 #endif
    584 	ldr	r3,[r14],#4			@ *K256++
    585 	add	r10,r10,r2			@ h+=X[i]
    586 	str	r2,[sp,#9*4]
    587 	eor	r2,r8,r9
    588 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
    589 	and	r2,r2,r7
    590 	add	r10,r10,r3			@ h+=K256[i]
    591 	eor	r2,r2,r9			@ Ch(e,f,g)
    592 	eor	r0,r11,r11,ror#11
    593 	add	r10,r10,r2			@ h+=Ch(e,f,g)
    594 #if 9==31
    595 	and	r3,r3,#0xff
    596 	cmp	r3,#0xf2			@ done?
    597 #endif
    598 #if 9<15
    599 # if __ARM_ARCH__>=7
    600 	ldr	r2,[r1],#4			@ prefetch
    601 # else
    602 	ldrb	r2,[r1,#3]
    603 # endif
    604 	eor	r3,r11,r4			@ a^b, b^c in next round
    605 #else
    606 	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
    607 	eor	r3,r11,r4			@ a^b, b^c in next round
    608 	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
    609 #endif
    610 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
    611 	and	r12,r12,r3			@ (b^c)&=(a^b)
    612 	add	r6,r6,r10			@ d+=h
    613 	eor	r12,r12,r4			@ Maj(a,b,c)
    614 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
    615 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
    616 #if __ARM_ARCH__>=7
    617 	@ ldr	r2,[r1],#4			@ 10
    618 # if 10==15
    619 	str	r1,[sp,#17*4]			@ make room for r1
    620 # endif
    621 	eor	r0,r6,r6,ror#5
    622 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
    623 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
    624 	rev	r2,r2
    625 #else
    626 	@ ldrb	r2,[r1,#3]			@ 10
    627 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
    628 	ldrb	r12,[r1,#2]
    629 	ldrb	r0,[r1,#1]
    630 	orr	r2,r2,r12,lsl#8
    631 	ldrb	r12,[r1],#4
    632 	orr	r2,r2,r0,lsl#16
    633 # if 10==15
    634 	str	r1,[sp,#17*4]			@ make room for r1
    635 # endif
    636 	eor	r0,r6,r6,ror#5
    637 	orr	r2,r2,r12,lsl#24
    638 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
    639 #endif
    640 	ldr	r12,[r14],#4			@ *K256++
    641 	add	r9,r9,r2			@ h+=X[i]
    642 	str	r2,[sp,#10*4]
    643 	eor	r2,r7,r8
    644 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
    645 	and	r2,r2,r6
    646 	add	r9,r9,r12			@ h+=K256[i]
    647 	eor	r2,r2,r8			@ Ch(e,f,g)
    648 	eor	r0,r10,r10,ror#11
    649 	add	r9,r9,r2			@ h+=Ch(e,f,g)
    650 #if 10==31
    651 	and	r12,r12,#0xff
    652 	cmp	r12,#0xf2			@ done?
    653 #endif
    654 #if 10<15
    655 # if __ARM_ARCH__>=7
    656 	ldr	r2,[r1],#4			@ prefetch
    657 # else
    658 	ldrb	r2,[r1,#3]
    659 # endif
    660 	eor	r12,r10,r11			@ a^b, b^c in next round
    661 #else
    662 	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
    663 	eor	r12,r10,r11			@ a^b, b^c in next round
    664 	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
    665 #endif
    666 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
    667 	and	r3,r3,r12			@ (b^c)&=(a^b)
    668 	add	r5,r5,r9			@ d+=h
    669 	eor	r3,r3,r11			@ Maj(a,b,c)
    670 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
    671 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
    672 #if __ARM_ARCH__>=7
    673 	@ ldr	r2,[r1],#4			@ 11
    674 # if 11==15
    675 	str	r1,[sp,#17*4]			@ make room for r1
    676 # endif
    677 	eor	r0,r5,r5,ror#5
    678 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
    679 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
    680 	rev	r2,r2
    681 #else
    682 	@ ldrb	r2,[r1,#3]			@ 11
    683 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
    684 	ldrb	r3,[r1,#2]
    685 	ldrb	r0,[r1,#1]
    686 	orr	r2,r2,r3,lsl#8
    687 	ldrb	r3,[r1],#4
    688 	orr	r2,r2,r0,lsl#16
    689 # if 11==15
    690 	str	r1,[sp,#17*4]			@ make room for r1
    691 # endif
    692 	eor	r0,r5,r5,ror#5
    693 	orr	r2,r2,r3,lsl#24
    694 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
    695 #endif
    696 	ldr	r3,[r14],#4			@ *K256++
    697 	add	r8,r8,r2			@ h+=X[i]
    698 	str	r2,[sp,#11*4]
    699 	eor	r2,r6,r7
    700 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
    701 	and	r2,r2,r5
    702 	add	r8,r8,r3			@ h+=K256[i]
    703 	eor	r2,r2,r7			@ Ch(e,f,g)
    704 	eor	r0,r9,r9,ror#11
    705 	add	r8,r8,r2			@ h+=Ch(e,f,g)
    706 #if 11==31
    707 	and	r3,r3,#0xff
    708 	cmp	r3,#0xf2			@ done?
    709 #endif
    710 #if 11<15
    711 # if __ARM_ARCH__>=7
    712 	ldr	r2,[r1],#4			@ prefetch
    713 # else
    714 	ldrb	r2,[r1,#3]
    715 # endif
    716 	eor	r3,r9,r10			@ a^b, b^c in next round
    717 #else
    718 	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
    719 	eor	r3,r9,r10			@ a^b, b^c in next round
    720 	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
    721 #endif
    722 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
    723 	and	r12,r12,r3			@ (b^c)&=(a^b)
    724 	add	r4,r4,r8			@ d+=h
    725 	eor	r12,r12,r10			@ Maj(a,b,c)
    726 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
    727 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
    728 #if __ARM_ARCH__>=7
    729 	@ ldr	r2,[r1],#4			@ 12
    730 # if 12==15
    731 	str	r1,[sp,#17*4]			@ make room for r1
    732 # endif
    733 	eor	r0,r4,r4,ror#5
    734 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
    735 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
    736 	rev	r2,r2
    737 #else
    738 	@ ldrb	r2,[r1,#3]			@ 12
    739 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
    740 	ldrb	r12,[r1,#2]
    741 	ldrb	r0,[r1,#1]
    742 	orr	r2,r2,r12,lsl#8
    743 	ldrb	r12,[r1],#4
    744 	orr	r2,r2,r0,lsl#16
    745 # if 12==15
    746 	str	r1,[sp,#17*4]			@ make room for r1
    747 # endif
    748 	eor	r0,r4,r4,ror#5
    749 	orr	r2,r2,r12,lsl#24
    750 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
    751 #endif
    752 	ldr	r12,[r14],#4			@ *K256++
    753 	add	r7,r7,r2			@ h+=X[i]
    754 	str	r2,[sp,#12*4]
    755 	eor	r2,r5,r6
    756 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
    757 	and	r2,r2,r4
    758 	add	r7,r7,r12			@ h+=K256[i]
    759 	eor	r2,r2,r6			@ Ch(e,f,g)
    760 	eor	r0,r8,r8,ror#11
    761 	add	r7,r7,r2			@ h+=Ch(e,f,g)
    762 #if 12==31
    763 	and	r12,r12,#0xff
    764 	cmp	r12,#0xf2			@ done?
    765 #endif
    766 #if 12<15
    767 # if __ARM_ARCH__>=7
    768 	ldr	r2,[r1],#4			@ prefetch
    769 # else
    770 	ldrb	r2,[r1,#3]
    771 # endif
    772 	eor	r12,r8,r9			@ a^b, b^c in next round
    773 #else
    774 	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
    775 	eor	r12,r8,r9			@ a^b, b^c in next round
    776 	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
    777 #endif
    778 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
    779 	and	r3,r3,r12			@ (b^c)&=(a^b)
    780 	add	r11,r11,r7			@ d+=h
    781 	eor	r3,r3,r9			@ Maj(a,b,c)
    782 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
    783 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
    784 #if __ARM_ARCH__>=7
    785 	@ ldr	r2,[r1],#4			@ 13
    786 # if 13==15
    787 	str	r1,[sp,#17*4]			@ make room for r1
    788 # endif
    789 	eor	r0,r11,r11,ror#5
    790 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
    791 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
    792 	rev	r2,r2
    793 #else
    794 	@ ldrb	r2,[r1,#3]			@ 13
    795 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
    796 	ldrb	r3,[r1,#2]
    797 	ldrb	r0,[r1,#1]
    798 	orr	r2,r2,r3,lsl#8
    799 	ldrb	r3,[r1],#4
    800 	orr	r2,r2,r0,lsl#16
    801 # if 13==15
    802 	str	r1,[sp,#17*4]			@ make room for r1
    803 # endif
    804 	eor	r0,r11,r11,ror#5
    805 	orr	r2,r2,r3,lsl#24
    806 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
    807 #endif
    808 	ldr	r3,[r14],#4			@ *K256++
    809 	add	r6,r6,r2			@ h+=X[i]
    810 	str	r2,[sp,#13*4]
    811 	eor	r2,r4,r5
    812 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
    813 	and	r2,r2,r11
    814 	add	r6,r6,r3			@ h+=K256[i]
    815 	eor	r2,r2,r5			@ Ch(e,f,g)
    816 	eor	r0,r7,r7,ror#11
    817 	add	r6,r6,r2			@ h+=Ch(e,f,g)
    818 #if 13==31
    819 	and	r3,r3,#0xff
    820 	cmp	r3,#0xf2			@ done?
    821 #endif
    822 #if 13<15
    823 # if __ARM_ARCH__>=7
    824 	ldr	r2,[r1],#4			@ prefetch
    825 # else
    826 	ldrb	r2,[r1,#3]
    827 # endif
    828 	eor	r3,r7,r8			@ a^b, b^c in next round
    829 #else
    830 	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
    831 	eor	r3,r7,r8			@ a^b, b^c in next round
    832 	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
    833 #endif
    834 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
    835 	and	r12,r12,r3			@ (b^c)&=(a^b)
    836 	add	r10,r10,r6			@ d+=h
    837 	eor	r12,r12,r8			@ Maj(a,b,c)
    838 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
    839 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
    840 #if __ARM_ARCH__>=7
    841 	@ ldr	r2,[r1],#4			@ 14
    842 # if 14==15
    843 	str	r1,[sp,#17*4]			@ make room for r1
    844 # endif
    845 	eor	r0,r10,r10,ror#5
    846 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
    847 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
    848 	rev	r2,r2
    849 #else
    850 	@ ldrb	r2,[r1,#3]			@ 14
    851 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
    852 	ldrb	r12,[r1,#2]
    853 	ldrb	r0,[r1,#1]
    854 	orr	r2,r2,r12,lsl#8
    855 	ldrb	r12,[r1],#4
    856 	orr	r2,r2,r0,lsl#16
    857 # if 14==15
    858 	str	r1,[sp,#17*4]			@ make room for r1
    859 # endif
    860 	eor	r0,r10,r10,ror#5
    861 	orr	r2,r2,r12,lsl#24
    862 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
    863 #endif
    864 	ldr	r12,[r14],#4			@ *K256++
    865 	add	r5,r5,r2			@ h+=X[i]
    866 	str	r2,[sp,#14*4]
    867 	eor	r2,r11,r4
    868 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
    869 	and	r2,r2,r10
    870 	add	r5,r5,r12			@ h+=K256[i]
    871 	eor	r2,r2,r4			@ Ch(e,f,g)
    872 	eor	r0,r6,r6,ror#11
    873 	add	r5,r5,r2			@ h+=Ch(e,f,g)
    874 #if 14==31
    875 	and	r12,r12,#0xff
    876 	cmp	r12,#0xf2			@ done?
    877 #endif
    878 #if 14<15
    879 # if __ARM_ARCH__>=7
    880 	ldr	r2,[r1],#4			@ prefetch
    881 # else
    882 	ldrb	r2,[r1,#3]
    883 # endif
    884 	eor	r12,r6,r7			@ a^b, b^c in next round
    885 #else
    886 	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
    887 	eor	r12,r6,r7			@ a^b, b^c in next round
    888 	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
    889 #endif
    890 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
    891 	and	r3,r3,r12			@ (b^c)&=(a^b)
    892 	add	r9,r9,r5			@ d+=h
    893 	eor	r3,r3,r7			@ Maj(a,b,c)
    894 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
    895 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
    896 #if __ARM_ARCH__>=7
    897 	@ ldr	r2,[r1],#4			@ 15
    898 # if 15==15
    899 	str	r1,[sp,#17*4]			@ make room for r1
    900 # endif
    901 	eor	r0,r9,r9,ror#5
    902 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
    903 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
    904 	rev	r2,r2
    905 #else
    906 	@ ldrb	r2,[r1,#3]			@ 15
    907 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
    908 	ldrb	r3,[r1,#2]
    909 	ldrb	r0,[r1,#1]
    910 	orr	r2,r2,r3,lsl#8
    911 	ldrb	r3,[r1],#4
    912 	orr	r2,r2,r0,lsl#16
    913 # if 15==15
    914 	str	r1,[sp,#17*4]			@ make room for r1
    915 # endif
    916 	eor	r0,r9,r9,ror#5
    917 	orr	r2,r2,r3,lsl#24
    918 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
    919 #endif
    920 	ldr	r3,[r14],#4			@ *K256++
    921 	add	r4,r4,r2			@ h+=X[i]
    922 	str	r2,[sp,#15*4]
    923 	eor	r2,r10,r11
    924 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
    925 	and	r2,r2,r9
    926 	add	r4,r4,r3			@ h+=K256[i]
    927 	eor	r2,r2,r11			@ Ch(e,f,g)
    928 	eor	r0,r5,r5,ror#11
    929 	add	r4,r4,r2			@ h+=Ch(e,f,g)
    930 #if 15==31
    931 	and	r3,r3,#0xff
    932 	cmp	r3,#0xf2			@ done?
    933 #endif
    934 #if 15<15
    935 # if __ARM_ARCH__>=7
    936 	ldr	r2,[r1],#4			@ prefetch
    937 # else
    938 	ldrb	r2,[r1,#3]
    939 # endif
    940 	eor	r3,r5,r6			@ a^b, b^c in next round
    941 #else
    942 	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
    943 	eor	r3,r5,r6			@ a^b, b^c in next round
    944 	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
    945 #endif
    946 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
    947 	and	r12,r12,r3			@ (b^c)&=(a^b)
    948 	add	r8,r8,r4			@ d+=h
    949 	eor	r12,r12,r6			@ Maj(a,b,c)
    950 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
    951 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
    952 .Lrounds_16_xx:
    953 	@ ldr	r2,[sp,#1*4]		@ 16
    954 	@ ldr	r1,[sp,#14*4]
    955 	mov	r0,r2,ror#7
    956 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
    957 	mov	r12,r1,ror#17
    958 	eor	r0,r0,r2,ror#18
    959 	eor	r12,r12,r1,ror#19
    960 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
    961 	ldr	r2,[sp,#0*4]
    962 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
    963 	ldr	r1,[sp,#9*4]
    964 
    965 	add	r12,r12,r0
    966 	eor	r0,r8,r8,ror#5	@ from BODY_00_15
    967 	add	r2,r2,r12
    968 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
    969 	add	r2,r2,r1			@ X[i]
    970 	ldr	r12,[r14],#4			@ *K256++
    971 	add	r11,r11,r2			@ h+=X[i]
    972 	str	r2,[sp,#0*4]
    973 	eor	r2,r9,r10
    974 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
    975 	and	r2,r2,r8
    976 	add	r11,r11,r12			@ h+=K256[i]
    977 	eor	r2,r2,r10			@ Ch(e,f,g)
    978 	eor	r0,r4,r4,ror#11
    979 	add	r11,r11,r2			@ h+=Ch(e,f,g)
    980 #if 16==31
    981 	and	r12,r12,#0xff
    982 	cmp	r12,#0xf2			@ done?
    983 #endif
    984 #if 16<15
    985 # if __ARM_ARCH__>=7
    986 	ldr	r2,[r1],#4			@ prefetch
    987 # else
    988 	ldrb	r2,[r1,#3]
    989 # endif
    990 	eor	r12,r4,r5			@ a^b, b^c in next round
    991 #else
    992 	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
    993 	eor	r12,r4,r5			@ a^b, b^c in next round
    994 	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
    995 #endif
    996 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
    997 	and	r3,r3,r12			@ (b^c)&=(a^b)
    998 	add	r7,r7,r11			@ d+=h
    999 	eor	r3,r3,r5			@ Maj(a,b,c)
   1000 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
   1001 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
   1002 	@ ldr	r2,[sp,#2*4]		@ 17
   1003 	@ ldr	r1,[sp,#15*4]
   1004 	mov	r0,r2,ror#7
   1005 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
   1006 	mov	r3,r1,ror#17
   1007 	eor	r0,r0,r2,ror#18
   1008 	eor	r3,r3,r1,ror#19
   1009 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1010 	ldr	r2,[sp,#1*4]
   1011 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1012 	ldr	r1,[sp,#10*4]
   1013 
   1014 	add	r3,r3,r0
   1015 	eor	r0,r7,r7,ror#5	@ from BODY_00_15
   1016 	add	r2,r2,r3
   1017 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
   1018 	add	r2,r2,r1			@ X[i]
   1019 	ldr	r3,[r14],#4			@ *K256++
   1020 	add	r10,r10,r2			@ h+=X[i]
   1021 	str	r2,[sp,#1*4]
   1022 	eor	r2,r8,r9
   1023 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
   1024 	and	r2,r2,r7
   1025 	add	r10,r10,r3			@ h+=K256[i]
   1026 	eor	r2,r2,r9			@ Ch(e,f,g)
   1027 	eor	r0,r11,r11,ror#11
   1028 	add	r10,r10,r2			@ h+=Ch(e,f,g)
   1029 #if 17==31
   1030 	and	r3,r3,#0xff
   1031 	cmp	r3,#0xf2			@ done?
   1032 #endif
   1033 #if 17<15
   1034 # if __ARM_ARCH__>=7
   1035 	ldr	r2,[r1],#4			@ prefetch
   1036 # else
   1037 	ldrb	r2,[r1,#3]
   1038 # endif
   1039 	eor	r3,r11,r4			@ a^b, b^c in next round
   1040 #else
   1041 	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
   1042 	eor	r3,r11,r4			@ a^b, b^c in next round
   1043 	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
   1044 #endif
   1045 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
   1046 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1047 	add	r6,r6,r10			@ d+=h
   1048 	eor	r12,r12,r4			@ Maj(a,b,c)
   1049 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
   1050 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
   1051 	@ ldr	r2,[sp,#3*4]		@ 18
   1052 	@ ldr	r1,[sp,#0*4]
   1053 	mov	r0,r2,ror#7
   1054 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
   1055 	mov	r12,r1,ror#17
   1056 	eor	r0,r0,r2,ror#18
   1057 	eor	r12,r12,r1,ror#19
   1058 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1059 	ldr	r2,[sp,#2*4]
   1060 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1061 	ldr	r1,[sp,#11*4]
   1062 
   1063 	add	r12,r12,r0
   1064 	eor	r0,r6,r6,ror#5	@ from BODY_00_15
   1065 	add	r2,r2,r12
   1066 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
   1067 	add	r2,r2,r1			@ X[i]
   1068 	ldr	r12,[r14],#4			@ *K256++
   1069 	add	r9,r9,r2			@ h+=X[i]
   1070 	str	r2,[sp,#2*4]
   1071 	eor	r2,r7,r8
   1072 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
   1073 	and	r2,r2,r6
   1074 	add	r9,r9,r12			@ h+=K256[i]
   1075 	eor	r2,r2,r8			@ Ch(e,f,g)
   1076 	eor	r0,r10,r10,ror#11
   1077 	add	r9,r9,r2			@ h+=Ch(e,f,g)
   1078 #if 18==31
   1079 	and	r12,r12,#0xff
   1080 	cmp	r12,#0xf2			@ done?
   1081 #endif
   1082 #if 18<15
   1083 # if __ARM_ARCH__>=7
   1084 	ldr	r2,[r1],#4			@ prefetch
   1085 # else
   1086 	ldrb	r2,[r1,#3]
   1087 # endif
   1088 	eor	r12,r10,r11			@ a^b, b^c in next round
   1089 #else
   1090 	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
   1091 	eor	r12,r10,r11			@ a^b, b^c in next round
   1092 	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
   1093 #endif
   1094 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
   1095 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1096 	add	r5,r5,r9			@ d+=h
   1097 	eor	r3,r3,r11			@ Maj(a,b,c)
   1098 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
   1099 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
   1100 	@ ldr	r2,[sp,#4*4]		@ 19
   1101 	@ ldr	r1,[sp,#1*4]
   1102 	mov	r0,r2,ror#7
   1103 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
   1104 	mov	r3,r1,ror#17
   1105 	eor	r0,r0,r2,ror#18
   1106 	eor	r3,r3,r1,ror#19
   1107 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1108 	ldr	r2,[sp,#3*4]
   1109 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1110 	ldr	r1,[sp,#12*4]
   1111 
   1112 	add	r3,r3,r0
   1113 	eor	r0,r5,r5,ror#5	@ from BODY_00_15
   1114 	add	r2,r2,r3
   1115 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
   1116 	add	r2,r2,r1			@ X[i]
   1117 	ldr	r3,[r14],#4			@ *K256++
   1118 	add	r8,r8,r2			@ h+=X[i]
   1119 	str	r2,[sp,#3*4]
   1120 	eor	r2,r6,r7
   1121 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
   1122 	and	r2,r2,r5
   1123 	add	r8,r8,r3			@ h+=K256[i]
   1124 	eor	r2,r2,r7			@ Ch(e,f,g)
   1125 	eor	r0,r9,r9,ror#11
   1126 	add	r8,r8,r2			@ h+=Ch(e,f,g)
   1127 #if 19==31
   1128 	and	r3,r3,#0xff
   1129 	cmp	r3,#0xf2			@ done?
   1130 #endif
   1131 #if 19<15
   1132 # if __ARM_ARCH__>=7
   1133 	ldr	r2,[r1],#4			@ prefetch
   1134 # else
   1135 	ldrb	r2,[r1,#3]
   1136 # endif
   1137 	eor	r3,r9,r10			@ a^b, b^c in next round
   1138 #else
   1139 	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
   1140 	eor	r3,r9,r10			@ a^b, b^c in next round
   1141 	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
   1142 #endif
   1143 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
   1144 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1145 	add	r4,r4,r8			@ d+=h
   1146 	eor	r12,r12,r10			@ Maj(a,b,c)
   1147 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
   1148 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
   1149 	@ ldr	r2,[sp,#5*4]		@ 20
   1150 	@ ldr	r1,[sp,#2*4]
   1151 	mov	r0,r2,ror#7
   1152 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
   1153 	mov	r12,r1,ror#17
   1154 	eor	r0,r0,r2,ror#18
   1155 	eor	r12,r12,r1,ror#19
   1156 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1157 	ldr	r2,[sp,#4*4]
   1158 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1159 	ldr	r1,[sp,#13*4]
   1160 
   1161 	add	r12,r12,r0
   1162 	eor	r0,r4,r4,ror#5	@ from BODY_00_15
   1163 	add	r2,r2,r12
   1164 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
   1165 	add	r2,r2,r1			@ X[i]
   1166 	ldr	r12,[r14],#4			@ *K256++
   1167 	add	r7,r7,r2			@ h+=X[i]
   1168 	str	r2,[sp,#4*4]
   1169 	eor	r2,r5,r6
   1170 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
   1171 	and	r2,r2,r4
   1172 	add	r7,r7,r12			@ h+=K256[i]
   1173 	eor	r2,r2,r6			@ Ch(e,f,g)
   1174 	eor	r0,r8,r8,ror#11
   1175 	add	r7,r7,r2			@ h+=Ch(e,f,g)
   1176 #if 20==31
   1177 	and	r12,r12,#0xff
   1178 	cmp	r12,#0xf2			@ done?
   1179 #endif
   1180 #if 20<15
   1181 # if __ARM_ARCH__>=7
   1182 	ldr	r2,[r1],#4			@ prefetch
   1183 # else
   1184 	ldrb	r2,[r1,#3]
   1185 # endif
   1186 	eor	r12,r8,r9			@ a^b, b^c in next round
   1187 #else
   1188 	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
   1189 	eor	r12,r8,r9			@ a^b, b^c in next round
   1190 	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
   1191 #endif
   1192 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
   1193 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1194 	add	r11,r11,r7			@ d+=h
   1195 	eor	r3,r3,r9			@ Maj(a,b,c)
   1196 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
   1197 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
   1198 	@ ldr	r2,[sp,#6*4]		@ 21
   1199 	@ ldr	r1,[sp,#3*4]
   1200 	mov	r0,r2,ror#7
   1201 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
   1202 	mov	r3,r1,ror#17
   1203 	eor	r0,r0,r2,ror#18
   1204 	eor	r3,r3,r1,ror#19
   1205 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1206 	ldr	r2,[sp,#5*4]
   1207 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1208 	ldr	r1,[sp,#14*4]
   1209 
   1210 	add	r3,r3,r0
   1211 	eor	r0,r11,r11,ror#5	@ from BODY_00_15
   1212 	add	r2,r2,r3
   1213 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
   1214 	add	r2,r2,r1			@ X[i]
   1215 	ldr	r3,[r14],#4			@ *K256++
   1216 	add	r6,r6,r2			@ h+=X[i]
   1217 	str	r2,[sp,#5*4]
   1218 	eor	r2,r4,r5
   1219 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
   1220 	and	r2,r2,r11
   1221 	add	r6,r6,r3			@ h+=K256[i]
   1222 	eor	r2,r2,r5			@ Ch(e,f,g)
   1223 	eor	r0,r7,r7,ror#11
   1224 	add	r6,r6,r2			@ h+=Ch(e,f,g)
   1225 #if 21==31
   1226 	and	r3,r3,#0xff
   1227 	cmp	r3,#0xf2			@ done?
   1228 #endif
   1229 #if 21<15
   1230 # if __ARM_ARCH__>=7
   1231 	ldr	r2,[r1],#4			@ prefetch
   1232 # else
   1233 	ldrb	r2,[r1,#3]
   1234 # endif
   1235 	eor	r3,r7,r8			@ a^b, b^c in next round
   1236 #else
   1237 	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
   1238 	eor	r3,r7,r8			@ a^b, b^c in next round
   1239 	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
   1240 #endif
   1241 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
   1242 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1243 	add	r10,r10,r6			@ d+=h
   1244 	eor	r12,r12,r8			@ Maj(a,b,c)
   1245 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
   1246 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
   1247 	@ ldr	r2,[sp,#7*4]		@ 22
   1248 	@ ldr	r1,[sp,#4*4]
   1249 	mov	r0,r2,ror#7
   1250 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
   1251 	mov	r12,r1,ror#17
   1252 	eor	r0,r0,r2,ror#18
   1253 	eor	r12,r12,r1,ror#19
   1254 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1255 	ldr	r2,[sp,#6*4]
   1256 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1257 	ldr	r1,[sp,#15*4]
   1258 
   1259 	add	r12,r12,r0
   1260 	eor	r0,r10,r10,ror#5	@ from BODY_00_15
   1261 	add	r2,r2,r12
   1262 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
   1263 	add	r2,r2,r1			@ X[i]
   1264 	ldr	r12,[r14],#4			@ *K256++
   1265 	add	r5,r5,r2			@ h+=X[i]
   1266 	str	r2,[sp,#6*4]
   1267 	eor	r2,r11,r4
   1268 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
   1269 	and	r2,r2,r10
   1270 	add	r5,r5,r12			@ h+=K256[i]
   1271 	eor	r2,r2,r4			@ Ch(e,f,g)
   1272 	eor	r0,r6,r6,ror#11
   1273 	add	r5,r5,r2			@ h+=Ch(e,f,g)
   1274 #if 22==31
   1275 	and	r12,r12,#0xff
   1276 	cmp	r12,#0xf2			@ done?
   1277 #endif
   1278 #if 22<15
   1279 # if __ARM_ARCH__>=7
   1280 	ldr	r2,[r1],#4			@ prefetch
   1281 # else
   1282 	ldrb	r2,[r1,#3]
   1283 # endif
   1284 	eor	r12,r6,r7			@ a^b, b^c in next round
   1285 #else
   1286 	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
   1287 	eor	r12,r6,r7			@ a^b, b^c in next round
   1288 	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
   1289 #endif
   1290 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
   1291 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1292 	add	r9,r9,r5			@ d+=h
   1293 	eor	r3,r3,r7			@ Maj(a,b,c)
   1294 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
   1295 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
   1296 	@ ldr	r2,[sp,#8*4]		@ 23
   1297 	@ ldr	r1,[sp,#5*4]
   1298 	mov	r0,r2,ror#7
   1299 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
   1300 	mov	r3,r1,ror#17
   1301 	eor	r0,r0,r2,ror#18
   1302 	eor	r3,r3,r1,ror#19
   1303 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1304 	ldr	r2,[sp,#7*4]
   1305 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1306 	ldr	r1,[sp,#0*4]
   1307 
   1308 	add	r3,r3,r0
   1309 	eor	r0,r9,r9,ror#5	@ from BODY_00_15
   1310 	add	r2,r2,r3
   1311 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
   1312 	add	r2,r2,r1			@ X[i]
   1313 	ldr	r3,[r14],#4			@ *K256++
   1314 	add	r4,r4,r2			@ h+=X[i]
   1315 	str	r2,[sp,#7*4]
   1316 	eor	r2,r10,r11
   1317 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
   1318 	and	r2,r2,r9
   1319 	add	r4,r4,r3			@ h+=K256[i]
   1320 	eor	r2,r2,r11			@ Ch(e,f,g)
   1321 	eor	r0,r5,r5,ror#11
   1322 	add	r4,r4,r2			@ h+=Ch(e,f,g)
   1323 #if 23==31
   1324 	and	r3,r3,#0xff
   1325 	cmp	r3,#0xf2			@ done?
   1326 #endif
   1327 #if 23<15
   1328 # if __ARM_ARCH__>=7
   1329 	ldr	r2,[r1],#4			@ prefetch
   1330 # else
   1331 	ldrb	r2,[r1,#3]
   1332 # endif
   1333 	eor	r3,r5,r6			@ a^b, b^c in next round
   1334 #else
   1335 	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
   1336 	eor	r3,r5,r6			@ a^b, b^c in next round
   1337 	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
   1338 #endif
   1339 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
   1340 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1341 	add	r8,r8,r4			@ d+=h
   1342 	eor	r12,r12,r6			@ Maj(a,b,c)
   1343 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
   1344 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
   1345 	@ ldr	r2,[sp,#9*4]		@ 24
   1346 	@ ldr	r1,[sp,#6*4]
   1347 	mov	r0,r2,ror#7
   1348 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
   1349 	mov	r12,r1,ror#17
   1350 	eor	r0,r0,r2,ror#18
   1351 	eor	r12,r12,r1,ror#19
   1352 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1353 	ldr	r2,[sp,#8*4]
   1354 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1355 	ldr	r1,[sp,#1*4]
   1356 
   1357 	add	r12,r12,r0
   1358 	eor	r0,r8,r8,ror#5	@ from BODY_00_15
   1359 	add	r2,r2,r12
   1360 	eor	r0,r0,r8,ror#19	@ Sigma1(e)
   1361 	add	r2,r2,r1			@ X[i]
   1362 	ldr	r12,[r14],#4			@ *K256++
   1363 	add	r11,r11,r2			@ h+=X[i]
   1364 	str	r2,[sp,#8*4]
   1365 	eor	r2,r9,r10
   1366 	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
   1367 	and	r2,r2,r8
   1368 	add	r11,r11,r12			@ h+=K256[i]
   1369 	eor	r2,r2,r10			@ Ch(e,f,g)
   1370 	eor	r0,r4,r4,ror#11
   1371 	add	r11,r11,r2			@ h+=Ch(e,f,g)
   1372 #if 24==31
   1373 	and	r12,r12,#0xff
   1374 	cmp	r12,#0xf2			@ done?
   1375 #endif
   1376 #if 24<15
   1377 # if __ARM_ARCH__>=7
   1378 	ldr	r2,[r1],#4			@ prefetch
   1379 # else
   1380 	ldrb	r2,[r1,#3]
   1381 # endif
   1382 	eor	r12,r4,r5			@ a^b, b^c in next round
   1383 #else
   1384 	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
   1385 	eor	r12,r4,r5			@ a^b, b^c in next round
   1386 	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
   1387 #endif
   1388 	eor	r0,r0,r4,ror#20	@ Sigma0(a)
   1389 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1390 	add	r7,r7,r11			@ d+=h
   1391 	eor	r3,r3,r5			@ Maj(a,b,c)
   1392 	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
   1393 	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
   1394 	@ ldr	r2,[sp,#10*4]		@ 25
   1395 	@ ldr	r1,[sp,#7*4]
   1396 	mov	r0,r2,ror#7
   1397 	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
   1398 	mov	r3,r1,ror#17
   1399 	eor	r0,r0,r2,ror#18
   1400 	eor	r3,r3,r1,ror#19
   1401 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1402 	ldr	r2,[sp,#9*4]
   1403 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1404 	ldr	r1,[sp,#2*4]
   1405 
   1406 	add	r3,r3,r0
   1407 	eor	r0,r7,r7,ror#5	@ from BODY_00_15
   1408 	add	r2,r2,r3
   1409 	eor	r0,r0,r7,ror#19	@ Sigma1(e)
   1410 	add	r2,r2,r1			@ X[i]
   1411 	ldr	r3,[r14],#4			@ *K256++
   1412 	add	r10,r10,r2			@ h+=X[i]
   1413 	str	r2,[sp,#9*4]
   1414 	eor	r2,r8,r9
   1415 	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
   1416 	and	r2,r2,r7
   1417 	add	r10,r10,r3			@ h+=K256[i]
   1418 	eor	r2,r2,r9			@ Ch(e,f,g)
   1419 	eor	r0,r11,r11,ror#11
   1420 	add	r10,r10,r2			@ h+=Ch(e,f,g)
   1421 #if 25==31
   1422 	and	r3,r3,#0xff
   1423 	cmp	r3,#0xf2			@ done?
   1424 #endif
   1425 #if 25<15
   1426 # if __ARM_ARCH__>=7
   1427 	ldr	r2,[r1],#4			@ prefetch
   1428 # else
   1429 	ldrb	r2,[r1,#3]
   1430 # endif
   1431 	eor	r3,r11,r4			@ a^b, b^c in next round
   1432 #else
   1433 	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
   1434 	eor	r3,r11,r4			@ a^b, b^c in next round
   1435 	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
   1436 #endif
   1437 	eor	r0,r0,r11,ror#20	@ Sigma0(a)
   1438 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1439 	add	r6,r6,r10			@ d+=h
   1440 	eor	r12,r12,r4			@ Maj(a,b,c)
   1441 	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
   1442 	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
   1443 	@ ldr	r2,[sp,#11*4]		@ 26
   1444 	@ ldr	r1,[sp,#8*4]
   1445 	mov	r0,r2,ror#7
   1446 	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
   1447 	mov	r12,r1,ror#17
   1448 	eor	r0,r0,r2,ror#18
   1449 	eor	r12,r12,r1,ror#19
   1450 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1451 	ldr	r2,[sp,#10*4]
   1452 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1453 	ldr	r1,[sp,#3*4]
   1454 
   1455 	add	r12,r12,r0
   1456 	eor	r0,r6,r6,ror#5	@ from BODY_00_15
   1457 	add	r2,r2,r12
   1458 	eor	r0,r0,r6,ror#19	@ Sigma1(e)
   1459 	add	r2,r2,r1			@ X[i]
   1460 	ldr	r12,[r14],#4			@ *K256++
   1461 	add	r9,r9,r2			@ h+=X[i]
   1462 	str	r2,[sp,#10*4]
   1463 	eor	r2,r7,r8
   1464 	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
   1465 	and	r2,r2,r6
   1466 	add	r9,r9,r12			@ h+=K256[i]
   1467 	eor	r2,r2,r8			@ Ch(e,f,g)
   1468 	eor	r0,r10,r10,ror#11
   1469 	add	r9,r9,r2			@ h+=Ch(e,f,g)
   1470 #if 26==31
   1471 	and	r12,r12,#0xff
   1472 	cmp	r12,#0xf2			@ done?
   1473 #endif
   1474 #if 26<15
   1475 # if __ARM_ARCH__>=7
   1476 	ldr	r2,[r1],#4			@ prefetch
   1477 # else
   1478 	ldrb	r2,[r1,#3]
   1479 # endif
   1480 	eor	r12,r10,r11			@ a^b, b^c in next round
   1481 #else
   1482 	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
   1483 	eor	r12,r10,r11			@ a^b, b^c in next round
   1484 	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
   1485 #endif
   1486 	eor	r0,r0,r10,ror#20	@ Sigma0(a)
   1487 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1488 	add	r5,r5,r9			@ d+=h
   1489 	eor	r3,r3,r11			@ Maj(a,b,c)
   1490 	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
   1491 	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
   1492 	@ ldr	r2,[sp,#12*4]		@ 27
   1493 	@ ldr	r1,[sp,#9*4]
   1494 	mov	r0,r2,ror#7
   1495 	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
   1496 	mov	r3,r1,ror#17
   1497 	eor	r0,r0,r2,ror#18
   1498 	eor	r3,r3,r1,ror#19
   1499 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1500 	ldr	r2,[sp,#11*4]
   1501 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1502 	ldr	r1,[sp,#4*4]
   1503 
   1504 	add	r3,r3,r0
   1505 	eor	r0,r5,r5,ror#5	@ from BODY_00_15
   1506 	add	r2,r2,r3
   1507 	eor	r0,r0,r5,ror#19	@ Sigma1(e)
   1508 	add	r2,r2,r1			@ X[i]
   1509 	ldr	r3,[r14],#4			@ *K256++
   1510 	add	r8,r8,r2			@ h+=X[i]
   1511 	str	r2,[sp,#11*4]
   1512 	eor	r2,r6,r7
   1513 	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
   1514 	and	r2,r2,r5
   1515 	add	r8,r8,r3			@ h+=K256[i]
   1516 	eor	r2,r2,r7			@ Ch(e,f,g)
   1517 	eor	r0,r9,r9,ror#11
   1518 	add	r8,r8,r2			@ h+=Ch(e,f,g)
   1519 #if 27==31
   1520 	and	r3,r3,#0xff
   1521 	cmp	r3,#0xf2			@ done?
   1522 #endif
   1523 #if 27<15
   1524 # if __ARM_ARCH__>=7
   1525 	ldr	r2,[r1],#4			@ prefetch
   1526 # else
   1527 	ldrb	r2,[r1,#3]
   1528 # endif
   1529 	eor	r3,r9,r10			@ a^b, b^c in next round
   1530 #else
   1531 	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
   1532 	eor	r3,r9,r10			@ a^b, b^c in next round
   1533 	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
   1534 #endif
   1535 	eor	r0,r0,r9,ror#20	@ Sigma0(a)
   1536 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1537 	add	r4,r4,r8			@ d+=h
   1538 	eor	r12,r12,r10			@ Maj(a,b,c)
   1539 	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
   1540 	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
   1541 	@ ldr	r2,[sp,#13*4]		@ 28
   1542 	@ ldr	r1,[sp,#10*4]
   1543 	mov	r0,r2,ror#7
   1544 	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
   1545 	mov	r12,r1,ror#17
   1546 	eor	r0,r0,r2,ror#18
   1547 	eor	r12,r12,r1,ror#19
   1548 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1549 	ldr	r2,[sp,#12*4]
   1550 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1551 	ldr	r1,[sp,#5*4]
   1552 
   1553 	add	r12,r12,r0
   1554 	eor	r0,r4,r4,ror#5	@ from BODY_00_15
   1555 	add	r2,r2,r12
   1556 	eor	r0,r0,r4,ror#19	@ Sigma1(e)
   1557 	add	r2,r2,r1			@ X[i]
   1558 	ldr	r12,[r14],#4			@ *K256++
   1559 	add	r7,r7,r2			@ h+=X[i]
   1560 	str	r2,[sp,#12*4]
   1561 	eor	r2,r5,r6
   1562 	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
   1563 	and	r2,r2,r4
   1564 	add	r7,r7,r12			@ h+=K256[i]
   1565 	eor	r2,r2,r6			@ Ch(e,f,g)
   1566 	eor	r0,r8,r8,ror#11
   1567 	add	r7,r7,r2			@ h+=Ch(e,f,g)
   1568 #if 28==31
   1569 	and	r12,r12,#0xff
   1570 	cmp	r12,#0xf2			@ done?
   1571 #endif
   1572 #if 28<15
   1573 # if __ARM_ARCH__>=7
   1574 	ldr	r2,[r1],#4			@ prefetch
   1575 # else
   1576 	ldrb	r2,[r1,#3]
   1577 # endif
   1578 	eor	r12,r8,r9			@ a^b, b^c in next round
   1579 #else
   1580 	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
   1581 	eor	r12,r8,r9			@ a^b, b^c in next round
   1582 	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
   1583 #endif
   1584 	eor	r0,r0,r8,ror#20	@ Sigma0(a)
   1585 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1586 	add	r11,r11,r7			@ d+=h
   1587 	eor	r3,r3,r9			@ Maj(a,b,c)
   1588 	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
   1589 	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
   1590 	@ ldr	r2,[sp,#14*4]		@ 29
   1591 	@ ldr	r1,[sp,#11*4]
   1592 	mov	r0,r2,ror#7
   1593 	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
   1594 	mov	r3,r1,ror#17
   1595 	eor	r0,r0,r2,ror#18
   1596 	eor	r3,r3,r1,ror#19
   1597 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1598 	ldr	r2,[sp,#13*4]
   1599 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1600 	ldr	r1,[sp,#6*4]
   1601 
   1602 	add	r3,r3,r0
   1603 	eor	r0,r11,r11,ror#5	@ from BODY_00_15
   1604 	add	r2,r2,r3
   1605 	eor	r0,r0,r11,ror#19	@ Sigma1(e)
   1606 	add	r2,r2,r1			@ X[i]
   1607 	ldr	r3,[r14],#4			@ *K256++
   1608 	add	r6,r6,r2			@ h+=X[i]
   1609 	str	r2,[sp,#13*4]
   1610 	eor	r2,r4,r5
   1611 	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
   1612 	and	r2,r2,r11
   1613 	add	r6,r6,r3			@ h+=K256[i]
   1614 	eor	r2,r2,r5			@ Ch(e,f,g)
   1615 	eor	r0,r7,r7,ror#11
   1616 	add	r6,r6,r2			@ h+=Ch(e,f,g)
   1617 #if 29==31
   1618 	and	r3,r3,#0xff
   1619 	cmp	r3,#0xf2			@ done?
   1620 #endif
   1621 #if 29<15
   1622 # if __ARM_ARCH__>=7
   1623 	ldr	r2,[r1],#4			@ prefetch
   1624 # else
   1625 	ldrb	r2,[r1,#3]
   1626 # endif
   1627 	eor	r3,r7,r8			@ a^b, b^c in next round
   1628 #else
   1629 	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
   1630 	eor	r3,r7,r8			@ a^b, b^c in next round
   1631 	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
   1632 #endif
   1633 	eor	r0,r0,r7,ror#20	@ Sigma0(a)
   1634 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1635 	add	r10,r10,r6			@ d+=h
   1636 	eor	r12,r12,r8			@ Maj(a,b,c)
   1637 	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
   1638 	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
   1639 	@ ldr	r2,[sp,#15*4]		@ 30
   1640 	@ ldr	r1,[sp,#12*4]
   1641 	mov	r0,r2,ror#7
   1642 	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
   1643 	mov	r12,r1,ror#17
   1644 	eor	r0,r0,r2,ror#18
   1645 	eor	r12,r12,r1,ror#19
   1646 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1647 	ldr	r2,[sp,#14*4]
   1648 	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
   1649 	ldr	r1,[sp,#7*4]
   1650 
   1651 	add	r12,r12,r0
   1652 	eor	r0,r10,r10,ror#5	@ from BODY_00_15
   1653 	add	r2,r2,r12
   1654 	eor	r0,r0,r10,ror#19	@ Sigma1(e)
   1655 	add	r2,r2,r1			@ X[i]
   1656 	ldr	r12,[r14],#4			@ *K256++
   1657 	add	r5,r5,r2			@ h+=X[i]
   1658 	str	r2,[sp,#14*4]
   1659 	eor	r2,r11,r4
   1660 	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
   1661 	and	r2,r2,r10
   1662 	add	r5,r5,r12			@ h+=K256[i]
   1663 	eor	r2,r2,r4			@ Ch(e,f,g)
   1664 	eor	r0,r6,r6,ror#11
   1665 	add	r5,r5,r2			@ h+=Ch(e,f,g)
   1666 #if 30==31
   1667 	and	r12,r12,#0xff
   1668 	cmp	r12,#0xf2			@ done?
   1669 #endif
   1670 #if 30<15
   1671 # if __ARM_ARCH__>=7
   1672 	ldr	r2,[r1],#4			@ prefetch
   1673 # else
   1674 	ldrb	r2,[r1,#3]
   1675 # endif
   1676 	eor	r12,r6,r7			@ a^b, b^c in next round
   1677 #else
   1678 	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
   1679 	eor	r12,r6,r7			@ a^b, b^c in next round
   1680 	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
   1681 #endif
   1682 	eor	r0,r0,r6,ror#20	@ Sigma0(a)
   1683 	and	r3,r3,r12			@ (b^c)&=(a^b)
   1684 	add	r9,r9,r5			@ d+=h
   1685 	eor	r3,r3,r7			@ Maj(a,b,c)
   1686 	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
   1687 	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
   1688 	@ ldr	r2,[sp,#0*4]		@ 31
   1689 	@ ldr	r1,[sp,#13*4]
   1690 	mov	r0,r2,ror#7
   1691 	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
   1692 	mov	r3,r1,ror#17
   1693 	eor	r0,r0,r2,ror#18
   1694 	eor	r3,r3,r1,ror#19
   1695 	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
   1696 	ldr	r2,[sp,#15*4]
   1697 	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
   1698 	ldr	r1,[sp,#8*4]
   1699 
   1700 	add	r3,r3,r0
   1701 	eor	r0,r9,r9,ror#5	@ from BODY_00_15
   1702 	add	r2,r2,r3
   1703 	eor	r0,r0,r9,ror#19	@ Sigma1(e)
   1704 	add	r2,r2,r1			@ X[i]
   1705 	ldr	r3,[r14],#4			@ *K256++
   1706 	add	r4,r4,r2			@ h+=X[i]
   1707 	str	r2,[sp,#15*4]
   1708 	eor	r2,r10,r11
   1709 	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
   1710 	and	r2,r2,r9
   1711 	add	r4,r4,r3			@ h+=K256[i]
   1712 	eor	r2,r2,r11			@ Ch(e,f,g)
   1713 	eor	r0,r5,r5,ror#11
   1714 	add	r4,r4,r2			@ h+=Ch(e,f,g)
   1715 #if 31==31
   1716 	and	r3,r3,#0xff
   1717 	cmp	r3,#0xf2			@ done?
   1718 #endif
   1719 #if 31<15
   1720 # if __ARM_ARCH__>=7
   1721 	ldr	r2,[r1],#4			@ prefetch
   1722 # else
   1723 	ldrb	r2,[r1,#3]
   1724 # endif
   1725 	eor	r3,r5,r6			@ a^b, b^c in next round
   1726 #else
   1727 	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
   1728 	eor	r3,r5,r6			@ a^b, b^c in next round
   1729 	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
   1730 #endif
   1731 	eor	r0,r0,r5,ror#20	@ Sigma0(a)
   1732 	and	r12,r12,r3			@ (b^c)&=(a^b)
   1733 	add	r8,r8,r4			@ d+=h
   1734 	eor	r12,r12,r6			@ Maj(a,b,c)
   1735 	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
   1736 	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
   1737 	ldreq	r3,[sp,#16*4]		@ pull ctx
   1738 	bne	.Lrounds_16_xx
   1739 
@ --- fall-through from the last 16-round pass (Z set by the 0xf2 check) ---
@ r3 was reloaded with the ctx pointer (ldreq above); fold the working
@ variables a..h (r4..r11) back into the 8-word hash state at [r3], then
@ either loop to .Loop for the next 64-byte block or unwind and return.
   1740 	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
   1741 	ldr	r0,[r3,#0]
   1742 	ldr	r2,[r3,#4]
   1743 	ldr	r12,[r3,#8]
   1744 	add	r4,r4,r0
   1745 	ldr	r0,[r3,#12]
   1746 	add	r5,r5,r2
   1747 	ldr	r2,[r3,#16]
   1748 	add	r6,r6,r12
   1749 	ldr	r12,[r3,#20]
   1750 	add	r7,r7,r0
   1751 	ldr	r0,[r3,#24]
   1752 	add	r8,r8,r2
   1753 	ldr	r2,[r3,#28]
   1754 	add	r9,r9,r12
   1755 	ldr	r1,[sp,#17*4]		@ pull inp
   1756 	ldr	r12,[sp,#18*4]		@ pull inp+len
   1757 	add	r10,r10,r0
   1758 	add	r11,r11,r2
   1759 	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
   1760 	cmp	r1,r12
   1761 	sub	r14,r14,#256	@ rewind Ktbl
@ r14 walked 64 words (256 bytes) of K256 during the block; rewind so the
@ next block starts at K256[0] again.
   1762 	bne	.Loop
   1763 
   1764 	add	sp,sp,#19*4	@ destroy frame
@ frame = 16 words X[] + saved ctx/inp/end (3 words); saved r4-r11/lr follow.
   1765 #if __ARM_ARCH__>=5
   1766 	ldmia	sp!,{r4-r11,pc}
   1767 #else
@ ARMv4 has no Thumb interworking via ldm-to-pc; test lr bit 0 and use the
@ raw "bx lr" encoding (0xe12fff1e) only when a Thumb return is required.
   1768 	ldmia	sp!,{r4-r11,lr}
   1769 	tst	lr,#1
   1770 	moveq	pc,lr			@ be binary compatible with V4, yet
   1771 	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
   1772 #endif
   1773 .size	sha256_block_data_order,.-sha256_block_data_order
   1774 #if __ARM_ARCH__>=7
   1775 .fpu	neon
   1776 
@ NEON path, entered via "bne .LNEON" from the scalar entry point, so on
@ entry r0=ctx, r1=inp, r2=inp+len (end), r3=code address used to locate
@ K256 (see "sub r14,r3,#256+32" below — same offset trick as the scalar
@ path).  The function body continues past this prologue into .L_00_48.
   1777 .type	sha256_block_data_order_neon,%function
   1778 .align	4
   1779 sha256_block_data_order_neon:
   1780 .LNEON:
   1781 	stmdb	sp!,{r4-r12,lr}
   1782 
   1783 	mov	r12,sp
   1784 	sub	sp,sp,#16*4+16		@ alloca
   1785 	sub	r14,r3,#256+32	@ K256
   1786 	bic	sp,sp,#15		@ align for 128-bit stores
@ Frame layout after alignment (written by the str's below):
@   [sp,#0..63]  X[] schedule, stored as X[i]+K[i] 128 bits at a time
@   [sp,#64] ctx   [sp,#68] inp   [sp,#72] inp+len   [sp,#76] original sp
   1787 
   1788 	vld1.8		{q0},[r1]!
   1789 	vld1.8		{q1},[r1]!
   1790 	vld1.8		{q2},[r1]!
   1791 	vld1.8		{q3},[r1]!
   1792 	vld1.32		{q8},[r14,:128]!
   1793 	vld1.32		{q9},[r14,:128]!
   1794 	vld1.32		{q10},[r14,:128]!
   1795 	vld1.32		{q11},[r14,:128]!
@ q0-q3 = first 64-byte block; vrev32.8 byte-swaps each 32-bit lane into
@ the word order SHA-256 expects (message words are big-endian).
   1796 	vrev32.8	q0,q0		@ yes, even on
   1797 	str		r0,[sp,#64]
   1798 	vrev32.8	q1,q1		@ big-endian
   1799 	str		r1,[sp,#68]
   1800 	mov		r1,sp
   1801 	vrev32.8	q2,q2
   1802 	str		r2,[sp,#72]
   1803 	vrev32.8	q3,q3
   1804 	str		r12,[sp,#76]		@ save original sp
@ Pre-add K[0..15] to X[0..15] and stash the sums on the stack; the scalar
@ half of .L_00_48 then consumes them with plain ldr's.
   1805 	vadd.i32	q8,q8,q0
   1806 	vadd.i32	q9,q9,q1
   1807 	vst1.32		{q8},[r1,:128]!
   1808 	vadd.i32	q10,q10,q2
   1809 	vst1.32		{q9},[r1,:128]!
   1810 	vadd.i32	q11,q11,q3
   1811 	vst1.32		{q10},[r1,:128]!
   1812 	vst1.32		{q11},[r1,:128]!
   1813 
@ Load hash state a..h into r4-r11, point r1 back at X[0]+K[0], prime r2
@ with the first schedule word and clear the deferred-Maj accumulator r12.
   1814 	ldmia		r0,{r4-r11}
   1815 	sub		r1,r1,#64
   1816 	ldr		r2,[sp,#0]
   1817 	eor		r12,r12,r12
   1818 	eor		r3,r5,r6
   1819 	b		.L_00_48
   1820 
   1821 .align	4
   1822 .L_00_48:
   1823 	vext.8	q8,q0,q1,#4
   1824 	add	r11,r11,r2
   1825 	eor	r2,r9,r10
   1826 	eor	r0,r8,r8,ror#5
   1827 	vext.8	q9,q2,q3,#4
   1828 	add	r4,r4,r12
   1829 	and	r2,r2,r8
   1830 	eor	r12,r0,r8,ror#19
   1831 	vshr.u32	q10,q8,#7
   1832 	eor	r0,r4,r4,ror#11
   1833 	eor	r2,r2,r10
   1834 	vadd.i32	q0,q0,q9
   1835 	add	r11,r11,r12,ror#6
   1836 	eor	r12,r4,r5
   1837 	vshr.u32	q9,q8,#3
   1838 	eor	r0,r0,r4,ror#20
   1839 	add	r11,r11,r2
   1840 	vsli.32	q10,q8,#25
   1841 	ldr	r2,[sp,#4]
   1842 	and	r3,r3,r12
   1843 	vshr.u32	q11,q8,#18
   1844 	add	r7,r7,r11
   1845 	add	r11,r11,r0,ror#2
   1846 	eor	r3,r3,r5
   1847 	veor	q9,q9,q10
   1848 	add	r10,r10,r2
   1849 	vsli.32	q11,q8,#14
   1850 	eor	r2,r8,r9
   1851 	eor	r0,r7,r7,ror#5
   1852 	vshr.u32	d24,d7,#17
   1853 	add	r11,r11,r3
   1854 	and	r2,r2,r7
   1855 	veor	q9,q9,q11
   1856 	eor	r3,r0,r7,ror#19
   1857 	eor	r0,r11,r11,ror#11
   1858 	vsli.32	d24,d7,#15
   1859 	eor	r2,r2,r9
   1860 	add	r10,r10,r3,ror#6
   1861 	vshr.u32	d25,d7,#10
   1862 	eor	r3,r11,r4
   1863 	eor	r0,r0,r11,ror#20
   1864 	vadd.i32	q0,q0,q9
   1865 	add	r10,r10,r2
   1866 	ldr	r2,[sp,#8]
   1867 	veor	d25,d25,d24
   1868 	and	r12,r12,r3
   1869 	add	r6,r6,r10
   1870 	vshr.u32	d24,d7,#19
   1871 	add	r10,r10,r0,ror#2
   1872 	eor	r12,r12,r4
   1873 	vsli.32	d24,d7,#13
   1874 	add	r9,r9,r2
   1875 	eor	r2,r7,r8
   1876 	veor	d25,d25,d24
   1877 	eor	r0,r6,r6,ror#5
   1878 	add	r10,r10,r12
   1879 	vadd.i32	d0,d0,d25
   1880 	and	r2,r2,r6
   1881 	eor	r12,r0,r6,ror#19
   1882 	vshr.u32	d24,d0,#17
   1883 	eor	r0,r10,r10,ror#11
   1884 	eor	r2,r2,r8
   1885 	vsli.32	d24,d0,#15
   1886 	add	r9,r9,r12,ror#6
   1887 	eor	r12,r10,r11
   1888 	vshr.u32	d25,d0,#10
   1889 	eor	r0,r0,r10,ror#20
   1890 	add	r9,r9,r2
   1891 	veor	d25,d25,d24
   1892 	ldr	r2,[sp,#12]
   1893 	and	r3,r3,r12
   1894 	vshr.u32	d24,d0,#19
   1895 	add	r5,r5,r9
   1896 	add	r9,r9,r0,ror#2
   1897 	eor	r3,r3,r11
   1898 	vld1.32	{q8},[r14,:128]!
   1899 	add	r8,r8,r2
   1900 	vsli.32	d24,d0,#13
   1901 	eor	r2,r6,r7
   1902 	eor	r0,r5,r5,ror#5
   1903 	veor	d25,d25,d24
   1904 	add	r9,r9,r3
   1905 	and	r2,r2,r5
   1906 	vadd.i32	d1,d1,d25
   1907 	eor	r3,r0,r5,ror#19
   1908 	eor	r0,r9,r9,ror#11
   1909 	vadd.i32	q8,q8,q0
   1910 	eor	r2,r2,r7
   1911 	add	r8,r8,r3,ror#6
   1912 	eor	r3,r9,r10
   1913 	eor	r0,r0,r9,ror#20
   1914 	add	r8,r8,r2
   1915 	ldr	r2,[sp,#16]
   1916 	and	r12,r12,r3
   1917 	add	r4,r4,r8
   1918 	vst1.32	{q8},[r1,:128]!
   1919 	add	r8,r8,r0,ror#2
   1920 	eor	r12,r12,r10
   1921 	vext.8	q8,q1,q2,#4
   1922 	add	r7,r7,r2
   1923 	eor	r2,r5,r6
   1924 	eor	r0,r4,r4,ror#5
   1925 	vext.8	q9,q3,q0,#4
   1926 	add	r8,r8,r12
   1927 	and	r2,r2,r4
   1928 	eor	r12,r0,r4,ror#19
   1929 	vshr.u32	q10,q8,#7
   1930 	eor	r0,r8,r8,ror#11
   1931 	eor	r2,r2,r6
   1932 	vadd.i32	q1,q1,q9
   1933 	add	r7,r7,r12,ror#6
   1934 	eor	r12,r8,r9
   1935 	vshr.u32	q9,q8,#3
   1936 	eor	r0,r0,r8,ror#20
   1937 	add	r7,r7,r2
   1938 	vsli.32	q10,q8,#25
   1939 	ldr	r2,[sp,#20]
   1940 	and	r3,r3,r12
   1941 	vshr.u32	q11,q8,#18
   1942 	add	r11,r11,r7
   1943 	add	r7,r7,r0,ror#2
   1944 	eor	r3,r3,r9
   1945 	veor	q9,q9,q10
   1946 	add	r6,r6,r2
   1947 	vsli.32	q11,q8,#14
   1948 	eor	r2,r4,r5
   1949 	eor	r0,r11,r11,ror#5
   1950 	vshr.u32	d24,d1,#17
   1951 	add	r7,r7,r3
   1952 	and	r2,r2,r11
   1953 	veor	q9,q9,q11
   1954 	eor	r3,r0,r11,ror#19
   1955 	eor	r0,r7,r7,ror#11
   1956 	vsli.32	d24,d1,#15
   1957 	eor	r2,r2,r5
   1958 	add	r6,r6,r3,ror#6
   1959 	vshr.u32	d25,d1,#10
   1960 	eor	r3,r7,r8
   1961 	eor	r0,r0,r7,ror#20
   1962 	vadd.i32	q1,q1,q9
   1963 	add	r6,r6,r2
   1964 	ldr	r2,[sp,#24]
   1965 	veor	d25,d25,d24
   1966 	and	r12,r12,r3
   1967 	add	r10,r10,r6
   1968 	vshr.u32	d24,d1,#19
   1969 	add	r6,r6,r0,ror#2
   1970 	eor	r12,r12,r8
   1971 	vsli.32	d24,d1,#13
   1972 	add	r5,r5,r2
   1973 	eor	r2,r11,r4
   1974 	veor	d25,d25,d24
   1975 	eor	r0,r10,r10,ror#5
   1976 	add	r6,r6,r12
   1977 	vadd.i32	d2,d2,d25
   1978 	and	r2,r2,r10
   1979 	eor	r12,r0,r10,ror#19
   1980 	vshr.u32	d24,d2,#17
   1981 	eor	r0,r6,r6,ror#11
   1982 	eor	r2,r2,r4
   1983 	vsli.32	d24,d2,#15
   1984 	add	r5,r5,r12,ror#6
   1985 	eor	r12,r6,r7
   1986 	vshr.u32	d25,d2,#10
   1987 	eor	r0,r0,r6,ror#20
   1988 	add	r5,r5,r2
   1989 	veor	d25,d25,d24
   1990 	ldr	r2,[sp,#28]
   1991 	and	r3,r3,r12
   1992 	vshr.u32	d24,d2,#19
   1993 	add	r9,r9,r5
   1994 	add	r5,r5,r0,ror#2
   1995 	eor	r3,r3,r7
   1996 	vld1.32	{q8},[r14,:128]!
   1997 	add	r4,r4,r2
   1998 	vsli.32	d24,d2,#13
   1999 	eor	r2,r10,r11
   2000 	eor	r0,r9,r9,ror#5
   2001 	veor	d25,d25,d24
   2002 	add	r5,r5,r3
   2003 	and	r2,r2,r9
   2004 	vadd.i32	d3,d3,d25
   2005 	eor	r3,r0,r9,ror#19
   2006 	eor	r0,r5,r5,ror#11
   2007 	vadd.i32	q8,q8,q1
   2008 	eor	r2,r2,r11
   2009 	add	r4,r4,r3,ror#6
   2010 	eor	r3,r5,r6
   2011 	eor	r0,r0,r5,ror#20
   2012 	add	r4,r4,r2
   2013 	ldr	r2,[sp,#32]
   2014 	and	r12,r12,r3
   2015 	add	r8,r8,r4
   2016 	vst1.32	{q8},[r1,:128]!
   2017 	add	r4,r4,r0,ror#2
   2018 	eor	r12,r12,r6
   2019 	vext.8	q8,q2,q3,#4
   2020 	add	r11,r11,r2
   2021 	eor	r2,r9,r10
   2022 	eor	r0,r8,r8,ror#5
   2023 	vext.8	q9,q0,q1,#4
   2024 	add	r4,r4,r12
   2025 	and	r2,r2,r8
   2026 	eor	r12,r0,r8,ror#19
   2027 	vshr.u32	q10,q8,#7
   2028 	eor	r0,r4,r4,ror#11
   2029 	eor	r2,r2,r10
   2030 	vadd.i32	q2,q2,q9
   2031 	add	r11,r11,r12,ror#6
   2032 	eor	r12,r4,r5
   2033 	vshr.u32	q9,q8,#3
   2034 	eor	r0,r0,r4,ror#20
   2035 	add	r11,r11,r2
   2036 	vsli.32	q10,q8,#25
   2037 	ldr	r2,[sp,#36]
   2038 	and	r3,r3,r12
   2039 	vshr.u32	q11,q8,#18
   2040 	add	r7,r7,r11
   2041 	add	r11,r11,r0,ror#2
   2042 	eor	r3,r3,r5
   2043 	veor	q9,q9,q10
   2044 	add	r10,r10,r2
   2045 	vsli.32	q11,q8,#14
   2046 	eor	r2,r8,r9
   2047 	eor	r0,r7,r7,ror#5
   2048 	vshr.u32	d24,d3,#17
   2049 	add	r11,r11,r3
   2050 	and	r2,r2,r7
   2051 	veor	q9,q9,q11
   2052 	eor	r3,r0,r7,ror#19
   2053 	eor	r0,r11,r11,ror#11
   2054 	vsli.32	d24,d3,#15
   2055 	eor	r2,r2,r9
   2056 	add	r10,r10,r3,ror#6
   2057 	vshr.u32	d25,d3,#10
   2058 	eor	r3,r11,r4
   2059 	eor	r0,r0,r11,ror#20
   2060 	vadd.i32	q2,q2,q9
   2061 	add	r10,r10,r2
   2062 	ldr	r2,[sp,#40]
   2063 	veor	d25,d25,d24
   2064 	and	r12,r12,r3
   2065 	add	r6,r6,r10
   2066 	vshr.u32	d24,d3,#19
   2067 	add	r10,r10,r0,ror#2
   2068 	eor	r12,r12,r4
   2069 	vsli.32	d24,d3,#13
   2070 	add	r9,r9,r2
   2071 	eor	r2,r7,r8
   2072 	veor	d25,d25,d24
   2073 	eor	r0,r6,r6,ror#5
   2074 	add	r10,r10,r12
   2075 	vadd.i32	d4,d4,d25
   2076 	and	r2,r2,r6
   2077 	eor	r12,r0,r6,ror#19
   2078 	vshr.u32	d24,d4,#17
   2079 	eor	r0,r10,r10,ror#11
   2080 	eor	r2,r2,r8
   2081 	vsli.32	d24,d4,#15
   2082 	add	r9,r9,r12,ror#6
   2083 	eor	r12,r10,r11
   2084 	vshr.u32	d25,d4,#10
   2085 	eor	r0,r0,r10,ror#20
   2086 	add	r9,r9,r2
   2087 	veor	d25,d25,d24
   2088 	ldr	r2,[sp,#44]
   2089 	and	r3,r3,r12
   2090 	vshr.u32	d24,d4,#19
   2091 	add	r5,r5,r9
   2092 	add	r9,r9,r0,ror#2
   2093 	eor	r3,r3,r11
   2094 	vld1.32	{q8},[r14,:128]!
   2095 	add	r8,r8,r2
   2096 	vsli.32	d24,d4,#13
   2097 	eor	r2,r6,r7
   2098 	eor	r0,r5,r5,ror#5
   2099 	veor	d25,d25,d24
   2100 	add	r9,r9,r3
   2101 	and	r2,r2,r5
   2102 	vadd.i32	d5,d5,d25
   2103 	eor	r3,r0,r5,ror#19
   2104 	eor	r0,r9,r9,ror#11
   2105 	vadd.i32	q8,q8,q2
   2106 	eor	r2,r2,r7
   2107 	add	r8,r8,r3,ror#6
   2108 	eor	r3,r9,r10
   2109 	eor	r0,r0,r9,ror#20
   2110 	add	r8,r8,r2
   2111 	ldr	r2,[sp,#48]
   2112 	and	r12,r12,r3
   2113 	add	r4,r4,r8
   2114 	vst1.32	{q8},[r1,:128]!
   2115 	add	r8,r8,r0,ror#2
   2116 	eor	r12,r12,r10
   2117 	vext.8	q8,q3,q0,#4
   2118 	add	r7,r7,r2
   2119 	eor	r2,r5,r6
   2120 	eor	r0,r4,r4,ror#5
   2121 	vext.8	q9,q1,q2,#4
   2122 	add	r8,r8,r12
   2123 	and	r2,r2,r4
   2124 	eor	r12,r0,r4,ror#19
   2125 	vshr.u32	q10,q8,#7
   2126 	eor	r0,r8,r8,ror#11
   2127 	eor	r2,r2,r6
   2128 	vadd.i32	q3,q3,q9
   2129 	add	r7,r7,r12,ror#6
   2130 	eor	r12,r8,r9
   2131 	vshr.u32	q9,q8,#3
   2132 	eor	r0,r0,r8,ror#20
   2133 	add	r7,r7,r2
   2134 	vsli.32	q10,q8,#25
   2135 	ldr	r2,[sp,#52]
   2136 	and	r3,r3,r12
   2137 	vshr.u32	q11,q8,#18
   2138 	add	r11,r11,r7
   2139 	add	r7,r7,r0,ror#2
   2140 	eor	r3,r3,r9
   2141 	veor	q9,q9,q10
   2142 	add	r6,r6,r2
   2143 	vsli.32	q11,q8,#14
   2144 	eor	r2,r4,r5
   2145 	eor	r0,r11,r11,ror#5
   2146 	vshr.u32	d24,d5,#17
   2147 	add	r7,r7,r3
   2148 	and	r2,r2,r11
   2149 	veor	q9,q9,q11
   2150 	eor	r3,r0,r11,ror#19
   2151 	eor	r0,r7,r7,ror#11
   2152 	vsli.32	d24,d5,#15
   2153 	eor	r2,r2,r5
   2154 	add	r6,r6,r3,ror#6
   2155 	vshr.u32	d25,d5,#10
   2156 	eor	r3,r7,r8
   2157 	eor	r0,r0,r7,ror#20
   2158 	vadd.i32	q3,q3,q9
   2159 	add	r6,r6,r2
   2160 	ldr	r2,[sp,#56]
   2161 	veor	d25,d25,d24
   2162 	and	r12,r12,r3
   2163 	add	r10,r10,r6
   2164 	vshr.u32	d24,d5,#19
   2165 	add	r6,r6,r0,ror#2
   2166 	eor	r12,r12,r8
   2167 	vsli.32	d24,d5,#13
   2168 	add	r5,r5,r2
   2169 	eor	r2,r11,r4
   2170 	veor	d25,d25,d24
   2171 	eor	r0,r10,r10,ror#5
   2172 	add	r6,r6,r12
   2173 	vadd.i32	d6,d6,d25
   2174 	and	r2,r2,r10
   2175 	eor	r12,r0,r10,ror#19
   2176 	vshr.u32	d24,d6,#17
   2177 	eor	r0,r6,r6,ror#11
   2178 	eor	r2,r2,r4
   2179 	vsli.32	d24,d6,#15
   2180 	add	r5,r5,r12,ror#6
   2181 	eor	r12,r6,r7
   2182 	vshr.u32	d25,d6,#10
   2183 	eor	r0,r0,r6,ror#20
   2184 	add	r5,r5,r2
   2185 	veor	d25,d25,d24
   2186 	ldr	r2,[sp,#60]
   2187 	and	r3,r3,r12
   2188 	vshr.u32	d24,d6,#19
   2189 	add	r9,r9,r5
   2190 	add	r5,r5,r0,ror#2
   2191 	eor	r3,r3,r7
   2192 	vld1.32	{q8},[r14,:128]!
   2193 	add	r4,r4,r2
   2194 	vsli.32	d24,d6,#13
   2195 	eor	r2,r10,r11
   2196 	eor	r0,r9,r9,ror#5
   2197 	veor	d25,d25,d24
   2198 	add	r5,r5,r3
   2199 	and	r2,r2,r9
   2200 	vadd.i32	d7,d7,d25
   2201 	eor	r3,r0,r9,ror#19
   2202 	eor	r0,r5,r5,ror#11
   2203 	vadd.i32	q8,q8,q3
   2204 	eor	r2,r2,r11
   2205 	add	r4,r4,r3,ror#6
   2206 	eor	r3,r5,r6
   2207 	eor	r0,r0,r5,ror#20
   2208 	add	r4,r4,r2
   2209 	ldr	r2,[r14]
   2210 	and	r12,r12,r3
   2211 	add	r8,r8,r4
   2212 	vst1.32	{q8},[r1,:128]!
   2213 	add	r4,r4,r0,ror#2
   2214 	eor	r12,r12,r6
   2215 	teq	r2,#0				@ check for K256 terminator
   2216 	ldr	r2,[sp,#0]
   2217 	sub	r1,r1,#64
   2218 	bne	.L_00_48
   2219 
   2220 	ldr		r1,[sp,#68]
   2221 	ldr		r0,[sp,#72]
   2222 	sub		r14,r14,#256	@ rewind r14
   2223 	teq		r1,r0
   2224 	subeq		r1,r1,#64		@ avoid SEGV
   2225 	vld1.8		{q0},[r1]!		@ load next input block
   2226 	vld1.8		{q1},[r1]!
   2227 	vld1.8		{q2},[r1]!
   2228 	vld1.8		{q3},[r1]!
   2229 	strne		r1,[sp,#68]
   2230 	mov		r1,sp
   2231 	add	r11,r11,r2
   2232 	eor	r2,r9,r10
   2233 	eor	r0,r8,r8,ror#5
   2234 	add	r4,r4,r12
   2235 	vld1.32	{q8},[r14,:128]!
   2236 	and	r2,r2,r8
   2237 	eor	r12,r0,r8,ror#19
   2238 	eor	r0,r4,r4,ror#11
   2239 	eor	r2,r2,r10
   2240 	vrev32.8	q0,q0
   2241 	add	r11,r11,r12,ror#6
   2242 	eor	r12,r4,r5
   2243 	eor	r0,r0,r4,ror#20
   2244 	add	r11,r11,r2
   2245 	vadd.i32	q8,q8,q0
   2246 	ldr	r2,[sp,#4]
   2247 	and	r3,r3,r12
   2248 	add	r7,r7,r11
   2249 	add	r11,r11,r0,ror#2
   2250 	eor	r3,r3,r5
   2251 	add	r10,r10,r2
   2252 	eor	r2,r8,r9
   2253 	eor	r0,r7,r7,ror#5
   2254 	add	r11,r11,r3
   2255 	and	r2,r2,r7
   2256 	eor	r3,r0,r7,ror#19
   2257 	eor	r0,r11,r11,ror#11
   2258 	eor	r2,r2,r9
   2259 	add	r10,r10,r3,ror#6
   2260 	eor	r3,r11,r4
   2261 	eor	r0,r0,r11,ror#20
   2262 	add	r10,r10,r2
   2263 	ldr	r2,[sp,#8]
   2264 	and	r12,r12,r3
   2265 	add	r6,r6,r10
   2266 	add	r10,r10,r0,ror#2
   2267 	eor	r12,r12,r4
   2268 	add	r9,r9,r2
   2269 	eor	r2,r7,r8
   2270 	eor	r0,r6,r6,ror#5
   2271 	add	r10,r10,r12
   2272 	and	r2,r2,r6
   2273 	eor	r12,r0,r6,ror#19
   2274 	eor	r0,r10,r10,ror#11
   2275 	eor	r2,r2,r8
   2276 	add	r9,r9,r12,ror#6
   2277 	eor	r12,r10,r11
   2278 	eor	r0,r0,r10,ror#20
   2279 	add	r9,r9,r2
   2280 	ldr	r2,[sp,#12]
   2281 	and	r3,r3,r12
   2282 	add	r5,r5,r9
   2283 	add	r9,r9,r0,ror#2
   2284 	eor	r3,r3,r11
   2285 	add	r8,r8,r2
   2286 	eor	r2,r6,r7
   2287 	eor	r0,r5,r5,ror#5
   2288 	add	r9,r9,r3
   2289 	and	r2,r2,r5
   2290 	eor	r3,r0,r5,ror#19
   2291 	eor	r0,r9,r9,ror#11
   2292 	eor	r2,r2,r7
   2293 	add	r8,r8,r3,ror#6
   2294 	eor	r3,r9,r10
   2295 	eor	r0,r0,r9,ror#20
   2296 	add	r8,r8,r2
   2297 	ldr	r2,[sp,#16]
   2298 	and	r12,r12,r3
   2299 	add	r4,r4,r8
   2300 	add	r8,r8,r0,ror#2
   2301 	eor	r12,r12,r10
   2302 	vst1.32	{q8},[r1,:128]!
   2303 	add	r7,r7,r2
   2304 	eor	r2,r5,r6
   2305 	eor	r0,r4,r4,ror#5
   2306 	add	r8,r8,r12
   2307 	vld1.32	{q8},[r14,:128]!
   2308 	and	r2,r2,r4
   2309 	eor	r12,r0,r4,ror#19
   2310 	eor	r0,r8,r8,ror#11
   2311 	eor	r2,r2,r6
   2312 	vrev32.8	q1,q1
   2313 	add	r7,r7,r12,ror#6
   2314 	eor	r12,r8,r9
   2315 	eor	r0,r0,r8,ror#20
   2316 	add	r7,r7,r2
   2317 	vadd.i32	q8,q8,q1
   2318 	ldr	r2,[sp,#20]
   2319 	and	r3,r3,r12
   2320 	add	r11,r11,r7
   2321 	add	r7,r7,r0,ror#2
   2322 	eor	r3,r3,r9
   2323 	add	r6,r6,r2
   2324 	eor	r2,r4,r5
   2325 	eor	r0,r11,r11,ror#5
   2326 	add	r7,r7,r3
   2327 	and	r2,r2,r11
   2328 	eor	r3,r0,r11,ror#19
   2329 	eor	r0,r7,r7,ror#11
   2330 	eor	r2,r2,r5
   2331 	add	r6,r6,r3,ror#6
   2332 	eor	r3,r7,r8
   2333 	eor	r0,r0,r7,ror#20
   2334 	add	r6,r6,r2
   2335 	ldr	r2,[sp,#24]
   2336 	and	r12,r12,r3
   2337 	add	r10,r10,r6
   2338 	add	r6,r6,r0,ror#2
   2339 	eor	r12,r12,r8
   2340 	add	r5,r5,r2
   2341 	eor	r2,r11,r4
   2342 	eor	r0,r10,r10,ror#5
   2343 	add	r6,r6,r12
   2344 	and	r2,r2,r10
   2345 	eor	r12,r0,r10,ror#19
   2346 	eor	r0,r6,r6,ror#11
   2347 	eor	r2,r2,r4
   2348 	add	r5,r5,r12,ror#6
   2349 	eor	r12,r6,r7
   2350 	eor	r0,r0,r6,ror#20
   2351 	add	r5,r5,r2
   2352 	ldr	r2,[sp,#28]
   2353 	and	r3,r3,r12
   2354 	add	r9,r9,r5
   2355 	add	r5,r5,r0,ror#2
   2356 	eor	r3,r3,r7
   2357 	add	r4,r4,r2
   2358 	eor	r2,r10,r11
   2359 	eor	r0,r9,r9,ror#5
   2360 	add	r5,r5,r3
   2361 	and	r2,r2,r9
   2362 	eor	r3,r0,r9,ror#19
   2363 	eor	r0,r5,r5,ror#11
   2364 	eor	r2,r2,r11
   2365 	add	r4,r4,r3,ror#6
   2366 	eor	r3,r5,r6
   2367 	eor	r0,r0,r5,ror#20
   2368 	add	r4,r4,r2
   2369 	ldr	r2,[sp,#32]
   2370 	and	r12,r12,r3
   2371 	add	r8,r8,r4
   2372 	add	r4,r4,r0,ror#2
   2373 	eor	r12,r12,r6
   2374 	vst1.32	{q8},[r1,:128]!
   2375 	add	r11,r11,r2
   2376 	eor	r2,r9,r10
   2377 	eor	r0,r8,r8,ror#5
   2378 	add	r4,r4,r12
   2379 	vld1.32	{q8},[r14,:128]!
   2380 	and	r2,r2,r8
   2381 	eor	r12,r0,r8,ror#19
   2382 	eor	r0,r4,r4,ror#11
   2383 	eor	r2,r2,r10
   2384 	vrev32.8	q2,q2
   2385 	add	r11,r11,r12,ror#6
   2386 	eor	r12,r4,r5
   2387 	eor	r0,r0,r4,ror#20
   2388 	add	r11,r11,r2
   2389 	vadd.i32	q8,q8,q2
   2390 	ldr	r2,[sp,#36]
   2391 	and	r3,r3,r12
   2392 	add	r7,r7,r11
   2393 	add	r11,r11,r0,ror#2
   2394 	eor	r3,r3,r5
   2395 	add	r10,r10,r2
   2396 	eor	r2,r8,r9
   2397 	eor	r0,r7,r7,ror#5
   2398 	add	r11,r11,r3
   2399 	and	r2,r2,r7
   2400 	eor	r3,r0,r7,ror#19
   2401 	eor	r0,r11,r11,ror#11
   2402 	eor	r2,r2,r9
   2403 	add	r10,r10,r3,ror#6
   2404 	eor	r3,r11,r4
   2405 	eor	r0,r0,r11,ror#20
   2406 	add	r10,r10,r2
   2407 	ldr	r2,[sp,#40]
   2408 	and	r12,r12,r3
   2409 	add	r6,r6,r10
   2410 	add	r10,r10,r0,ror#2
   2411 	eor	r12,r12,r4
   2412 	add	r9,r9,r2
   2413 	eor	r2,r7,r8
   2414 	eor	r0,r6,r6,ror#5
   2415 	add	r10,r10,r12
   2416 	and	r2,r2,r6
   2417 	eor	r12,r0,r6,ror#19
   2418 	eor	r0,r10,r10,ror#11
   2419 	eor	r2,r2,r8
   2420 	add	r9,r9,r12,ror#6
   2421 	eor	r12,r10,r11
   2422 	eor	r0,r0,r10,ror#20
   2423 	add	r9,r9,r2
   2424 	ldr	r2,[sp,#44]
   2425 	and	r3,r3,r12
   2426 	add	r5,r5,r9
   2427 	add	r9,r9,r0,ror#2
   2428 	eor	r3,r3,r11
   2429 	add	r8,r8,r2
   2430 	eor	r2,r6,r7
   2431 	eor	r0,r5,r5,ror#5
   2432 	add	r9,r9,r3
   2433 	and	r2,r2,r5
   2434 	eor	r3,r0,r5,ror#19
   2435 	eor	r0,r9,r9,ror#11
   2436 	eor	r2,r2,r7
   2437 	add	r8,r8,r3,ror#6
   2438 	eor	r3,r9,r10
   2439 	eor	r0,r0,r9,ror#20
   2440 	add	r8,r8,r2
   2441 	ldr	r2,[sp,#48]
   2442 	and	r12,r12,r3
   2443 	add	r4,r4,r8
   2444 	add	r8,r8,r0,ror#2
   2445 	eor	r12,r12,r10
   2446 	vst1.32	{q8},[r1,:128]!
   2447 	add	r7,r7,r2
   2448 	eor	r2,r5,r6
   2449 	eor	r0,r4,r4,ror#5
   2450 	add	r8,r8,r12
   2451 	vld1.32	{q8},[r14,:128]!
   2452 	and	r2,r2,r4
   2453 	eor	r12,r0,r4,ror#19
   2454 	eor	r0,r8,r8,ror#11
   2455 	eor	r2,r2,r6
   2456 	vrev32.8	q3,q3
   2457 	add	r7,r7,r12,ror#6
   2458 	eor	r12,r8,r9
   2459 	eor	r0,r0,r8,ror#20
   2460 	add	r7,r7,r2
   2461 	vadd.i32	q8,q8,q3
   2462 	ldr	r2,[sp,#52]
   2463 	and	r3,r3,r12
   2464 	add	r11,r11,r7
   2465 	add	r7,r7,r0,ror#2
   2466 	eor	r3,r3,r9
   2467 	add	r6,r6,r2
   2468 	eor	r2,r4,r5
   2469 	eor	r0,r11,r11,ror#5
   2470 	add	r7,r7,r3
   2471 	and	r2,r2,r11
   2472 	eor	r3,r0,r11,ror#19
   2473 	eor	r0,r7,r7,ror#11
   2474 	eor	r2,r2,r5
   2475 	add	r6,r6,r3,ror#6
   2476 	eor	r3,r7,r8
   2477 	eor	r0,r0,r7,ror#20
   2478 	add	r6,r6,r2
   2479 	ldr	r2,[sp,#56]
   2480 	and	r12,r12,r3
   2481 	add	r10,r10,r6
   2482 	add	r6,r6,r0,ror#2
   2483 	eor	r12,r12,r8
   2484 	add	r5,r5,r2
   2485 	eor	r2,r11,r4
   2486 	eor	r0,r10,r10,ror#5
   2487 	add	r6,r6,r12
   2488 	and	r2,r2,r10
   2489 	eor	r12,r0,r10,ror#19
   2490 	eor	r0,r6,r6,ror#11
   2491 	eor	r2,r2,r4
   2492 	add	r5,r5,r12,ror#6
   2493 	eor	r12,r6,r7
   2494 	eor	r0,r0,r6,ror#20
   2495 	add	r5,r5,r2
   2496 	ldr	r2,[sp,#60]
   2497 	and	r3,r3,r12
   2498 	add	r9,r9,r5
   2499 	add	r5,r5,r0,ror#2
   2500 	eor	r3,r3,r7
   2501 	add	r4,r4,r2
   2502 	eor	r2,r10,r11
   2503 	eor	r0,r9,r9,ror#5
   2504 	add	r5,r5,r3
   2505 	and	r2,r2,r9
   2506 	eor	r3,r0,r9,ror#19
   2507 	eor	r0,r5,r5,ror#11
   2508 	eor	r2,r2,r11
   2509 	add	r4,r4,r3,ror#6
   2510 	eor	r3,r5,r6
   2511 	eor	r0,r0,r5,ror#20
   2512 	add	r4,r4,r2
   2513 	ldr	r2,[sp,#64]
   2514 	and	r12,r12,r3
   2515 	add	r8,r8,r4
   2516 	add	r4,r4,r0,ror#2
   2517 	eor	r12,r12,r6
   2518 	vst1.32	{q8},[r1,:128]!
   2519 	ldr	r0,[r2,#0]
   2520 	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
   2521 	ldr	r12,[r2,#4]
   2522 	ldr	r3,[r2,#8]
   2523 	ldr	r1,[r2,#12]
   2524 	add	r4,r4,r0			@ accumulate
   2525 	ldr	r0,[r2,#16]
   2526 	add	r5,r5,r12
   2527 	ldr	r12,[r2,#20]
   2528 	add	r6,r6,r3
   2529 	ldr	r3,[r2,#24]
   2530 	add	r7,r7,r1
   2531 	ldr	r1,[r2,#28]
   2532 	add	r8,r8,r0
   2533 	str	r4,[r2],#4
   2534 	add	r9,r9,r12
   2535 	str	r5,[r2],#4
   2536 	add	r10,r10,r3
   2537 	str	r6,[r2],#4
   2538 	add	r11,r11,r1
   2539 	str	r7,[r2],#4
   2540 	stmia	r2,{r8-r11}
   2541 
   2542 	movne	r1,sp
   2543 	ldrne	r2,[sp,#0]
   2544 	eorne	r12,r12,r12
   2545 	ldreq	sp,[sp,#76]			@ restore original sp
   2546 	eorne	r3,r5,r6
   2547 	bne	.L_00_48
   2548 
   2549 	ldmia	sp!,{r4-r12,pc}
   2550 .size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
   2551 #endif
#if __ARM_ARCH__>=7
@-----------------------------------------------------------------------
@ void sha256_block_data_order_armv8(u32 ctx[8], const void *inp, ...)
@
@ SHA-256 block transform using the ARMv8 Cryptography Extensions
@ (SHA256H/SHA256H2/SHA256SU0/SHA256SU1). Reached via the .LARMv8
@ dispatch in sha256_block_data_order, so on entry:
@   r0 = hash state (8 x u32, a..h)
@   r1 = input data
@   r2 = end-of-input pointer (r1 + 64*blocks, set up by the dispatcher)
@   r3 = address of sha256_block_data_order (set by the dispatcher's
@        "sub r3,pc,#8"); adjusted below to point at K256
@ The crypto-extension instructions are emitted as raw .byte sequences
@ so the file assembles with toolchains that predate these opcodes.
@-----------------------------------------------------------------------
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]		@ load state: q0 = {a,b,c,d}, q1 = {e,f,g,h}
	sub	r3,r3,#sha256_block_data_order-K256	@ r3 = K256 constant table

.Loop_v8:
	vld1.8		{q8-q9},[r1]!	@ load 64-byte input block into q8-q11
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!	@ first batch of round constants
	vrev32.8	q8,q8		@ byte-swap input words to big-endian
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1		@ save state for the final accumulate
	teq		r1,r2		@ more blocks? flags consumed by bne .Loop_v8
	vld1.32		{q13},[r3]!
	@ Rounds 0..47: twelve 4-round quads; each quad consumes one
	@ K256 batch (q12/q13 alternating) and updates the message
	@ schedule for a later quad via sha256su0/sha256su1.
	vadd.i32	q12,q12,q8
	.byte	0xe2,0x03,0xfa,0xf3	@ sha256su0 q8,q9
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe6,0x0c,0x64,0xf3	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	.byte	0xe4,0x23,0xfa,0xf3	@ sha256su0 q9,q10
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe0,0x2c,0x66,0xf3	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	.byte	0xe6,0x43,0xfa,0xf3	@ sha256su0 q10,q11
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe2,0x4c,0x60,0xf3	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	.byte	0xe0,0x63,0xfa,0xf3	@ sha256su0 q11,q8
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe4,0x6c,0x62,0xf3	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	.byte	0xe2,0x03,0xfa,0xf3	@ sha256su0 q8,q9
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe6,0x0c,0x64,0xf3	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	.byte	0xe4,0x23,0xfa,0xf3	@ sha256su0 q9,q10
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe0,0x2c,0x66,0xf3	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	.byte	0xe6,0x43,0xfa,0xf3	@ sha256su0 q10,q11
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe2,0x4c,0x60,0xf3	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	.byte	0xe0,0x63,0xfa,0xf3	@ sha256su0 q11,q8
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe4,0x6c,0x62,0xf3	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	.byte	0xe2,0x03,0xfa,0xf3	@ sha256su0 q8,q9
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe6,0x0c,0x64,0xf3	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	.byte	0xe4,0x23,0xfa,0xf3	@ sha256su0 q9,q10
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe0,0x2c,0x66,0xf3	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	.byte	0xe6,0x43,0xfa,0xf3	@ sha256su0 q10,q11
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12
	.byte	0xe2,0x4c,0x60,0xf3	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	.byte	0xe0,0x63,0xfa,0xf3	@ sha256su0 q11,q8
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13
	.byte	0xe4,0x6c,0x62,0xf3	@ sha256su1 q11,q9,q10
	@ Rounds 48..63: last four quads, no further schedule updates.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	.byte	0x68,0x0c,0x02,0xf3	@ sha256h q0,q1,q12
	.byte	0x68,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	.byte	0x6a,0x0c,0x02,0xf3	@ sha256h q0,q1,q13
	.byte	0x6a,0x2c,0x14,0xf3	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14	@ accumulate: new state += saved state
	vadd.i32	q1,q1,q15
	bne		.Loop_v8	@ flags still from "teq r1,r2" above

	vst1.32		{q0,q1},[r0]	@ store updated hash state

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
   2688 .asciz  "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro (at) openssl.org>"
   2689 .align	2
   2690 .comm   OPENSSL_armcap_P,4,4
   2691