Home | History | Annotate | Download | only in fipsmodule
      1 // This file is generated from a similarly-named Perl script in the BoringSSL
      2 // source tree. Do not edit by hand.
      3 
      4 #if defined(__has_feature)
      5 #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
      6 #define OPENSSL_NO_ASM
      7 #endif
      8 #endif
      9 
     10 #if !defined(OPENSSL_NO_ASM)
     11 #if defined(__aarch64__)
     12 #if defined(BORINGSSL_PREFIX)
     13 #include <boringssl_prefix_symbols_asm.h>
     14 #endif
     15 // Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
     16 //
     17 // Licensed under the OpenSSL license (the "License").  You may not use
     18 // this file except in compliance with the License.  You can obtain a copy
     19 // in the file LICENSE in the source distribution or at
     20 // https://www.openssl.org/source/license.html
     21 
     22 // ====================================================================
     23 // Written by Andy Polyakov <appro (at) openssl.org> for the OpenSSL
     24 // project. The module is, however, dual licensed under OpenSSL and
     25 // CRYPTOGAMS licenses depending on where you obtain it. For further
     26 // details see http://www.openssl.org/~appro/cryptogams/.
     27 //
     28 // Permission to use under GPLv2 terms is granted.
     29 // ====================================================================
     30 //
     31 // SHA256/512 for ARMv8.
     32 //
     33 // Performance in cycles per processed byte and improvement coefficient
     34 // over code generated with "default" compiler:
     35 //
     36 //		SHA256-hw	SHA256(*)	SHA512
     37 // Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
     38 // Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
     39 // Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
     40 // Denver	2.01		10.5 (+26%)	6.70 (+8%)
     41 // X-Gene			20.0 (+100%)	12.8 (+300%(***))
     42 // Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
     43 //
     44 // (*)	Software SHA256 results are of lesser relevance, presented
     45 //	mostly for informational purposes.
     46 // (**)	The result is a trade-off: it's possible to improve it by
     47 //	10% (or by 1 cycle per round), but at the cost of 20% loss
     48 //	on Cortex-A53 (or by 4 cycles per round).
     49 // (***)	Super-impressive coefficients over gcc-generated code are an
     50 //	indication of some compiler "pathology", most notably code
     51 //	generated with -mgeneral-regs-only is significantly faster
     52 //	and the gap is only 40-90%.
     53 
     54 #ifndef	__KERNEL__
     55 # include <openssl/arm_arch.h>
     56 #endif
     57 
     58 .text
     59 
     60 
     61 .globl	sha512_block_data_order
     62 .hidden	sha512_block_data_order
     63 .type	sha512_block_data_order,%function
     64 .align	6
     65 sha512_block_data_order:
     66 	stp	x29,x30,[sp,#-128]!
     67 	add	x29,sp,#0
     68 
     69 	stp	x19,x20,[sp,#16]
     70 	stp	x21,x22,[sp,#32]
     71 	stp	x23,x24,[sp,#48]
     72 	stp	x25,x26,[sp,#64]
     73 	stp	x27,x28,[sp,#80]
     74 	sub	sp,sp,#4*8
     75 
     76 	ldp	x20,x21,[x0]				// load context
     77 	ldp	x22,x23,[x0,#2*8]
     78 	ldp	x24,x25,[x0,#4*8]
     79 	add	x2,x1,x2,lsl#7	// end of input
     80 	ldp	x26,x27,[x0,#6*8]
     81 	adrp	x30,.LK512
     82 	add	x30,x30,:lo12:.LK512
     83 	stp	x0,x2,[x29,#96]
     84 
     85 .Loop:
     86 	ldp	x3,x4,[x1],#2*8
     87 	ldr	x19,[x30],#8			// *K++
     88 	eor	x28,x21,x22				// magic seed
     89 	str	x1,[x29,#112]
     90 #ifndef	__ARMEB__
     91 	rev	x3,x3			// 0
     92 #endif
     93 	ror	x16,x24,#14
     94 	add	x27,x27,x19			// h+=K[i]
     95 	eor	x6,x24,x24,ror#23
     96 	and	x17,x25,x24
     97 	bic	x19,x26,x24
     98 	add	x27,x27,x3			// h+=X[i]
     99 	orr	x17,x17,x19			// Ch(e,f,g)
    100 	eor	x19,x20,x21			// a^b, b^c in next round
    101 	eor	x16,x16,x6,ror#18	// Sigma1(e)
    102 	ror	x6,x20,#28
    103 	add	x27,x27,x17			// h+=Ch(e,f,g)
    104 	eor	x17,x20,x20,ror#5
    105 	add	x27,x27,x16			// h+=Sigma1(e)
    106 	and	x28,x28,x19			// (b^c)&=(a^b)
    107 	add	x23,x23,x27			// d+=h
    108 	eor	x28,x28,x21			// Maj(a,b,c)
    109 	eor	x17,x6,x17,ror#34	// Sigma0(a)
    110 	add	x27,x27,x28			// h+=Maj(a,b,c)
    111 	ldr	x28,[x30],#8		// *K++, x19 in next round
    112 	//add	x27,x27,x17			// h+=Sigma0(a)
    113 #ifndef	__ARMEB__
    114 	rev	x4,x4			// 1
    115 #endif
    116 	ldp	x5,x6,[x1],#2*8
    117 	add	x27,x27,x17			// h+=Sigma0(a)
    118 	ror	x16,x23,#14
    119 	add	x26,x26,x28			// h+=K[i]
    120 	eor	x7,x23,x23,ror#23
    121 	and	x17,x24,x23
    122 	bic	x28,x25,x23
    123 	add	x26,x26,x4			// h+=X[i]
    124 	orr	x17,x17,x28			// Ch(e,f,g)
    125 	eor	x28,x27,x20			// a^b, b^c in next round
    126 	eor	x16,x16,x7,ror#18	// Sigma1(e)
    127 	ror	x7,x27,#28
    128 	add	x26,x26,x17			// h+=Ch(e,f,g)
    129 	eor	x17,x27,x27,ror#5
    130 	add	x26,x26,x16			// h+=Sigma1(e)
    131 	and	x19,x19,x28			// (b^c)&=(a^b)
    132 	add	x22,x22,x26			// d+=h
    133 	eor	x19,x19,x20			// Maj(a,b,c)
    134 	eor	x17,x7,x17,ror#34	// Sigma0(a)
    135 	add	x26,x26,x19			// h+=Maj(a,b,c)
    136 	ldr	x19,[x30],#8		// *K++, x28 in next round
    137 	//add	x26,x26,x17			// h+=Sigma0(a)
    138 #ifndef	__ARMEB__
    139 	rev	x5,x5			// 2
    140 #endif
    141 	add	x26,x26,x17			// h+=Sigma0(a)
    142 	ror	x16,x22,#14
    143 	add	x25,x25,x19			// h+=K[i]
    144 	eor	x8,x22,x22,ror#23
    145 	and	x17,x23,x22
    146 	bic	x19,x24,x22
    147 	add	x25,x25,x5			// h+=X[i]
    148 	orr	x17,x17,x19			// Ch(e,f,g)
    149 	eor	x19,x26,x27			// a^b, b^c in next round
    150 	eor	x16,x16,x8,ror#18	// Sigma1(e)
    151 	ror	x8,x26,#28
    152 	add	x25,x25,x17			// h+=Ch(e,f,g)
    153 	eor	x17,x26,x26,ror#5
    154 	add	x25,x25,x16			// h+=Sigma1(e)
    155 	and	x28,x28,x19			// (b^c)&=(a^b)
    156 	add	x21,x21,x25			// d+=h
    157 	eor	x28,x28,x27			// Maj(a,b,c)
    158 	eor	x17,x8,x17,ror#34	// Sigma0(a)
    159 	add	x25,x25,x28			// h+=Maj(a,b,c)
    160 	ldr	x28,[x30],#8		// *K++, x19 in next round
    161 	//add	x25,x25,x17			// h+=Sigma0(a)
    162 #ifndef	__ARMEB__
    163 	rev	x6,x6			// 3
    164 #endif
    165 	ldp	x7,x8,[x1],#2*8
    166 	add	x25,x25,x17			// h+=Sigma0(a)
    167 	ror	x16,x21,#14
    168 	add	x24,x24,x28			// h+=K[i]
    169 	eor	x9,x21,x21,ror#23
    170 	and	x17,x22,x21
    171 	bic	x28,x23,x21
    172 	add	x24,x24,x6			// h+=X[i]
    173 	orr	x17,x17,x28			// Ch(e,f,g)
    174 	eor	x28,x25,x26			// a^b, b^c in next round
    175 	eor	x16,x16,x9,ror#18	// Sigma1(e)
    176 	ror	x9,x25,#28
    177 	add	x24,x24,x17			// h+=Ch(e,f,g)
    178 	eor	x17,x25,x25,ror#5
    179 	add	x24,x24,x16			// h+=Sigma1(e)
    180 	and	x19,x19,x28			// (b^c)&=(a^b)
    181 	add	x20,x20,x24			// d+=h
    182 	eor	x19,x19,x26			// Maj(a,b,c)
    183 	eor	x17,x9,x17,ror#34	// Sigma0(a)
    184 	add	x24,x24,x19			// h+=Maj(a,b,c)
    185 	ldr	x19,[x30],#8		// *K++, x28 in next round
    186 	//add	x24,x24,x17			// h+=Sigma0(a)
    187 #ifndef	__ARMEB__
    188 	rev	x7,x7			// 4
    189 #endif
    190 	add	x24,x24,x17			// h+=Sigma0(a)
    191 	ror	x16,x20,#14
    192 	add	x23,x23,x19			// h+=K[i]
    193 	eor	x10,x20,x20,ror#23
    194 	and	x17,x21,x20
    195 	bic	x19,x22,x20
    196 	add	x23,x23,x7			// h+=X[i]
    197 	orr	x17,x17,x19			// Ch(e,f,g)
    198 	eor	x19,x24,x25			// a^b, b^c in next round
    199 	eor	x16,x16,x10,ror#18	// Sigma1(e)
    200 	ror	x10,x24,#28
    201 	add	x23,x23,x17			// h+=Ch(e,f,g)
    202 	eor	x17,x24,x24,ror#5
    203 	add	x23,x23,x16			// h+=Sigma1(e)
    204 	and	x28,x28,x19			// (b^c)&=(a^b)
    205 	add	x27,x27,x23			// d+=h
    206 	eor	x28,x28,x25			// Maj(a,b,c)
    207 	eor	x17,x10,x17,ror#34	// Sigma0(a)
    208 	add	x23,x23,x28			// h+=Maj(a,b,c)
    209 	ldr	x28,[x30],#8		// *K++, x19 in next round
    210 	//add	x23,x23,x17			// h+=Sigma0(a)
    211 #ifndef	__ARMEB__
    212 	rev	x8,x8			// 5
    213 #endif
    214 	ldp	x9,x10,[x1],#2*8
    215 	add	x23,x23,x17			// h+=Sigma0(a)
    216 	ror	x16,x27,#14
    217 	add	x22,x22,x28			// h+=K[i]
    218 	eor	x11,x27,x27,ror#23
    219 	and	x17,x20,x27
    220 	bic	x28,x21,x27
    221 	add	x22,x22,x8			// h+=X[i]
    222 	orr	x17,x17,x28			// Ch(e,f,g)
    223 	eor	x28,x23,x24			// a^b, b^c in next round
    224 	eor	x16,x16,x11,ror#18	// Sigma1(e)
    225 	ror	x11,x23,#28
    226 	add	x22,x22,x17			// h+=Ch(e,f,g)
    227 	eor	x17,x23,x23,ror#5
    228 	add	x22,x22,x16			// h+=Sigma1(e)
    229 	and	x19,x19,x28			// (b^c)&=(a^b)
    230 	add	x26,x26,x22			// d+=h
    231 	eor	x19,x19,x24			// Maj(a,b,c)
    232 	eor	x17,x11,x17,ror#34	// Sigma0(a)
    233 	add	x22,x22,x19			// h+=Maj(a,b,c)
    234 	ldr	x19,[x30],#8		// *K++, x28 in next round
    235 	//add	x22,x22,x17			// h+=Sigma0(a)
    236 #ifndef	__ARMEB__
    237 	rev	x9,x9			// 6
    238 #endif
    239 	add	x22,x22,x17			// h+=Sigma0(a)
    240 	ror	x16,x26,#14
    241 	add	x21,x21,x19			// h+=K[i]
    242 	eor	x12,x26,x26,ror#23
    243 	and	x17,x27,x26
    244 	bic	x19,x20,x26
    245 	add	x21,x21,x9			// h+=X[i]
    246 	orr	x17,x17,x19			// Ch(e,f,g)
    247 	eor	x19,x22,x23			// a^b, b^c in next round
    248 	eor	x16,x16,x12,ror#18	// Sigma1(e)
    249 	ror	x12,x22,#28
    250 	add	x21,x21,x17			// h+=Ch(e,f,g)
    251 	eor	x17,x22,x22,ror#5
    252 	add	x21,x21,x16			// h+=Sigma1(e)
    253 	and	x28,x28,x19			// (b^c)&=(a^b)
    254 	add	x25,x25,x21			// d+=h
    255 	eor	x28,x28,x23			// Maj(a,b,c)
    256 	eor	x17,x12,x17,ror#34	// Sigma0(a)
    257 	add	x21,x21,x28			// h+=Maj(a,b,c)
    258 	ldr	x28,[x30],#8		// *K++, x19 in next round
    259 	//add	x21,x21,x17			// h+=Sigma0(a)
    260 #ifndef	__ARMEB__
    261 	rev	x10,x10			// 7
    262 #endif
    263 	ldp	x11,x12,[x1],#2*8
    264 	add	x21,x21,x17			// h+=Sigma0(a)
    265 	ror	x16,x25,#14
    266 	add	x20,x20,x28			// h+=K[i]
    267 	eor	x13,x25,x25,ror#23
    268 	and	x17,x26,x25
    269 	bic	x28,x27,x25
    270 	add	x20,x20,x10			// h+=X[i]
    271 	orr	x17,x17,x28			// Ch(e,f,g)
    272 	eor	x28,x21,x22			// a^b, b^c in next round
    273 	eor	x16,x16,x13,ror#18	// Sigma1(e)
    274 	ror	x13,x21,#28
    275 	add	x20,x20,x17			// h+=Ch(e,f,g)
    276 	eor	x17,x21,x21,ror#5
    277 	add	x20,x20,x16			// h+=Sigma1(e)
    278 	and	x19,x19,x28			// (b^c)&=(a^b)
    279 	add	x24,x24,x20			// d+=h
    280 	eor	x19,x19,x22			// Maj(a,b,c)
    281 	eor	x17,x13,x17,ror#34	// Sigma0(a)
    282 	add	x20,x20,x19			// h+=Maj(a,b,c)
    283 	ldr	x19,[x30],#8		// *K++, x28 in next round
    284 	//add	x20,x20,x17			// h+=Sigma0(a)
    285 #ifndef	__ARMEB__
    286 	rev	x11,x11			// 8
    287 #endif
    288 	add	x20,x20,x17			// h+=Sigma0(a)
    289 	ror	x16,x24,#14
    290 	add	x27,x27,x19			// h+=K[i]
    291 	eor	x14,x24,x24,ror#23
    292 	and	x17,x25,x24
    293 	bic	x19,x26,x24
    294 	add	x27,x27,x11			// h+=X[i]
    295 	orr	x17,x17,x19			// Ch(e,f,g)
    296 	eor	x19,x20,x21			// a^b, b^c in next round
    297 	eor	x16,x16,x14,ror#18	// Sigma1(e)
    298 	ror	x14,x20,#28
    299 	add	x27,x27,x17			// h+=Ch(e,f,g)
    300 	eor	x17,x20,x20,ror#5
    301 	add	x27,x27,x16			// h+=Sigma1(e)
    302 	and	x28,x28,x19			// (b^c)&=(a^b)
    303 	add	x23,x23,x27			// d+=h
    304 	eor	x28,x28,x21			// Maj(a,b,c)
    305 	eor	x17,x14,x17,ror#34	// Sigma0(a)
    306 	add	x27,x27,x28			// h+=Maj(a,b,c)
    307 	ldr	x28,[x30],#8		// *K++, x19 in next round
    308 	//add	x27,x27,x17			// h+=Sigma0(a)
    309 #ifndef	__ARMEB__
    310 	rev	x12,x12			// 9
    311 #endif
    312 	ldp	x13,x14,[x1],#2*8
    313 	add	x27,x27,x17			// h+=Sigma0(a)
    314 	ror	x16,x23,#14
    315 	add	x26,x26,x28			// h+=K[i]
    316 	eor	x15,x23,x23,ror#23
    317 	and	x17,x24,x23
    318 	bic	x28,x25,x23
    319 	add	x26,x26,x12			// h+=X[i]
    320 	orr	x17,x17,x28			// Ch(e,f,g)
    321 	eor	x28,x27,x20			// a^b, b^c in next round
    322 	eor	x16,x16,x15,ror#18	// Sigma1(e)
    323 	ror	x15,x27,#28
    324 	add	x26,x26,x17			// h+=Ch(e,f,g)
    325 	eor	x17,x27,x27,ror#5
    326 	add	x26,x26,x16			// h+=Sigma1(e)
    327 	and	x19,x19,x28			// (b^c)&=(a^b)
    328 	add	x22,x22,x26			// d+=h
    329 	eor	x19,x19,x20			// Maj(a,b,c)
    330 	eor	x17,x15,x17,ror#34	// Sigma0(a)
    331 	add	x26,x26,x19			// h+=Maj(a,b,c)
    332 	ldr	x19,[x30],#8		// *K++, x28 in next round
    333 	//add	x26,x26,x17			// h+=Sigma0(a)
    334 #ifndef	__ARMEB__
    335 	rev	x13,x13			// 10
    336 #endif
    337 	add	x26,x26,x17			// h+=Sigma0(a)
    338 	ror	x16,x22,#14
    339 	add	x25,x25,x19			// h+=K[i]
    340 	eor	x0,x22,x22,ror#23
    341 	and	x17,x23,x22
    342 	bic	x19,x24,x22
    343 	add	x25,x25,x13			// h+=X[i]
    344 	orr	x17,x17,x19			// Ch(e,f,g)
    345 	eor	x19,x26,x27			// a^b, b^c in next round
    346 	eor	x16,x16,x0,ror#18	// Sigma1(e)
    347 	ror	x0,x26,#28
    348 	add	x25,x25,x17			// h+=Ch(e,f,g)
    349 	eor	x17,x26,x26,ror#5
    350 	add	x25,x25,x16			// h+=Sigma1(e)
    351 	and	x28,x28,x19			// (b^c)&=(a^b)
    352 	add	x21,x21,x25			// d+=h
    353 	eor	x28,x28,x27			// Maj(a,b,c)
    354 	eor	x17,x0,x17,ror#34	// Sigma0(a)
    355 	add	x25,x25,x28			// h+=Maj(a,b,c)
    356 	ldr	x28,[x30],#8		// *K++, x19 in next round
    357 	//add	x25,x25,x17			// h+=Sigma0(a)
    358 #ifndef	__ARMEB__
    359 	rev	x14,x14			// 11
    360 #endif
    361 	ldp	x15,x0,[x1],#2*8
    362 	add	x25,x25,x17			// h+=Sigma0(a)
    363 	str	x6,[sp,#24]
    364 	ror	x16,x21,#14
    365 	add	x24,x24,x28			// h+=K[i]
    366 	eor	x6,x21,x21,ror#23
    367 	and	x17,x22,x21
    368 	bic	x28,x23,x21
    369 	add	x24,x24,x14			// h+=X[i]
    370 	orr	x17,x17,x28			// Ch(e,f,g)
    371 	eor	x28,x25,x26			// a^b, b^c in next round
    372 	eor	x16,x16,x6,ror#18	// Sigma1(e)
    373 	ror	x6,x25,#28
    374 	add	x24,x24,x17			// h+=Ch(e,f,g)
    375 	eor	x17,x25,x25,ror#5
    376 	add	x24,x24,x16			// h+=Sigma1(e)
    377 	and	x19,x19,x28			// (b^c)&=(a^b)
    378 	add	x20,x20,x24			// d+=h
    379 	eor	x19,x19,x26			// Maj(a,b,c)
    380 	eor	x17,x6,x17,ror#34	// Sigma0(a)
    381 	add	x24,x24,x19			// h+=Maj(a,b,c)
    382 	ldr	x19,[x30],#8		// *K++, x28 in next round
    383 	//add	x24,x24,x17			// h+=Sigma0(a)
    384 #ifndef	__ARMEB__
    385 	rev	x15,x15			// 12
    386 #endif
    387 	add	x24,x24,x17			// h+=Sigma0(a)
    388 	str	x7,[sp,#0]
    389 	ror	x16,x20,#14
    390 	add	x23,x23,x19			// h+=K[i]
    391 	eor	x7,x20,x20,ror#23
    392 	and	x17,x21,x20
    393 	bic	x19,x22,x20
    394 	add	x23,x23,x15			// h+=X[i]
    395 	orr	x17,x17,x19			// Ch(e,f,g)
    396 	eor	x19,x24,x25			// a^b, b^c in next round
    397 	eor	x16,x16,x7,ror#18	// Sigma1(e)
    398 	ror	x7,x24,#28
    399 	add	x23,x23,x17			// h+=Ch(e,f,g)
    400 	eor	x17,x24,x24,ror#5
    401 	add	x23,x23,x16			// h+=Sigma1(e)
    402 	and	x28,x28,x19			// (b^c)&=(a^b)
    403 	add	x27,x27,x23			// d+=h
    404 	eor	x28,x28,x25			// Maj(a,b,c)
    405 	eor	x17,x7,x17,ror#34	// Sigma0(a)
    406 	add	x23,x23,x28			// h+=Maj(a,b,c)
    407 	ldr	x28,[x30],#8		// *K++, x19 in next round
    408 	//add	x23,x23,x17			// h+=Sigma0(a)
    409 #ifndef	__ARMEB__
    410 	rev	x0,x0			// 13
    411 #endif
    412 	ldp	x1,x2,[x1]
    413 	add	x23,x23,x17			// h+=Sigma0(a)
    414 	str	x8,[sp,#8]
    415 	ror	x16,x27,#14
    416 	add	x22,x22,x28			// h+=K[i]
    417 	eor	x8,x27,x27,ror#23
    418 	and	x17,x20,x27
    419 	bic	x28,x21,x27
    420 	add	x22,x22,x0			// h+=X[i]
    421 	orr	x17,x17,x28			// Ch(e,f,g)
    422 	eor	x28,x23,x24			// a^b, b^c in next round
    423 	eor	x16,x16,x8,ror#18	// Sigma1(e)
    424 	ror	x8,x23,#28
    425 	add	x22,x22,x17			// h+=Ch(e,f,g)
    426 	eor	x17,x23,x23,ror#5
    427 	add	x22,x22,x16			// h+=Sigma1(e)
    428 	and	x19,x19,x28			// (b^c)&=(a^b)
    429 	add	x26,x26,x22			// d+=h
    430 	eor	x19,x19,x24			// Maj(a,b,c)
    431 	eor	x17,x8,x17,ror#34	// Sigma0(a)
    432 	add	x22,x22,x19			// h+=Maj(a,b,c)
    433 	ldr	x19,[x30],#8		// *K++, x28 in next round
    434 	//add	x22,x22,x17			// h+=Sigma0(a)
    435 #ifndef	__ARMEB__
    436 	rev	x1,x1			// 14
    437 #endif
    438 	ldr	x6,[sp,#24]
    439 	add	x22,x22,x17			// h+=Sigma0(a)
    440 	str	x9,[sp,#16]
    441 	ror	x16,x26,#14
    442 	add	x21,x21,x19			// h+=K[i]
    443 	eor	x9,x26,x26,ror#23
    444 	and	x17,x27,x26
    445 	bic	x19,x20,x26
    446 	add	x21,x21,x1			// h+=X[i]
    447 	orr	x17,x17,x19			// Ch(e,f,g)
    448 	eor	x19,x22,x23			// a^b, b^c in next round
    449 	eor	x16,x16,x9,ror#18	// Sigma1(e)
    450 	ror	x9,x22,#28
    451 	add	x21,x21,x17			// h+=Ch(e,f,g)
    452 	eor	x17,x22,x22,ror#5
    453 	add	x21,x21,x16			// h+=Sigma1(e)
    454 	and	x28,x28,x19			// (b^c)&=(a^b)
    455 	add	x25,x25,x21			// d+=h
    456 	eor	x28,x28,x23			// Maj(a,b,c)
    457 	eor	x17,x9,x17,ror#34	// Sigma0(a)
    458 	add	x21,x21,x28			// h+=Maj(a,b,c)
    459 	ldr	x28,[x30],#8		// *K++, x19 in next round
    460 	//add	x21,x21,x17			// h+=Sigma0(a)
    461 #ifndef	__ARMEB__
    462 	rev	x2,x2			// 15
    463 #endif
    464 	ldr	x7,[sp,#0]
    465 	add	x21,x21,x17			// h+=Sigma0(a)
    466 	str	x10,[sp,#24]
    467 	ror	x16,x25,#14
    468 	add	x20,x20,x28			// h+=K[i]
    469 	ror	x9,x4,#1
    470 	and	x17,x26,x25
    471 	ror	x8,x1,#19
    472 	bic	x28,x27,x25
    473 	ror	x10,x21,#28
    474 	add	x20,x20,x2			// h+=X[i]
    475 	eor	x16,x16,x25,ror#18
    476 	eor	x9,x9,x4,ror#8
    477 	orr	x17,x17,x28			// Ch(e,f,g)
    478 	eor	x28,x21,x22			// a^b, b^c in next round
    479 	eor	x16,x16,x25,ror#41	// Sigma1(e)
    480 	eor	x10,x10,x21,ror#34
    481 	add	x20,x20,x17			// h+=Ch(e,f,g)
    482 	and	x19,x19,x28			// (b^c)&=(a^b)
    483 	eor	x8,x8,x1,ror#61
    484 	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
    485 	add	x20,x20,x16			// h+=Sigma1(e)
    486 	eor	x19,x19,x22			// Maj(a,b,c)
    487 	eor	x17,x10,x21,ror#39	// Sigma0(a)
    488 	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
    489 	add	x3,x3,x12
    490 	add	x24,x24,x20			// d+=h
    491 	add	x20,x20,x19			// h+=Maj(a,b,c)
    492 	ldr	x19,[x30],#8		// *K++, x28 in next round
    493 	add	x3,x3,x9
    494 	add	x20,x20,x17			// h+=Sigma0(a)
    495 	add	x3,x3,x8
    496 .Loop_16_xx:
    497 	ldr	x8,[sp,#8]
    498 	str	x11,[sp,#0]
    499 	ror	x16,x24,#14
    500 	add	x27,x27,x19			// h+=K[i]
    501 	ror	x10,x5,#1
    502 	and	x17,x25,x24
    503 	ror	x9,x2,#19
    504 	bic	x19,x26,x24
    505 	ror	x11,x20,#28
    506 	add	x27,x27,x3			// h+=X[i]
    507 	eor	x16,x16,x24,ror#18
    508 	eor	x10,x10,x5,ror#8
    509 	orr	x17,x17,x19			// Ch(e,f,g)
    510 	eor	x19,x20,x21			// a^b, b^c in next round
    511 	eor	x16,x16,x24,ror#41	// Sigma1(e)
    512 	eor	x11,x11,x20,ror#34
    513 	add	x27,x27,x17			// h+=Ch(e,f,g)
    514 	and	x28,x28,x19			// (b^c)&=(a^b)
    515 	eor	x9,x9,x2,ror#61
    516 	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
    517 	add	x27,x27,x16			// h+=Sigma1(e)
    518 	eor	x28,x28,x21			// Maj(a,b,c)
    519 	eor	x17,x11,x20,ror#39	// Sigma0(a)
    520 	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
    521 	add	x4,x4,x13
    522 	add	x23,x23,x27			// d+=h
    523 	add	x27,x27,x28			// h+=Maj(a,b,c)
    524 	ldr	x28,[x30],#8		// *K++, x19 in next round
    525 	add	x4,x4,x10
    526 	add	x27,x27,x17			// h+=Sigma0(a)
    527 	add	x4,x4,x9
    528 	ldr	x9,[sp,#16]
    529 	str	x12,[sp,#8]
    530 	ror	x16,x23,#14
    531 	add	x26,x26,x28			// h+=K[i]
    532 	ror	x11,x6,#1
    533 	and	x17,x24,x23
    534 	ror	x10,x3,#19
    535 	bic	x28,x25,x23
    536 	ror	x12,x27,#28
    537 	add	x26,x26,x4			// h+=X[i]
    538 	eor	x16,x16,x23,ror#18
    539 	eor	x11,x11,x6,ror#8
    540 	orr	x17,x17,x28			// Ch(e,f,g)
    541 	eor	x28,x27,x20			// a^b, b^c in next round
    542 	eor	x16,x16,x23,ror#41	// Sigma1(e)
    543 	eor	x12,x12,x27,ror#34
    544 	add	x26,x26,x17			// h+=Ch(e,f,g)
    545 	and	x19,x19,x28			// (b^c)&=(a^b)
    546 	eor	x10,x10,x3,ror#61
    547 	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
    548 	add	x26,x26,x16			// h+=Sigma1(e)
    549 	eor	x19,x19,x20			// Maj(a,b,c)
    550 	eor	x17,x12,x27,ror#39	// Sigma0(a)
    551 	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
    552 	add	x5,x5,x14
    553 	add	x22,x22,x26			// d+=h
    554 	add	x26,x26,x19			// h+=Maj(a,b,c)
    555 	ldr	x19,[x30],#8		// *K++, x28 in next round
    556 	add	x5,x5,x11
    557 	add	x26,x26,x17			// h+=Sigma0(a)
    558 	add	x5,x5,x10
    559 	ldr	x10,[sp,#24]
    560 	str	x13,[sp,#16]
    561 	ror	x16,x22,#14
    562 	add	x25,x25,x19			// h+=K[i]
    563 	ror	x12,x7,#1
    564 	and	x17,x23,x22
    565 	ror	x11,x4,#19
    566 	bic	x19,x24,x22
    567 	ror	x13,x26,#28
    568 	add	x25,x25,x5			// h+=X[i]
    569 	eor	x16,x16,x22,ror#18
    570 	eor	x12,x12,x7,ror#8
    571 	orr	x17,x17,x19			// Ch(e,f,g)
    572 	eor	x19,x26,x27			// a^b, b^c in next round
    573 	eor	x16,x16,x22,ror#41	// Sigma1(e)
    574 	eor	x13,x13,x26,ror#34
    575 	add	x25,x25,x17			// h+=Ch(e,f,g)
    576 	and	x28,x28,x19			// (b^c)&=(a^b)
    577 	eor	x11,x11,x4,ror#61
    578 	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
    579 	add	x25,x25,x16			// h+=Sigma1(e)
    580 	eor	x28,x28,x27			// Maj(a,b,c)
    581 	eor	x17,x13,x26,ror#39	// Sigma0(a)
    582 	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
    583 	add	x6,x6,x15
    584 	add	x21,x21,x25			// d+=h
    585 	add	x25,x25,x28			// h+=Maj(a,b,c)
    586 	ldr	x28,[x30],#8		// *K++, x19 in next round
    587 	add	x6,x6,x12
    588 	add	x25,x25,x17			// h+=Sigma0(a)
    589 	add	x6,x6,x11
    590 	ldr	x11,[sp,#0]
    591 	str	x14,[sp,#24]
    592 	ror	x16,x21,#14
    593 	add	x24,x24,x28			// h+=K[i]
    594 	ror	x13,x8,#1
    595 	and	x17,x22,x21
    596 	ror	x12,x5,#19
    597 	bic	x28,x23,x21
    598 	ror	x14,x25,#28
    599 	add	x24,x24,x6			// h+=X[i]
    600 	eor	x16,x16,x21,ror#18
    601 	eor	x13,x13,x8,ror#8
    602 	orr	x17,x17,x28			// Ch(e,f,g)
    603 	eor	x28,x25,x26			// a^b, b^c in next round
    604 	eor	x16,x16,x21,ror#41	// Sigma1(e)
    605 	eor	x14,x14,x25,ror#34
    606 	add	x24,x24,x17			// h+=Ch(e,f,g)
    607 	and	x19,x19,x28			// (b^c)&=(a^b)
    608 	eor	x12,x12,x5,ror#61
    609 	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
    610 	add	x24,x24,x16			// h+=Sigma1(e)
    611 	eor	x19,x19,x26			// Maj(a,b,c)
    612 	eor	x17,x14,x25,ror#39	// Sigma0(a)
    613 	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
    614 	add	x7,x7,x0
    615 	add	x20,x20,x24			// d+=h
    616 	add	x24,x24,x19			// h+=Maj(a,b,c)
    617 	ldr	x19,[x30],#8		// *K++, x28 in next round
    618 	add	x7,x7,x13
    619 	add	x24,x24,x17			// h+=Sigma0(a)
    620 	add	x7,x7,x12
    621 	ldr	x12,[sp,#8]
    622 	str	x15,[sp,#0]
    623 	ror	x16,x20,#14
    624 	add	x23,x23,x19			// h+=K[i]
    625 	ror	x14,x9,#1
    626 	and	x17,x21,x20
    627 	ror	x13,x6,#19
    628 	bic	x19,x22,x20
    629 	ror	x15,x24,#28
    630 	add	x23,x23,x7			// h+=X[i]
    631 	eor	x16,x16,x20,ror#18
    632 	eor	x14,x14,x9,ror#8
    633 	orr	x17,x17,x19			// Ch(e,f,g)
    634 	eor	x19,x24,x25			// a^b, b^c in next round
    635 	eor	x16,x16,x20,ror#41	// Sigma1(e)
    636 	eor	x15,x15,x24,ror#34
    637 	add	x23,x23,x17			// h+=Ch(e,f,g)
    638 	and	x28,x28,x19			// (b^c)&=(a^b)
    639 	eor	x13,x13,x6,ror#61
    640 	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
    641 	add	x23,x23,x16			// h+=Sigma1(e)
    642 	eor	x28,x28,x25			// Maj(a,b,c)
    643 	eor	x17,x15,x24,ror#39	// Sigma0(a)
    644 	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
    645 	add	x8,x8,x1
    646 	add	x27,x27,x23			// d+=h
    647 	add	x23,x23,x28			// h+=Maj(a,b,c)
    648 	ldr	x28,[x30],#8		// *K++, x19 in next round
    649 	add	x8,x8,x14
    650 	add	x23,x23,x17			// h+=Sigma0(a)
    651 	add	x8,x8,x13
    652 	ldr	x13,[sp,#16]
    653 	str	x0,[sp,#8]
    654 	ror	x16,x27,#14
    655 	add	x22,x22,x28			// h+=K[i]
    656 	ror	x15,x10,#1
    657 	and	x17,x20,x27
    658 	ror	x14,x7,#19
    659 	bic	x28,x21,x27
    660 	ror	x0,x23,#28
    661 	add	x22,x22,x8			// h+=X[i]
    662 	eor	x16,x16,x27,ror#18
    663 	eor	x15,x15,x10,ror#8
    664 	orr	x17,x17,x28			// Ch(e,f,g)
    665 	eor	x28,x23,x24			// a^b, b^c in next round
    666 	eor	x16,x16,x27,ror#41	// Sigma1(e)
    667 	eor	x0,x0,x23,ror#34
    668 	add	x22,x22,x17			// h+=Ch(e,f,g)
    669 	and	x19,x19,x28			// (b^c)&=(a^b)
    670 	eor	x14,x14,x7,ror#61
    671 	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
    672 	add	x22,x22,x16			// h+=Sigma1(e)
    673 	eor	x19,x19,x24			// Maj(a,b,c)
    674 	eor	x17,x0,x23,ror#39	// Sigma0(a)
    675 	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
    676 	add	x9,x9,x2
    677 	add	x26,x26,x22			// d+=h
    678 	add	x22,x22,x19			// h+=Maj(a,b,c)
    679 	ldr	x19,[x30],#8		// *K++, x28 in next round
    680 	add	x9,x9,x15
    681 	add	x22,x22,x17			// h+=Sigma0(a)
    682 	add	x9,x9,x14
    683 	ldr	x14,[sp,#24]
    684 	str	x1,[sp,#16]
    685 	ror	x16,x26,#14
    686 	add	x21,x21,x19			// h+=K[i]
    687 	ror	x0,x11,#1
    688 	and	x17,x27,x26
    689 	ror	x15,x8,#19
    690 	bic	x19,x20,x26
    691 	ror	x1,x22,#28
    692 	add	x21,x21,x9			// h+=X[i]
    693 	eor	x16,x16,x26,ror#18
    694 	eor	x0,x0,x11,ror#8
    695 	orr	x17,x17,x19			// Ch(e,f,g)
    696 	eor	x19,x22,x23			// a^b, b^c in next round
    697 	eor	x16,x16,x26,ror#41	// Sigma1(e)
    698 	eor	x1,x1,x22,ror#34
    699 	add	x21,x21,x17			// h+=Ch(e,f,g)
    700 	and	x28,x28,x19			// (b^c)&=(a^b)
    701 	eor	x15,x15,x8,ror#61
    702 	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
    703 	add	x21,x21,x16			// h+=Sigma1(e)
    704 	eor	x28,x28,x23			// Maj(a,b,c)
    705 	eor	x17,x1,x22,ror#39	// Sigma0(a)
    706 	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
    707 	add	x10,x10,x3
    708 	add	x25,x25,x21			// d+=h
    709 	add	x21,x21,x28			// h+=Maj(a,b,c)
    710 	ldr	x28,[x30],#8		// *K++, x19 in next round
    711 	add	x10,x10,x0
    712 	add	x21,x21,x17			// h+=Sigma0(a)
    713 	add	x10,x10,x15
    714 	ldr	x15,[sp,#0]
    715 	str	x2,[sp,#24]
    716 	ror	x16,x25,#14
    717 	add	x20,x20,x28			// h+=K[i]
    718 	ror	x1,x12,#1
    719 	and	x17,x26,x25
    720 	ror	x0,x9,#19
    721 	bic	x28,x27,x25
    722 	ror	x2,x21,#28
    723 	add	x20,x20,x10			// h+=X[i]
    724 	eor	x16,x16,x25,ror#18
    725 	eor	x1,x1,x12,ror#8
    726 	orr	x17,x17,x28			// Ch(e,f,g)
    727 	eor	x28,x21,x22			// a^b, b^c in next round
    728 	eor	x16,x16,x25,ror#41	// Sigma1(e)
    729 	eor	x2,x2,x21,ror#34
    730 	add	x20,x20,x17			// h+=Ch(e,f,g)
    731 	and	x19,x19,x28			// (b^c)&=(a^b)
    732 	eor	x0,x0,x9,ror#61
    733 	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
    734 	add	x20,x20,x16			// h+=Sigma1(e)
    735 	eor	x19,x19,x22			// Maj(a,b,c)
    736 	eor	x17,x2,x21,ror#39	// Sigma0(a)
    737 	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
    738 	add	x11,x11,x4
    739 	add	x24,x24,x20			// d+=h
    740 	add	x20,x20,x19			// h+=Maj(a,b,c)
    741 	ldr	x19,[x30],#8		// *K++, x28 in next round
    742 	add	x11,x11,x1
    743 	add	x20,x20,x17			// h+=Sigma0(a)
    744 	add	x11,x11,x0
    745 	ldr	x0,[sp,#8]
    746 	str	x3,[sp,#0]
    747 	ror	x16,x24,#14
    748 	add	x27,x27,x19			// h+=K[i]
    749 	ror	x2,x13,#1
    750 	and	x17,x25,x24
    751 	ror	x1,x10,#19
    752 	bic	x19,x26,x24
    753 	ror	x3,x20,#28
    754 	add	x27,x27,x11			// h+=X[i]
    755 	eor	x16,x16,x24,ror#18
    756 	eor	x2,x2,x13,ror#8
    757 	orr	x17,x17,x19			// Ch(e,f,g)
    758 	eor	x19,x20,x21			// a^b, b^c in next round
    759 	eor	x16,x16,x24,ror#41	// Sigma1(e)
    760 	eor	x3,x3,x20,ror#34
    761 	add	x27,x27,x17			// h+=Ch(e,f,g)
    762 	and	x28,x28,x19			// (b^c)&=(a^b)
    763 	eor	x1,x1,x10,ror#61
    764 	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
    765 	add	x27,x27,x16			// h+=Sigma1(e)
    766 	eor	x28,x28,x21			// Maj(a,b,c)
    767 	eor	x17,x3,x20,ror#39	// Sigma0(a)
    768 	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
    769 	add	x12,x12,x5
    770 	add	x23,x23,x27			// d+=h
    771 	add	x27,x27,x28			// h+=Maj(a,b,c)
    772 	ldr	x28,[x30],#8		// *K++, x19 in next round
    773 	add	x12,x12,x2
    774 	add	x27,x27,x17			// h+=Sigma0(a)
    775 	add	x12,x12,x1
    776 	ldr	x1,[sp,#16]
    777 	str	x4,[sp,#8]
    778 	ror	x16,x23,#14
    779 	add	x26,x26,x28			// h+=K[i]
    780 	ror	x3,x14,#1
    781 	and	x17,x24,x23
    782 	ror	x2,x11,#19
    783 	bic	x28,x25,x23
    784 	ror	x4,x27,#28
    785 	add	x26,x26,x12			// h+=X[i]
    786 	eor	x16,x16,x23,ror#18
    787 	eor	x3,x3,x14,ror#8
    788 	orr	x17,x17,x28			// Ch(e,f,g)
    789 	eor	x28,x27,x20			// a^b, b^c in next round
    790 	eor	x16,x16,x23,ror#41	// Sigma1(e)
    791 	eor	x4,x4,x27,ror#34
    792 	add	x26,x26,x17			// h+=Ch(e,f,g)
    793 	and	x19,x19,x28			// (b^c)&=(a^b)
    794 	eor	x2,x2,x11,ror#61
    795 	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
    796 	add	x26,x26,x16			// h+=Sigma1(e)
    797 	eor	x19,x19,x20			// Maj(a,b,c)
    798 	eor	x17,x4,x27,ror#39	// Sigma0(a)
    799 	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
    800 	add	x13,x13,x6
    801 	add	x22,x22,x26			// d+=h
    802 	add	x26,x26,x19			// h+=Maj(a,b,c)
    803 	ldr	x19,[x30],#8		// *K++, x28 in next round
    804 	add	x13,x13,x3
    805 	add	x26,x26,x17			// h+=Sigma0(a)
    806 	add	x13,x13,x2
    807 	ldr	x2,[sp,#24]
    808 	str	x5,[sp,#16]
    809 	ror	x16,x22,#14
    810 	add	x25,x25,x19			// h+=K[i]
    811 	ror	x4,x15,#1
    812 	and	x17,x23,x22
    813 	ror	x3,x12,#19
    814 	bic	x19,x24,x22
    815 	ror	x5,x26,#28
    816 	add	x25,x25,x13			// h+=X[i]
    817 	eor	x16,x16,x22,ror#18
    818 	eor	x4,x4,x15,ror#8
    819 	orr	x17,x17,x19			// Ch(e,f,g)
    820 	eor	x19,x26,x27			// a^b, b^c in next round
    821 	eor	x16,x16,x22,ror#41	// Sigma1(e)
    822 	eor	x5,x5,x26,ror#34
    823 	add	x25,x25,x17			// h+=Ch(e,f,g)
    824 	and	x28,x28,x19			// (b^c)&=(a^b)
    825 	eor	x3,x3,x12,ror#61
    826 	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
    827 	add	x25,x25,x16			// h+=Sigma1(e)
    828 	eor	x28,x28,x27			// Maj(a,b,c)
    829 	eor	x17,x5,x26,ror#39	// Sigma0(a)
    830 	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
    831 	add	x14,x14,x7
    832 	add	x21,x21,x25			// d+=h
    833 	add	x25,x25,x28			// h+=Maj(a,b,c)
    834 	ldr	x28,[x30],#8		// *K++, x19 in next round
    835 	add	x14,x14,x4
    836 	add	x25,x25,x17			// h+=Sigma0(a)
    837 	add	x14,x14,x3
    838 	ldr	x3,[sp,#0]
    839 	str	x6,[sp,#24]
    840 	ror	x16,x21,#14
    841 	add	x24,x24,x28			// h+=K[i]
    842 	ror	x5,x0,#1
    843 	and	x17,x22,x21
    844 	ror	x4,x13,#19
    845 	bic	x28,x23,x21
    846 	ror	x6,x25,#28
    847 	add	x24,x24,x14			// h+=X[i]
    848 	eor	x16,x16,x21,ror#18
    849 	eor	x5,x5,x0,ror#8
    850 	orr	x17,x17,x28			// Ch(e,f,g)
    851 	eor	x28,x25,x26			// a^b, b^c in next round
    852 	eor	x16,x16,x21,ror#41	// Sigma1(e)
    853 	eor	x6,x6,x25,ror#34
    854 	add	x24,x24,x17			// h+=Ch(e,f,g)
    855 	and	x19,x19,x28			// (b^c)&=(a^b)
    856 	eor	x4,x4,x13,ror#61
    857 	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
    858 	add	x24,x24,x16			// h+=Sigma1(e)
    859 	eor	x19,x19,x26			// Maj(a,b,c)
    860 	eor	x17,x6,x25,ror#39	// Sigma0(a)
    861 	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
    862 	add	x15,x15,x8
    863 	add	x20,x20,x24			// d+=h
    864 	add	x24,x24,x19			// h+=Maj(a,b,c)
    865 	ldr	x19,[x30],#8		// *K++, x28 in next round
    866 	add	x15,x15,x5
    867 	add	x24,x24,x17			// h+=Sigma0(a)
    868 	add	x15,x15,x4
    869 	ldr	x4,[sp,#8]
    870 	str	x7,[sp,#0]
    871 	ror	x16,x20,#14
    872 	add	x23,x23,x19			// h+=K[i]
    873 	ror	x6,x1,#1
    874 	and	x17,x21,x20
    875 	ror	x5,x14,#19
    876 	bic	x19,x22,x20
    877 	ror	x7,x24,#28
    878 	add	x23,x23,x15			// h+=X[i]
    879 	eor	x16,x16,x20,ror#18
    880 	eor	x6,x6,x1,ror#8
    881 	orr	x17,x17,x19			// Ch(e,f,g)
    882 	eor	x19,x24,x25			// a^b, b^c in next round
    883 	eor	x16,x16,x20,ror#41	// Sigma1(e)
    884 	eor	x7,x7,x24,ror#34
    885 	add	x23,x23,x17			// h+=Ch(e,f,g)
    886 	and	x28,x28,x19			// (b^c)&=(a^b)
    887 	eor	x5,x5,x14,ror#61
    888 	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
    889 	add	x23,x23,x16			// h+=Sigma1(e)
    890 	eor	x28,x28,x25			// Maj(a,b,c)
    891 	eor	x17,x7,x24,ror#39	// Sigma0(a)
    892 	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
    893 	add	x0,x0,x9
    894 	add	x27,x27,x23			// d+=h
    895 	add	x23,x23,x28			// h+=Maj(a,b,c)
    896 	ldr	x28,[x30],#8		// *K++, x19 in next round
    897 	add	x0,x0,x6
    898 	add	x23,x23,x17			// h+=Sigma0(a)
    899 	add	x0,x0,x5
    900 	ldr	x5,[sp,#16]
    901 	str	x8,[sp,#8]
    902 	ror	x16,x27,#14
    903 	add	x22,x22,x28			// h+=K[i]
    904 	ror	x7,x2,#1
    905 	and	x17,x20,x27
    906 	ror	x6,x15,#19
    907 	bic	x28,x21,x27
    908 	ror	x8,x23,#28
    909 	add	x22,x22,x0			// h+=X[i]
    910 	eor	x16,x16,x27,ror#18
    911 	eor	x7,x7,x2,ror#8
    912 	orr	x17,x17,x28			// Ch(e,f,g)
    913 	eor	x28,x23,x24			// a^b, b^c in next round
    914 	eor	x16,x16,x27,ror#41	// Sigma1(e)
    915 	eor	x8,x8,x23,ror#34
    916 	add	x22,x22,x17			// h+=Ch(e,f,g)
    917 	and	x19,x19,x28			// (b^c)&=(a^b)
    918 	eor	x6,x6,x15,ror#61
    919 	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
    920 	add	x22,x22,x16			// h+=Sigma1(e)
    921 	eor	x19,x19,x24			// Maj(a,b,c)
    922 	eor	x17,x8,x23,ror#39	// Sigma0(a)
    923 	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
    924 	add	x1,x1,x10
    925 	add	x26,x26,x22			// d+=h
    926 	add	x22,x22,x19			// h+=Maj(a,b,c)
    927 	ldr	x19,[x30],#8		// *K++, x28 in next round
    928 	add	x1,x1,x7
    929 	add	x22,x22,x17			// h+=Sigma0(a)
    930 	add	x1,x1,x6
    931 	ldr	x6,[sp,#24]
    932 	str	x9,[sp,#16]
    933 	ror	x16,x26,#14
    934 	add	x21,x21,x19			// h+=K[i]
    935 	ror	x8,x3,#1
    936 	and	x17,x27,x26
    937 	ror	x7,x0,#19
    938 	bic	x19,x20,x26
    939 	ror	x9,x22,#28
    940 	add	x21,x21,x1			// h+=X[i]
    941 	eor	x16,x16,x26,ror#18
    942 	eor	x8,x8,x3,ror#8
    943 	orr	x17,x17,x19			// Ch(e,f,g)
    944 	eor	x19,x22,x23			// a^b, b^c in next round
    945 	eor	x16,x16,x26,ror#41	// Sigma1(e)
    946 	eor	x9,x9,x22,ror#34
    947 	add	x21,x21,x17			// h+=Ch(e,f,g)
    948 	and	x28,x28,x19			// (b^c)&=(a^b)
    949 	eor	x7,x7,x0,ror#61
    950 	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
    951 	add	x21,x21,x16			// h+=Sigma1(e)
    952 	eor	x28,x28,x23			// Maj(a,b,c)
    953 	eor	x17,x9,x22,ror#39	// Sigma0(a)
    954 	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
    955 	add	x2,x2,x11
    956 	add	x25,x25,x21			// d+=h
    957 	add	x21,x21,x28			// h+=Maj(a,b,c)
    958 	ldr	x28,[x30],#8		// *K++, x19 in next round
    959 	add	x2,x2,x8
    960 	add	x21,x21,x17			// h+=Sigma0(a)
    961 	add	x2,x2,x7
    962 	ldr	x7,[sp,#0]
    963 	str	x10,[sp,#24]
    964 	ror	x16,x25,#14
    965 	add	x20,x20,x28			// h+=K[i]
    966 	ror	x9,x4,#1
    967 	and	x17,x26,x25
    968 	ror	x8,x1,#19
    969 	bic	x28,x27,x25
    970 	ror	x10,x21,#28
    971 	add	x20,x20,x2			// h+=X[i]
    972 	eor	x16,x16,x25,ror#18
    973 	eor	x9,x9,x4,ror#8
    974 	orr	x17,x17,x28			// Ch(e,f,g)
    975 	eor	x28,x21,x22			// a^b, b^c in next round
    976 	eor	x16,x16,x25,ror#41	// Sigma1(e)
    977 	eor	x10,x10,x21,ror#34
    978 	add	x20,x20,x17			// h+=Ch(e,f,g)
    979 	and	x19,x19,x28			// (b^c)&=(a^b)
    980 	eor	x8,x8,x1,ror#61
    981 	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
    982 	add	x20,x20,x16			// h+=Sigma1(e)
    983 	eor	x19,x19,x22			// Maj(a,b,c)
    984 	eor	x17,x10,x21,ror#39	// Sigma0(a)
    985 	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
    986 	add	x3,x3,x12
    987 	add	x24,x24,x20			// d+=h
    988 	add	x20,x20,x19			// h+=Maj(a,b,c)
    989 	ldr	x19,[x30],#8		// *K++, x28 in next round
    990 	add	x3,x3,x9
    991 	add	x20,x20,x17			// h+=Sigma0(a)
    992 	add	x3,x3,x8
    993 	cbnz	x19,.Loop_16_xx
    994 
    995 	ldp	x0,x2,[x29,#96]
    996 	ldr	x1,[x29,#112]
    997 	sub	x30,x30,#648		// rewind
    998 
    999 	ldp	x3,x4,[x0]
   1000 	ldp	x5,x6,[x0,#2*8]
   1001 	add	x1,x1,#14*8			// advance input pointer
   1002 	ldp	x7,x8,[x0,#4*8]
   1003 	add	x20,x20,x3
   1004 	ldp	x9,x10,[x0,#6*8]
   1005 	add	x21,x21,x4
   1006 	add	x22,x22,x5
   1007 	add	x23,x23,x6
   1008 	stp	x20,x21,[x0]
   1009 	add	x24,x24,x7
   1010 	add	x25,x25,x8
   1011 	stp	x22,x23,[x0,#2*8]
   1012 	add	x26,x26,x9
   1013 	add	x27,x27,x10
   1014 	cmp	x1,x2
   1015 	stp	x24,x25,[x0,#4*8]
   1016 	stp	x26,x27,[x0,#6*8]
   1017 	b.ne	.Loop
   1018 
   1019 	ldp	x19,x20,[x29,#16]
   1020 	add	sp,sp,#4*8
   1021 	ldp	x21,x22,[x29,#32]
   1022 	ldp	x23,x24,[x29,#48]
   1023 	ldp	x25,x26,[x29,#64]
   1024 	ldp	x27,x28,[x29,#80]
   1025 	ldp	x29,x30,[sp],#128
   1026 	ret
   1027 .size	sha512_block_data_order,.-sha512_block_data_order
   1028 
    1029 .section	.rodata	// constant table lives in read-only data, not in .text
    1030 .align	6	// GNU as on AArch64 treats the operand as a power of two: 2^6 = 64-byte alignment
    1031 .type	.LK512,%object	// ELF symbol type: data object
    1032 .LK512:	// SHA-512 round constants K[0..79] (FIPS 180-4), two 64-bit words per line, then a zero terminator
    1033 .quad	0x428a2f98d728ae22,0x7137449123ef65cd	// K[0..1]
    1034 .quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc	// K[2..3]
    1035 .quad	0x3956c25bf348b538,0x59f111f1b605d019	// K[4..5]
    1036 .quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118	// K[6..7]
    1037 .quad	0xd807aa98a3030242,0x12835b0145706fbe	// K[8..9]
    1038 .quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2	// K[10..11]
    1039 .quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1	// K[12..13]
    1040 .quad	0x9bdc06a725c71235,0xc19bf174cf692694	// K[14..15]
    1041 .quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3	// K[16..17]
    1042 .quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65	// K[18..19]
    1043 .quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483	// K[20..21]
    1044 .quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5	// K[22..23]
    1045 .quad	0x983e5152ee66dfab,0xa831c66d2db43210	// K[24..25]
    1046 .quad	0xb00327c898fb213f,0xbf597fc7beef0ee4	// K[26..27]
    1047 .quad	0xc6e00bf33da88fc2,0xd5a79147930aa725	// K[28..29]
    1048 .quad	0x06ca6351e003826f,0x142929670a0e6e70	// K[30..31]
    1049 .quad	0x27b70a8546d22ffc,0x2e1b21385c26c926	// K[32..33]
    1050 .quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df	// K[34..35]
    1051 .quad	0x650a73548baf63de,0x766a0abb3c77b2a8	// K[36..37]
    1052 .quad	0x81c2c92e47edaee6,0x92722c851482353b	// K[38..39]
    1053 .quad	0xa2bfe8a14cf10364,0xa81a664bbc423001	// K[40..41]
    1054 .quad	0xc24b8b70d0f89791,0xc76c51a30654be30	// K[42..43]
    1055 .quad	0xd192e819d6ef5218,0xd69906245565a910	// K[44..45]
    1056 .quad	0xf40e35855771202a,0x106aa07032bbd1b8	// K[46..47]
    1057 .quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53	// K[48..49]
    1058 .quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8	// K[50..51]
    1059 .quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb	// K[52..53]
    1060 .quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3	// K[54..55]
    1061 .quad	0x748f82ee5defb2fc,0x78a5636f43172f60	// K[56..57]
    1062 .quad	0x84c87814a1f0ab72,0x8cc702081a6439ec	// K[58..59]
    1063 .quad	0x90befffa23631e28,0xa4506cebde82bde9	// K[60..61]
    1064 .quad	0xbef9a3f7b2c67915,0xc67178f2e372532b	// K[62..63]
    1065 .quad	0xca273eceea26619c,0xd186b8c721c0c207	// K[64..65]
    1066 .quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178	// K[66..67]
    1067 .quad	0x06f067aa72176fba,0x0a637dc5a2c898a6	// K[68..69]
    1068 .quad	0x113f9804bef90dae,0x1b710b35131c471b	// K[70..71]
    1069 .quad	0x28db77f523047d84,0x32caab7b40c72493	// K[72..73]
    1070 .quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c	// K[74..75]
    1071 .quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a	// K[76..77]
    1072 .quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817	// K[78..79]
    1073 .quad	0	// terminator: zero entry; the round loop tests each loaded K with cbnz, presumably to detect this end marker
    1074 .size	.LK512,.-.LK512	// 81 quads = 648 bytes, the same amount sha512_block_data_order subtracts to rewind its K pointer
    1075 .byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	// NUL-terminated ASCII: "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
    1076 .align	2	// pad after the odd-length string: 2^2 = 4-byte alignment
    1077 .align	2	// repeated directive; already aligned, so it adds no padding
    1078 #ifndef	__KERNEL__	// the capability word below is declared only for non-kernel builds
    1079 .comm	OPENSSL_armcap_P,4,4	// common symbol: 4 bytes, 4-byte aligned
    1080 .hidden	OPENSSL_armcap_P	// hidden ELF visibility: not exported from the linked object
    1081 #endif	// !__KERNEL__
   1082 #endif
   1083 #endif  // !OPENSSL_NO_ASM
   1084