Home | History | Annotate | Download | only in fipsmodule
      1 // This file is generated from a similarly-named Perl script in the BoringSSL
      2 // source tree. Do not edit by hand.
      3 
      4 #if defined(__has_feature)
      5 #if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
      6 #define OPENSSL_NO_ASM
      7 #endif
      8 #endif
      9 
     10 #if !defined(OPENSSL_NO_ASM)
     11 #if defined(__aarch64__)
     12 #if defined(BORINGSSL_PREFIX)
     13 #include <boringssl_prefix_symbols_asm.h>
     14 #endif
     15 #include <openssl/arm_arch.h>
     16 
     17 #if __ARM_MAX_ARCH__>=7
     18 .text
     19 .arch	armv8-a+crypto
     20 .section	.rodata
     21 .align	5
        // Constants for the key expansion code below: three rows of four identical
        // 32-bit words. The set-key code loads rows 0 and 1 into v1 and v2; the
        // 128-bit path later reloads v1 from row 2.
     22 .Lrcon:
     23 .long	0x01,0x01,0x01,0x01	// initial round constant; the code shifts it left by one each round
     24 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
     25 .long	0x1b,0x1b,0x1b,0x1b	// round constant reloaded once the 0x01 row has been shifted eight times
     26 
     27 .text
     28 
     29 .globl	aes_hw_set_encrypt_key
     30 .hidden	aes_hw_set_encrypt_key
     31 .type	aes_hw_set_encrypt_key,%function
     32 .align	5
        // aes_hw_set_encrypt_key: expand a 128-, 192- or 256-bit AES key into an
        // encryption key schedule.
        //
        // In:   x0 = pointer to the user key bytes (rejected if NULL)
        //       w1 = key length in bits; only 128, 192 and 256 pass the checks
        //       x2 = output buffer (rejected if NULL); round keys are written from
        //            offset 0 and the round count (10/12/14) is stored as a 32-bit
        //            word at offset 240
        // Out:  x0 = 0 on success, -1 if x0 or x2 is NULL, -2 if w1 is rejected
        // Uses: x1, x2, x3, x12, v0-v6, condition flags
        // NOTE(review): presumably declared in C roughly as
        //       int aes_hw_set_encrypt_key(const uint8_t *key, unsigned bits, AES_KEY *out)
        //       -- confirm against the header that declares it.
        //
        // .Lenc_key is a second entry point reached with bl from
        // aes_hw_set_decrypt_key. On a successful return x2 points at offset 240
        // of the schedule and w12 holds the round count; that caller uses both.
     33 aes_hw_set_encrypt_key:
     34 .Lenc_key:
     35 	stp	x29,x30,[sp,#-16]!
     36 	add	x29,sp,#0
     37 	mov	x3,#-1	// provisional return value: NULL argument
     38 	cmp	x0,#0
     39 	b.eq	.Lenc_key_abort
     40 	cmp	x2,#0
     41 	b.eq	.Lenc_key_abort
     42 	mov	x3,#-2	// provisional return value: bad key length
     43 	cmp	w1,#128
     44 	b.lt	.Lenc_key_abort
     45 	cmp	w1,#256
     46 	b.gt	.Lenc_key_abort
     47 	tst	w1,#0x3f	// length must be a multiple of 64 bits
     48 	b.ne	.Lenc_key_abort
     49 
     50 	adrp	x3,.Lrcon
     51 	add	x3,x3,:lo12:.Lrcon	// x3 = address of the .Lrcon table
     52 	cmp	w1,#192	// flags are consumed by the three branches below
     53 
     54 	eor	v0.16b,v0.16b,v0.16b	// v0 = 0: zero key for aese and zero fill for ext
     55 	ld1	{v3.16b},[x0],#16	// v3 = first 16 bytes of the user key
     56 	mov	w1,#8		// reuse w1
     57 	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = round constant row, v2 = rotate-n-splat mask
     58 
     59 	b.lt	.Loop128
     60 	b.eq	.L192
     61 	b	.L256
     62 
     63 .align	4
     64 .Loop128:
        	// 128-bit keys: v3 holds the current round key; eight passes (w1 = 8).
     65 	tbl	v6.16b,{v3.16b},v2.16b	// bytes 13,14,15,12 of v3 copied into every word
     66 	ext	v5.16b,v0.16b,v3.16b,#12	// v5 = v3 moved up one word, zero filled
     67 	st1	{v3.4s},[x2],#16	// emit the current round key
     68 	aese	v6.16b,v0.16b	// AES round step with the all-zero key in v0
     69 	subs	w1,w1,#1
     70 
        	// The three ext/eor pairs leave each word of v3 holding the XOR of
        	// itself and all lower words.
     71 	eor	v3.16b,v3.16b,v5.16b
     72 	ext	v5.16b,v0.16b,v5.16b,#12
     73 	eor	v3.16b,v3.16b,v5.16b
     74 	ext	v5.16b,v0.16b,v5.16b,#12
     75 	eor	v6.16b,v6.16b,v1.16b	// mix in the round constant
     76 	eor	v3.16b,v3.16b,v5.16b
     77 	shl	v1.16b,v1.16b,#1	// next round constant
     78 	eor	v3.16b,v3.16b,v6.16b	// v3 = next round key
     79 	b.ne	.Loop128
     80 
     81 	ld1	{v1.4s},[x3]	// x3 was advanced past two rows above: v1 = 0x1b row
     82 
        	// Two more expansion steps, unrolled, using 0x1b and then 0x1b<<1.
     83 	tbl	v6.16b,{v3.16b},v2.16b
     84 	ext	v5.16b,v0.16b,v3.16b,#12
     85 	st1	{v3.4s},[x2],#16
     86 	aese	v6.16b,v0.16b
     87 
     88 	eor	v3.16b,v3.16b,v5.16b
     89 	ext	v5.16b,v0.16b,v5.16b,#12
     90 	eor	v3.16b,v3.16b,v5.16b
     91 	ext	v5.16b,v0.16b,v5.16b,#12
     92 	eor	v6.16b,v6.16b,v1.16b
     93 	eor	v3.16b,v3.16b,v5.16b
     94 	shl	v1.16b,v1.16b,#1
     95 	eor	v3.16b,v3.16b,v6.16b
     96 
     97 	tbl	v6.16b,{v3.16b},v2.16b
     98 	ext	v5.16b,v0.16b,v3.16b,#12
     99 	st1	{v3.4s},[x2],#16
    100 	aese	v6.16b,v0.16b
    101 
    102 	eor	v3.16b,v3.16b,v5.16b
    103 	ext	v5.16b,v0.16b,v5.16b,#12
    104 	eor	v3.16b,v3.16b,v5.16b
    105 	ext	v5.16b,v0.16b,v5.16b,#12
    106 	eor	v6.16b,v6.16b,v1.16b
    107 	eor	v3.16b,v3.16b,v5.16b
    108 	eor	v3.16b,v3.16b,v6.16b
    109 	st1	{v3.4s},[x2]	// last round key, stored without post-increment (offset 160)
    110 	add	x2,x2,#0x50	// 160 + 0x50 = 240: x2 now addresses the round-count word
    111 
    112 	mov	w12,#10	// round count for 128-bit keys
    113 	b	.Ldone
    114 
    115 .align	4
    116 .L192:
        	// 192-bit keys: v3 = key bytes 0-15, low half of v4 = key bytes 16-23.
    117 	ld1	{v4.8b},[x0],#8
    118 	movi	v6.16b,#8			// borrow v6.16b
    119 	st1	{v3.4s},[x2],#16
    120 	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
    121 
    122 .Loop192:
        	// Eight passes (w1 = 8); each pass emits 8 bytes from v4 and 16 from v3.
    123 	tbl	v6.16b,{v4.16b},v2.16b	// adjusted mask selects bytes 5,6,7,4 of v4
    124 	ext	v5.16b,v0.16b,v3.16b,#12
    125 	st1	{v4.8b},[x2],#8
    126 	aese	v6.16b,v0.16b
    127 	subs	w1,w1,#1
    128 
    129 	eor	v3.16b,v3.16b,v5.16b
    130 	ext	v5.16b,v0.16b,v5.16b,#12
    131 	eor	v3.16b,v3.16b,v5.16b
    132 	ext	v5.16b,v0.16b,v5.16b,#12
    133 	eor	v3.16b,v3.16b,v5.16b
    134 
    135 	dup	v5.4s,v3.s[3]	// broadcast the top word of v3
    136 	eor	v5.16b,v5.16b,v4.16b
    137 	eor	v6.16b,v6.16b,v1.16b	// mix in the round constant
    138 	ext	v4.16b,v0.16b,v4.16b,#12
    139 	shl	v1.16b,v1.16b,#1	// next round constant
    140 	eor	v4.16b,v4.16b,v5.16b
    141 	eor	v3.16b,v3.16b,v6.16b
    142 	eor	v4.16b,v4.16b,v6.16b
    143 	st1	{v3.4s},[x2],#16
    144 	b.ne	.Loop192	// flags from subs above; none of the vector ops change them
    145 
    146 	mov	w12,#12	// round count for 192-bit keys
    147 	add	x2,x2,#0x20	// 208 bytes written so far; +0x20 = offset 240
    148 	b	.Ldone
    149 
    150 .align	4
    151 .L256:
        	// 256-bit keys: v3 = key bytes 0-15, v4 = key bytes 16-31.
    152 	ld1	{v4.16b},[x0]
    153 	mov	w1,#7	// seven passes through .Loop256
    154 	mov	w12,#14	// round count for 256-bit keys
    155 	st1	{v3.4s},[x2],#16
    156 
    157 .Loop256:
        	// First half of each pass derives the next v3 from v4 (rotate, aese,
        	// round constant); the second half derives the next v4 from v3.
    158 	tbl	v6.16b,{v4.16b},v2.16b
    159 	ext	v5.16b,v0.16b,v3.16b,#12
    160 	st1	{v4.4s},[x2],#16
    161 	aese	v6.16b,v0.16b
    162 	subs	w1,w1,#1
    163 
    164 	eor	v3.16b,v3.16b,v5.16b
    165 	ext	v5.16b,v0.16b,v5.16b,#12
    166 	eor	v3.16b,v3.16b,v5.16b
    167 	ext	v5.16b,v0.16b,v5.16b,#12
    168 	eor	v6.16b,v6.16b,v1.16b
    169 	eor	v3.16b,v3.16b,v5.16b
    170 	shl	v1.16b,v1.16b,#1
    171 	eor	v3.16b,v3.16b,v6.16b
    172 	st1	{v3.4s},[x2],#16
    173 	b.eq	.Ldone	// flags from subs above: 240 bytes written after the seventh pass
    174 
    175 	dup	v6.4s,v3.s[3]		// just splat
    176 	ext	v5.16b,v0.16b,v4.16b,#12
    177 	aese	v6.16b,v0.16b
    178 
    179 	eor	v4.16b,v4.16b,v5.16b
    180 	ext	v5.16b,v0.16b,v5.16b,#12
    181 	eor	v4.16b,v4.16b,v5.16b
    182 	ext	v5.16b,v0.16b,v5.16b,#12
    183 	eor	v4.16b,v4.16b,v5.16b
    184 
    185 	eor	v4.16b,v4.16b,v6.16b
    186 	b	.Loop256
    187 
    188 .Ldone:
    189 	str	w12,[x2]	// x2 is at offset 240 on every path: store the round count
    190 	mov	x3,#0	// success
    191 
    192 .Lenc_key_abort:
    193 	mov	x0,x3			// return value
    194 	ldr	x29,[sp],#16	// pop the frame; x30 is never overwritten in this routine, so it is not reloaded
    195 	ret
    196 .size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
    197 
    198 .globl	aes_hw_set_decrypt_key
    199 .hidden	aes_hw_set_decrypt_key
    200 .type	aes_hw_set_decrypt_key,%function
    201 .align	5
        // aes_hw_set_decrypt_key: build a decryption key schedule.
        //
        // x0, w1 and x2 are passed unchanged to .Lenc_key (user key, key length in
        // bits, output buffer). After the encryption expansion succeeds, the round
        // keys are reversed in place: the first and last keys are swapped as-is,
        // every other key is passed through aesimc while being swapped.
        //
        // Out:  x0 = 0 on success, otherwise the nonzero code from .Lenc_key
        // Uses: x0, x2, x4 and v0, v1 here, plus everything .Lenc_key uses
    202 aes_hw_set_decrypt_key:
    203 	stp	x29,x30,[sp,#-16]!
    204 	add	x29,sp,#0
    205 	bl	.Lenc_key	// returns status in x0; leaves x2 = schedule+240 and w12 = rounds
    206 
    207 	cmp	x0,#0
    208 	b.ne	.Ldec_key_abort	// propagate the error code unchanged
    209 
    210 	sub	x2,x2,#240		// restore original x2
    211 	mov	x4,#-16	// post-index step for walking x0 backwards
    212 	add	x0,x2,x12,lsl#4	// end of key schedule
    213 
        	// Swap the first and last round keys without transforming them.
    214 	ld1	{v0.4s},[x2]
    215 	ld1	{v1.4s},[x0]
    216 	st1	{v0.4s},[x0],x4
    217 	st1	{v1.4s},[x2],#16
    218 
    219 .Loop_imc:
        	// Swap the next outer pair, applying aesimc to both, and step inward.
    220 	ld1	{v0.4s},[x2]
    221 	ld1	{v1.4s},[x0]
    222 	aesimc	v0.16b,v0.16b
    223 	aesimc	v1.16b,v1.16b
    224 	st1	{v0.4s},[x0],x4
    225 	st1	{v1.4s},[x2],#16
    226 	cmp	x0,x2
    227 	b.hi	.Loop_imc	// continue while the back pointer is still above the front one
    228 
        	// Remaining middle round key: transform it in place.
    229 	ld1	{v0.4s},[x2]
    230 	aesimc	v0.16b,v0.16b
    231 	st1	{v0.4s},[x0]
    232 
    233 	eor	x0,x0,x0		// return value
    234 .Ldec_key_abort:
    235 	ldp	x29,x30,[sp],#16	// x30 was clobbered by bl, so both registers are restored
    236 	ret
    237 .size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
    238 .globl	aes_hw_encrypt
    239 .hidden	aes_hw_encrypt
    240 .type	aes_hw_encrypt,%function
    241 .align	5
        // aes_hw_encrypt: encrypt a single 16-byte block.
        //
        // In:   x0 = input block, x1 = output block,
        //       x2 = key schedule; the round count is read from offset 240
        // Leaf routine with no stack frame. Uses w3, x2, v0-v2 and the flags.
        // NOTE(review): presumably void aes_hw_encrypt(const uint8_t *in,
        //       uint8_t *out, const AES_KEY *key) -- confirm against the header.
    242 aes_hw_encrypt:
    243 	ldr	w3,[x2,#240]	// w3 = round count
    244 	ld1	{v0.4s},[x2],#16	// v0 = round key 0
    245 	ld1	{v2.16b},[x0]	// v2 = block being processed
    246 	sub	w3,w3,#2	// the last two rounds are handled after the loop
    247 	ld1	{v1.4s},[x2],#16	// v1 = round key 1
    248 
    249 .Loop_enc:
        	// Two rounds per pass; the next two round keys are loaded as we go.
    250 	aese	v2.16b,v0.16b
    251 	aesmc	v2.16b,v2.16b
    252 	ld1	{v0.4s},[x2],#16
    253 	subs	w3,w3,#2
    254 	aese	v2.16b,v1.16b
    255 	aesmc	v2.16b,v2.16b
    256 	ld1	{v1.4s},[x2],#16
    257 	b.gt	.Loop_enc
    258 
        	// Final two rounds: the last aese has no aesmc and is followed by an
        	// XOR with the final round key.
    259 	aese	v2.16b,v0.16b
    260 	aesmc	v2.16b,v2.16b
    261 	ld1	{v0.4s},[x2]	// v0 = final round key
    262 	aese	v2.16b,v1.16b
    263 	eor	v2.16b,v2.16b,v0.16b
    264 
    265 	st1	{v2.16b},[x1]
    266 	ret
    267 .size	aes_hw_encrypt,.-aes_hw_encrypt
    268 .globl	aes_hw_decrypt
    269 .hidden	aes_hw_decrypt
    270 .type	aes_hw_decrypt,%function
    271 .align	5
        // aes_hw_decrypt: decrypt a single 16-byte block.
        //
        // In:   x0 = input block, x1 = output block,
        //       x2 = key schedule; the round count is read from offset 240
        // The schedule is consumed front to back with aesd/aesimc, so it is
        // presumably one produced by aes_hw_set_decrypt_key -- confirm with callers.
        // Leaf routine with no stack frame. Uses w3, x2, v0-v2 and the flags.
    272 aes_hw_decrypt:
    273 	ldr	w3,[x2,#240]	// w3 = round count
    274 	ld1	{v0.4s},[x2],#16	// v0 = round key 0
    275 	ld1	{v2.16b},[x0]	// v2 = block being processed
    276 	sub	w3,w3,#2	// the last two rounds are handled after the loop
    277 	ld1	{v1.4s},[x2],#16	// v1 = round key 1
    278 
    279 .Loop_dec:
        	// Two rounds per pass; the next two round keys are loaded as we go.
    280 	aesd	v2.16b,v0.16b
    281 	aesimc	v2.16b,v2.16b
    282 	ld1	{v0.4s},[x2],#16
    283 	subs	w3,w3,#2
    284 	aesd	v2.16b,v1.16b
    285 	aesimc	v2.16b,v2.16b
    286 	ld1	{v1.4s},[x2],#16
    287 	b.gt	.Loop_dec
    288 
        	// Final two rounds: the last aesd has no aesimc and is followed by an
        	// XOR with the final round key.
    289 	aesd	v2.16b,v0.16b
    290 	aesimc	v2.16b,v2.16b
    291 	ld1	{v0.4s},[x2]	// v0 = final round key
    292 	aesd	v2.16b,v1.16b
    293 	eor	v2.16b,v2.16b,v0.16b
    294 
    295 	st1	{v2.16b},[x1]
    296 	ret
    297 .size	aes_hw_decrypt,.-aes_hw_decrypt
    298 .globl	aes_hw_cbc_encrypt
    299 .hidden	aes_hw_cbc_encrypt
    300 .type	aes_hw_cbc_encrypt,%function
    301 .align	5
        // aes_hw_cbc_encrypt: AES-CBC encryption or decryption of whole blocks.
        //
        // In:   x0 = input, x1 = output
        //       x2 = length in bytes; rounded down to a multiple of 16, and the
        //            routine returns immediately (IV untouched) if it is below 16
        //       x3 = key schedule; the round count is read from offset 240
        //       x4 = 16-byte IV, read on entry and overwritten with the last
        //            ciphertext block on exit
        //       w5 = nonzero to encrypt, zero to decrypt
        // No return value is set.
        // Uses: x0-x3, x5-x8, x12, x14, v0-v7, v16-v23, condition flags
        // For decryption the schedule is consumed front to back with aesd/aesimc,
        // so it is presumably one produced by aes_hw_set_decrypt_key -- confirm.
        //
        // The last seven round keys stay in v18-v23 and v7 on the encrypt paths.
        // The decrypt paths reuse v18/v19 for data and take those two keys from the
        // x7 key stream instead.
    302 aes_hw_cbc_encrypt:
    303 	stp	x29,x30,[sp,#-16]!
    304 	add	x29,sp,#0
    305 	subs	x2,x2,#16	// x2 = length - 16
    306 	mov	x8,#16	// x8 = step applied to x0 after each input load
    307 	b.lo	.Lcbc_abort	// fewer than 16 bytes: nothing to do
    308 	csel	x8,xzr,x8,eq	// exactly one block: do not advance x0 past it
    309 
    310 	cmp	w5,#0			// en- or decrypting?
    311 	ldr	w5,[x3,#240]	// w5 = round count; the mode now lives only in the flags
    312 	and	x2,x2,#-16	// ignore any trailing partial block
    313 	ld1	{v6.16b},[x4]	// v6 = IV
    314 	ld1	{v0.16b},[x0],x8	// v0 = first input block
    315 
    316 	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
    317 	sub	w5,w5,#6
    318 	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
    319 	sub	w5,w5,#2	// w5 = rounds - 8
    320 	ld1	{v18.4s,v19.4s},[x7],#32
    321 	ld1	{v20.4s,v21.4s},[x7],#32
    322 	ld1	{v22.4s,v23.4s},[x7],#32
    323 	ld1	{v7.4s},[x7]	// v7 = final round key
    324 
    325 	add	x7,x3,#32	// x7 -> round key 2
    326 	mov	w6,w5
    327 	b.eq	.Lcbc_dec	// flags still come from cmp w5,#0 above
    328 
        	// ---- encryption ----
    329 	cmp	w5,#2	// rounds - 8 == 2, i.e. a 10-round schedule
    330 	eor	v0.16b,v0.16b,v6.16b	// first block ^= IV
    331 	eor	v5.16b,v16.16b,v7.16b	// v5 = round key 0 ^ final round key
    332 	b.eq	.Lcbc_enc128
    333 
        	// 12- and 14-round schedules: keep round keys 2 and 3 in v2/v3 and set up
        	// pointers to keys 1 and 4-7, which are reloaded on every pass.
    334 	ld1	{v2.4s,v3.4s},[x7]
    335 	add	x7,x3,#16
    336 	add	x6,x3,#16*4
    337 	add	x12,x3,#16*5
    338 	aese	v0.16b,v16.16b
    339 	aesmc	v0.16b,v0.16b
    340 	add	x14,x3,#16*6
    341 	add	x3,x3,#16*7
    342 	b	.Lenter_cbc_enc
    343 
    344 .align	4
    345 .Loop_cbc_enc:
        	// v16 holds (next input ^ v5). Used as the aese key it cancels the
        	// final-key XOR that was skipped on v0 and applies both the CBC
        	// chaining XOR and round key 0 in one step.
    346 	aese	v0.16b,v16.16b
    347 	aesmc	v0.16b,v0.16b
    348 	st1	{v6.16b},[x1],#16	// store the previous ciphertext block
    349 .Lenter_cbc_enc:
    350 	aese	v0.16b,v17.16b
    351 	aesmc	v0.16b,v0.16b
    352 	aese	v0.16b,v2.16b
    353 	aesmc	v0.16b,v0.16b
    354 	ld1	{v16.4s},[x6]	// round key 4
    355 	cmp	w5,#4	// rounds - 8 == 4, i.e. a 12-round schedule
    356 	aese	v0.16b,v3.16b
    357 	aesmc	v0.16b,v0.16b
    358 	ld1	{v17.4s},[x12]	// round key 5
    359 	b.eq	.Lcbc_enc192
    360 
        	// 14-round schedule only: two extra rounds, then fetch keys 6 and 7.
    361 	aese	v0.16b,v16.16b
    362 	aesmc	v0.16b,v0.16b
    363 	ld1	{v16.4s},[x14]
    364 	aese	v0.16b,v17.16b
    365 	aesmc	v0.16b,v0.16b
    366 	ld1	{v17.4s},[x3]
    367 	nop
    368 
    369 .Lcbc_enc192:
    370 	aese	v0.16b,v16.16b
    371 	aesmc	v0.16b,v0.16b
    372 	subs	x2,x2,#16	// one block consumed; flags are used by csel and b.hs below
    373 	aese	v0.16b,v17.16b
    374 	aesmc	v0.16b,v0.16b
    375 	csel	x8,xzr,x8,eq	// last block coming up: stop advancing x0
    376 	aese	v0.16b,v18.16b
    377 	aesmc	v0.16b,v0.16b
    378 	aese	v0.16b,v19.16b
    379 	aesmc	v0.16b,v0.16b
    380 	ld1	{v16.16b},[x0],x8	// next input block
    381 	aese	v0.16b,v20.16b
    382 	aesmc	v0.16b,v0.16b
    383 	eor	v16.16b,v16.16b,v5.16b	// pre-mix with round key 0 ^ final round key
    384 	aese	v0.16b,v21.16b
    385 	aesmc	v0.16b,v0.16b
    386 	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
    387 	aese	v0.16b,v22.16b
    388 	aesmc	v0.16b,v0.16b
    389 	aese	v0.16b,v23.16b
    390 	eor	v6.16b,v0.16b,v7.16b	// v6 = finished ciphertext block (and next chaining value)
    391 	b.hs	.Loop_cbc_enc
    392 
    393 	st1	{v6.16b},[x1],#16
    394 	b	.Lcbc_done
    395 
    396 .align	5
    397 .Lcbc_enc128:
        	// 10-round schedule: all round keys stay in registers
        	// (v16, v17, v2, v3, v18-v23, v7), apart from v16 doubling as the
        	// pre-mixed next input inside the loop.
    398 	ld1	{v2.4s,v3.4s},[x7]
    399 	aese	v0.16b,v16.16b
    400 	aesmc	v0.16b,v0.16b
    401 	b	.Lenter_cbc_enc128
    402 .Loop_cbc_enc128:
    403 	aese	v0.16b,v16.16b	// v16 = next input ^ v5, same trick as in .Loop_cbc_enc
    404 	aesmc	v0.16b,v0.16b
    405 	st1	{v6.16b},[x1],#16	// store the previous ciphertext block
    406 .Lenter_cbc_enc128:
    407 	aese	v0.16b,v17.16b
    408 	aesmc	v0.16b,v0.16b
    409 	subs	x2,x2,#16	// one block consumed; flags are used by csel and b.hs below
    410 	aese	v0.16b,v2.16b
    411 	aesmc	v0.16b,v0.16b
    412 	csel	x8,xzr,x8,eq	// last block coming up: stop advancing x0
    413 	aese	v0.16b,v3.16b
    414 	aesmc	v0.16b,v0.16b
    415 	aese	v0.16b,v18.16b
    416 	aesmc	v0.16b,v0.16b
    417 	aese	v0.16b,v19.16b
    418 	aesmc	v0.16b,v0.16b
    419 	ld1	{v16.16b},[x0],x8	// next input block
    420 	aese	v0.16b,v20.16b
    421 	aesmc	v0.16b,v0.16b
    422 	aese	v0.16b,v21.16b
    423 	aesmc	v0.16b,v0.16b
    424 	aese	v0.16b,v22.16b
    425 	aesmc	v0.16b,v0.16b
    426 	eor	v16.16b,v16.16b,v5.16b
    427 	aese	v0.16b,v23.16b
    428 	eor	v6.16b,v0.16b,v7.16b	// v6 = finished ciphertext block
    429 	b.hs	.Loop_cbc_enc128
    430 
    431 	st1	{v6.16b},[x1],#16
    432 	b	.Lcbc_done
    433 .align	5
    434 .Lcbc_dec:
        	// ---- decryption ----
        	// Up to three blocks are processed in parallel in v0, v1 and v18. v2, v3
        	// and v19 keep untouched copies of the ciphertext, which serve as the
        	// chaining values for the following blocks; v6 is the chaining value for
        	// the first block of each group.
    435 	ld1	{v18.16b},[x0],#16
    436 	subs	x2,x2,#32		// bias
    437 	add	w6,w5,#2	// w6 = rounds - 6: counter for the two-rounds-per-pass loops
    438 	orr	v3.16b,v0.16b,v0.16b
    439 	orr	v1.16b,v0.16b,v0.16b
    440 	orr	v19.16b,v18.16b,v18.16b
    441 	b.lo	.Lcbc_dec_tail	// fewer than three blocks in total
    442 
    443 	orr	v1.16b,v18.16b,v18.16b
    444 	ld1	{v18.16b},[x0],#16
    445 	orr	v2.16b,v0.16b,v0.16b
    446 	orr	v3.16b,v1.16b,v1.16b
    447 	orr	v19.16b,v18.16b,v18.16b
    448 
    449 .Loop3x_cbc_dec:
    450 	aesd	v0.16b,v16.16b
    451 	aesimc	v0.16b,v0.16b
    452 	aesd	v1.16b,v16.16b
    453 	aesimc	v1.16b,v1.16b
    454 	aesd	v18.16b,v16.16b
    455 	aesimc	v18.16b,v18.16b
    456 	ld1	{v16.4s},[x7],#16
    457 	subs	w6,w6,#2
    458 	aesd	v0.16b,v17.16b
    459 	aesimc	v0.16b,v0.16b
    460 	aesd	v1.16b,v17.16b
    461 	aesimc	v1.16b,v1.16b
    462 	aesd	v18.16b,v17.16b
    463 	aesimc	v18.16b,v18.16b
    464 	ld1	{v17.4s},[x7],#16
    465 	b.gt	.Loop3x_cbc_dec
    466 
        	// Last six rounds, interleaved with loading the next three blocks. Each
        	// chaining value is pre-XORed with the final round key (v7) so a single
        	// eor finishes the block.
    467 	aesd	v0.16b,v16.16b
    468 	aesimc	v0.16b,v0.16b
    469 	aesd	v1.16b,v16.16b
    470 	aesimc	v1.16b,v1.16b
    471 	aesd	v18.16b,v16.16b
    472 	aesimc	v18.16b,v18.16b
    473 	eor	v4.16b,v6.16b,v7.16b
    474 	subs	x2,x2,#0x30	// three blocks consumed; flags are used by csel and b.hs below
    475 	eor	v5.16b,v2.16b,v7.16b
    476 	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
    477 	aesd	v0.16b,v17.16b
    478 	aesimc	v0.16b,v0.16b
    479 	aesd	v1.16b,v17.16b
    480 	aesimc	v1.16b,v1.16b
    481 	aesd	v18.16b,v17.16b
    482 	aesimc	v18.16b,v18.16b
    483 	eor	v17.16b,v3.16b,v7.16b
    484 	add	x0,x0,x6		// x0 is adjusted in such way that
    485 					// at exit from the loop v1.16b-v18.16b
    486 					// are loaded with last "words"
    487 	orr	v6.16b,v19.16b,v19.16b	// chaining value for the next group
    488 	mov	x7,x3	// rewind the round-key stream
    489 	aesd	v0.16b,v20.16b
    490 	aesimc	v0.16b,v0.16b
    491 	aesd	v1.16b,v20.16b
    492 	aesimc	v1.16b,v1.16b
    493 	aesd	v18.16b,v20.16b
    494 	aesimc	v18.16b,v18.16b
    495 	ld1	{v2.16b},[x0],#16
    496 	aesd	v0.16b,v21.16b
    497 	aesimc	v0.16b,v0.16b
    498 	aesd	v1.16b,v21.16b
    499 	aesimc	v1.16b,v1.16b
    500 	aesd	v18.16b,v21.16b
    501 	aesimc	v18.16b,v18.16b
    502 	ld1	{v3.16b},[x0],#16
    503 	aesd	v0.16b,v22.16b
    504 	aesimc	v0.16b,v0.16b
    505 	aesd	v1.16b,v22.16b
    506 	aesimc	v1.16b,v1.16b
    507 	aesd	v18.16b,v22.16b
    508 	aesimc	v18.16b,v18.16b
    509 	ld1	{v19.16b},[x0],#16
    510 	aesd	v0.16b,v23.16b
    511 	aesd	v1.16b,v23.16b
    512 	aesd	v18.16b,v23.16b
    513 	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
    514 	add	w6,w5,#2	// reset the round counter
    515 	eor	v4.16b,v4.16b,v0.16b
    516 	eor	v5.16b,v5.16b,v1.16b
    517 	eor	v18.16b,v18.16b,v17.16b
    518 	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
    519 	st1	{v4.16b},[x1],#16
    520 	orr	v0.16b,v2.16b,v2.16b
    521 	st1	{v5.16b},[x1],#16
    522 	orr	v1.16b,v3.16b,v3.16b
    523 	st1	{v18.16b},[x1],#16
    524 	orr	v18.16b,v19.16b,v19.16b
    525 	b.hs	.Loop3x_cbc_dec	// flags from subs x2,x2,#0x30 above
    526 
    527 	cmn	x2,#0x30	// x2 == -0x30 means no blocks are left
    528 	b.eq	.Lcbc_done
    529 	nop
    530 
    531 .Lcbc_dec_tail:
        	// One or two blocks remain; they are processed in v1 and v18.
    532 	aesd	v1.16b,v16.16b
    533 	aesimc	v1.16b,v1.16b
    534 	aesd	v18.16b,v16.16b
    535 	aesimc	v18.16b,v18.16b
    536 	ld1	{v16.4s},[x7],#16
    537 	subs	w6,w6,#2
    538 	aesd	v1.16b,v17.16b
    539 	aesimc	v1.16b,v1.16b
    540 	aesd	v18.16b,v17.16b
    541 	aesimc	v18.16b,v18.16b
    542 	ld1	{v17.4s},[x7],#16
    543 	b.gt	.Lcbc_dec_tail
    544 
    545 	aesd	v1.16b,v16.16b
    546 	aesimc	v1.16b,v1.16b
    547 	aesd	v18.16b,v16.16b
    548 	aesimc	v18.16b,v18.16b
    549 	aesd	v1.16b,v17.16b
    550 	aesimc	v1.16b,v1.16b
    551 	aesd	v18.16b,v17.16b
    552 	aesimc	v18.16b,v18.16b
    553 	aesd	v1.16b,v20.16b
    554 	aesimc	v1.16b,v1.16b
    555 	aesd	v18.16b,v20.16b
    556 	aesimc	v18.16b,v18.16b
    557 	cmn	x2,#0x20	// x2 == -0x20 means exactly one block is left; tested by b.eq below
    558 	aesd	v1.16b,v21.16b
    559 	aesimc	v1.16b,v1.16b
    560 	aesd	v18.16b,v21.16b
    561 	aesimc	v18.16b,v18.16b
    562 	eor	v5.16b,v6.16b,v7.16b
    563 	aesd	v1.16b,v22.16b
    564 	aesimc	v1.16b,v1.16b
    565 	aesd	v18.16b,v22.16b
    566 	aesimc	v18.16b,v18.16b
    567 	eor	v17.16b,v3.16b,v7.16b
    568 	aesd	v1.16b,v23.16b
    569 	aesd	v18.16b,v23.16b
    570 	b.eq	.Lcbc_dec_one
        	// Two blocks: v1 chains from v6, v18 chains from the ciphertext copy in v3.
    571 	eor	v5.16b,v5.16b,v1.16b
    572 	eor	v17.16b,v17.16b,v18.16b
    573 	orr	v6.16b,v19.16b,v19.16b	// last ciphertext block becomes the new IV
    574 	st1	{v5.16b},[x1],#16
    575 	st1	{v17.16b},[x1],#16
    576 	b	.Lcbc_done
    577 
    578 .Lcbc_dec_one:
        	// One block: only the result in v18 is used.
    579 	eor	v5.16b,v5.16b,v18.16b
    580 	orr	v6.16b,v19.16b,v19.16b	// last ciphertext block becomes the new IV
    581 	st1	{v5.16b},[x1],#16
    582 
    583 .Lcbc_done:
    584 	st1	{v6.16b},[x4]	// write the chaining value back to the caller's IV buffer
    585 .Lcbc_abort:
    586 	ldr	x29,[sp],#16	// pop the frame; x30 is never overwritten in this routine
    587 	ret
    588 .size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
    589 .globl	aes_hw_ctr32_encrypt_blocks
    590 .hidden	aes_hw_ctr32_encrypt_blocks
    591 .type	aes_hw_ctr32_encrypt_blocks,%function
    592 .align	5
        // aes_hw_ctr32_encrypt_blocks: XOR whole blocks with an AES counter-mode
        // keystream.
        //
        // In:   x0 = input, x1 = output
        //       x2 = number of 16-byte blocks
        //       x3 = key schedule; the round count is read from offset 240
        //       x4 = 16-byte counter block; the 32-bit word at offset 12 is the
        //            counter
        // Only the counter word is incremented, with 32-bit adds; the other twelve
        // bytes are copied unchanged into every keystream block. Nothing is stored
        // back through x4.
        // No return value is set.
        // Uses: x0-x2, x5-x10, x12, v0-v7, v16-v23, condition flags
        // NOTE(review): there is no early exit for x2 == 0; that value takes the
        //       tail path and stores two blocks -- presumably callers never pass
        //       zero; confirm.
    593 aes_hw_ctr32_encrypt_blocks:
    594 	stp	x29,x30,[sp,#-16]!
    595 	add	x29,sp,#0
    596 	ldr	w5,[x3,#240]	// w5 = round count
    597 
    598 	ldr	w8, [x4, #12]	// w8 = counter word
    599 	ld1	{v0.4s},[x4]	// v0 = full counter block
    600 
    601 	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
    602 	sub	w5,w5,#4
    603 	mov	x12,#16	// x12 = step for the first input load in the tail
    604 	cmp	x2,#2	// flags are used by csel and b.ls below
    605 	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
    606 	sub	w5,w5,#2	// w5 = rounds - 6
    607 	ld1	{v20.4s,v21.4s},[x7],#32
    608 	ld1	{v22.4s,v23.4s},[x7],#32
    609 	ld1	{v7.4s},[x7]	// v7 = final round key
    610 	add	x7,x3,#32	// x7 -> round key 2
    611 	mov	w6,w5	// w6 = counter for the two-rounds-per-pass loops
    612 	csel	x12,xzr,x12,lo	// fewer than two blocks: the tail's second load re-reads the same block
    613 #ifndef __ARMEB__
    614 	rev	w8, w8
    615 #endif
        	// v0 = counter block, v1 = counter + 1, w8 = counter + 2. v6 keeps an
        	// unmodified copy of the block as the template for later counters.
    616 	orr	v1.16b,v0.16b,v0.16b
    617 	add	w10, w8, #1
    618 	orr	v18.16b,v0.16b,v0.16b
    619 	add	w8, w8, #2
    620 	orr	v6.16b,v0.16b,v0.16b
    621 	rev	w10, w10
    622 	mov	v1.s[3],w10
    623 	b.ls	.Lctr32_tail	// flags from cmp x2,#2 above: at most two blocks
    624 	rev	w12, w8
    625 	sub	x2,x2,#3		// bias
    626 	mov	v18.s[3],w12	// v18 = counter + 2
    627 	b	.Loop3x_ctr32
    628 
    629 .align	4
    630 .Loop3x_ctr32:
        	// Three keystream blocks (v0, v1, v18) are encrypted in parallel.
    631 	aese	v0.16b,v16.16b
    632 	aesmc	v0.16b,v0.16b
    633 	aese	v1.16b,v16.16b
    634 	aesmc	v1.16b,v1.16b
    635 	aese	v18.16b,v16.16b
    636 	aesmc	v18.16b,v18.16b
    637 	ld1	{v16.4s},[x7],#16
    638 	subs	w6,w6,#2
    639 	aese	v0.16b,v17.16b
    640 	aesmc	v0.16b,v0.16b
    641 	aese	v1.16b,v17.16b
    642 	aesmc	v1.16b,v1.16b
    643 	aese	v18.16b,v17.16b
    644 	aesmc	v18.16b,v18.16b
    645 	ld1	{v17.4s},[x7],#16
    646 	b.gt	.Loop3x_ctr32
    647 
        	// Last six rounds. The in-flight state moves to v4, v5 and v17 so that
        	// v0, v1 and v18 can be rebuilt from v6 with the next three counter
        	// values. Input blocks are loaded into v2, v3 and v19 and pre-XORed with
        	// the final round key (v7).
    648 	aese	v0.16b,v16.16b
    649 	aesmc	v4.16b,v0.16b
    650 	aese	v1.16b,v16.16b
    651 	aesmc	v5.16b,v1.16b
    652 	ld1	{v2.16b},[x0],#16
    653 	orr	v0.16b,v6.16b,v6.16b
    654 	aese	v18.16b,v16.16b
    655 	aesmc	v18.16b,v18.16b
    656 	ld1	{v3.16b},[x0],#16
    657 	orr	v1.16b,v6.16b,v6.16b
    658 	aese	v4.16b,v17.16b
    659 	aesmc	v4.16b,v4.16b
    660 	aese	v5.16b,v17.16b
    661 	aesmc	v5.16b,v5.16b
    662 	ld1	{v19.16b},[x0],#16
    663 	mov	x7,x3	// rewind the round-key stream
    664 	aese	v18.16b,v17.16b
    665 	aesmc	v17.16b,v18.16b
    666 	orr	v18.16b,v6.16b,v6.16b
    667 	add	w9,w8,#1
    668 	aese	v4.16b,v20.16b
    669 	aesmc	v4.16b,v4.16b
    670 	aese	v5.16b,v20.16b
    671 	aesmc	v5.16b,v5.16b
    672 	eor	v2.16b,v2.16b,v7.16b
    673 	add	w10,w8,#2
    674 	aese	v17.16b,v20.16b
    675 	aesmc	v17.16b,v17.16b
    676 	eor	v3.16b,v3.16b,v7.16b
    677 	add	w8,w8,#3	// w8 again holds the counter value of the third block in flight
    678 	aese	v4.16b,v21.16b
    679 	aesmc	v4.16b,v4.16b
    680 	aese	v5.16b,v21.16b
    681 	aesmc	v5.16b,v5.16b
    682 	eor	v19.16b,v19.16b,v7.16b
    683 	rev	w9,w9
    684 	aese	v17.16b,v21.16b
    685 	aesmc	v17.16b,v17.16b
    686 	mov	v0.s[3], w9
    687 	rev	w10,w10
    688 	aese	v4.16b,v22.16b
    689 	aesmc	v4.16b,v4.16b
    690 	aese	v5.16b,v22.16b
    691 	aesmc	v5.16b,v5.16b
    692 	mov	v1.s[3], w10
    693 	rev	w12,w8	// note: overwrites x12; it is set up again before the tail
    694 	aese	v17.16b,v22.16b
    695 	aesmc	v17.16b,v17.16b
    696 	mov	v18.s[3], w12
    697 	subs	x2,x2,#3	// three blocks consumed; flags are used by b.hs below
    698 	aese	v4.16b,v23.16b
    699 	aese	v5.16b,v23.16b
    700 	aese	v17.16b,v23.16b
    701 
    702 	eor	v2.16b,v2.16b,v4.16b
    703 	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
    704 	st1	{v2.16b},[x1],#16
    705 	eor	v3.16b,v3.16b,v5.16b
    706 	mov	w6,w5	// reset the round counter
    707 	st1	{v3.16b},[x1],#16
    708 	eor	v19.16b,v19.16b,v17.16b
    709 	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
    710 	st1	{v19.16b},[x1],#16
    711 	b.hs	.Loop3x_ctr32
    712 
    713 	adds	x2,x2,#3	// undo the bias: x2 = blocks still to do (0, 1 or 2)
    714 	b.eq	.Lctr32_done
    715 	cmp	x2,#1
    716 	mov	x12,#16
    717 	csel	x12,xzr,x12,eq	// one block left: do not step past it
    718 
    719 .Lctr32_tail:
        	// One or two blocks remain; keystream blocks v0 and v1 are already set up.
    720 	aese	v0.16b,v16.16b
    721 	aesmc	v0.16b,v0.16b
    722 	aese	v1.16b,v16.16b
    723 	aesmc	v1.16b,v1.16b
    724 	ld1	{v16.4s},[x7],#16
    725 	subs	w6,w6,#2
    726 	aese	v0.16b,v17.16b
    727 	aesmc	v0.16b,v0.16b
    728 	aese	v1.16b,v17.16b
    729 	aesmc	v1.16b,v1.16b
    730 	ld1	{v17.4s},[x7],#16
    731 	b.gt	.Lctr32_tail
    732 
    733 	aese	v0.16b,v16.16b
    734 	aesmc	v0.16b,v0.16b
    735 	aese	v1.16b,v16.16b
    736 	aesmc	v1.16b,v1.16b
    737 	aese	v0.16b,v17.16b
    738 	aesmc	v0.16b,v0.16b
    739 	aese	v1.16b,v17.16b
    740 	aesmc	v1.16b,v1.16b
    741 	ld1	{v2.16b},[x0],x12	// first input block; step is 0 when it is the only one
    742 	aese	v0.16b,v20.16b
    743 	aesmc	v0.16b,v0.16b
    744 	aese	v1.16b,v20.16b
    745 	aesmc	v1.16b,v1.16b
    746 	ld1	{v3.16b},[x0]	// second input block (re-reads the first when the step was 0)
    747 	aese	v0.16b,v21.16b
    748 	aesmc	v0.16b,v0.16b
    749 	aese	v1.16b,v21.16b
    750 	aesmc	v1.16b,v1.16b
    751 	eor	v2.16b,v2.16b,v7.16b
    752 	aese	v0.16b,v22.16b
    753 	aesmc	v0.16b,v0.16b
    754 	aese	v1.16b,v22.16b
    755 	aesmc	v1.16b,v1.16b
    756 	eor	v3.16b,v3.16b,v7.16b
    757 	aese	v0.16b,v23.16b
    758 	aese	v1.16b,v23.16b
    759 
    760 	cmp	x2,#1
    761 	eor	v2.16b,v2.16b,v0.16b
    762 	eor	v3.16b,v3.16b,v1.16b
    763 	st1	{v2.16b},[x1],#16
    764 	b.eq	.Lctr32_done	// exactly one block: skip the second store
    765 	st1	{v3.16b},[x1]
    766 
    767 .Lctr32_done:
    768 	ldr	x29,[sp],#16	// pop the frame; x30 is never overwritten in this routine
    769 	ret
    770 .size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
    771 #endif
    772 #endif
    773 #endif  // !OPENSSL_NO_ASM
    774