Home | History | Annotate | Download | only in fipsmodule
      1 #if defined(__aarch64__)
      2 #include <openssl/arm_arch.h>
      3 
      4 #if __ARM_MAX_ARCH__>=7
      5 .text
      6 #if !defined(__clang__) || defined(BORINGSSL_CLANG_SUPPORTS_DOT_ARCH)
      7 .arch	armv8-a+crypto
      8 #endif
      9 .align	5
        // Constants for the key-expansion code, read through x3 by
        // aes_hw_set_encrypt_key:
        //   row 0: initial round constant 0x01 in every 32-bit lane; the
        //          expansion loops double it with shl after each use.
        //   row 1: byte-index mask for tbl. In each 32-bit lane, least
        //          significant byte first, it selects source bytes 13,14,15,12,
        //          i.e. the last word rotated by one byte and replicated.
        //   row 2: round constant 0x1b, fetched by the 128-bit path once its
        //          eight-iteration loop has finished.
     10 .Lrcon:
     11 .long	0x01,0x01,0x01,0x01
     12 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
     13 .long	0x1b,0x1b,0x1b,0x1b
     14 
     15 .globl	aes_hw_set_encrypt_key
     16 .hidden	aes_hw_set_encrypt_key
     17 .type	aes_hw_set_encrypt_key,%function
     18 .align	5
        // aes_hw_set_encrypt_key: expand a user key into an AES round-key
        // schedule using the ARMv8 Crypto Extension.
        //
        // In:   x0 = pointer to the user key (16, 24 or 32 bytes are read,
        //            depending on w1)
        //       w1 = key length in bits: 128, 192 or 256
        //       x2 = pointer to the output schedule; round keys are written
        //            from offset 0 and the round count (10, 12 or 14) is
        //            stored as a 32-bit word at offset 240
        // Out:  x0 = 0 on success, -1 if x0 or x2 is NULL, -2 if w1 is below
        //            128, above 256 or not a multiple of 64
        //
        // On success x2 is left pointing at offset 240 and w12 holds the round
        // count; aes_hw_set_decrypt_key relies on both after "bl .Lenc_key".
        // Scratch: x3, w12, v0-v6 and the flags; x0 and w1 are modified.
     19 aes_hw_set_encrypt_key:
     20 .Lenc_key:
     21 	stp	x29,x30,[sp,#-16]!
     22 	add	x29,sp,#0
        // x3 carries the return value on the error paths.
     23 	mov	x3,#-1
     24 	cmp	x0,#0
     25 	b.eq	.Lenc_key_abort
     26 	cmp	x2,#0
     27 	b.eq	.Lenc_key_abort
     28 	mov	x3,#-2
     29 	cmp	w1,#128
     30 	b.lt	.Lenc_key_abort
     31 	cmp	w1,#256
     32 	b.gt	.Lenc_key_abort
     33 	tst	w1,#0x3f
     34 	b.ne	.Lenc_key_abort
     35 
        // Arguments are valid: x3 is reused as the pointer into .Lrcon.
        // The flags from "cmp w1,#192" are consumed by the b.lt/b.eq below;
        // none of the instructions in between writes the flags.
     36 	adr	x3,.Lrcon
     37 	cmp	w1,#192
     38 
        // v0 = 0 serves as the zero round key for aese and the zero fill for ext.
     39 	eor	v0.16b,v0.16b,v0.16b
     40 	ld1	{v3.16b},[x0],#16
     41 	mov	w1,#8		// reuse w1 as the iteration counter
        // v1 = round constant (row 0), v2 = tbl mask (row 1); x3 now points at row 2.
     42 	ld1	{v1.4s,v2.4s},[x3],#32
     43 
     44 	b.lt	.Loop128
     45 	b.eq	.L192
     46 	b	.L256
     47 
     48 .align	4
        // 128-bit key: eight iterations, each storing the current round key
        // (v3) and deriving the next one.
        //   tbl + aese with the zero key: rotate the last word by one byte,
        //   replicate it to all lanes and pass it through the AES S-box (all
        //   four columns are equal, so the ShiftRows part of aese is a no-op).
        //   ext/eor chain: v3 ^= v3<<32 ^ v3<<64 ^ v3<<96, with v0 supplying
        //   the zeros that are shifted in.
        //   v1 is the round constant; shl doubles it for the next iteration.
     49 .Loop128:
     50 	tbl	v6.16b,{v3.16b},v2.16b
     51 	ext	v5.16b,v0.16b,v3.16b,#12
     52 	st1	{v3.4s},[x2],#16
     53 	aese	v6.16b,v0.16b
     54 	subs	w1,w1,#1
     55 
     56 	eor	v3.16b,v3.16b,v5.16b
     57 	ext	v5.16b,v0.16b,v5.16b,#12
     58 	eor	v3.16b,v3.16b,v5.16b
     59 	ext	v5.16b,v0.16b,v5.16b,#12
     60 	eor	v6.16b,v6.16b,v1.16b
     61 	eor	v3.16b,v3.16b,v5.16b
     62 	shl	v1.16b,v1.16b,#1
     63 	eor	v3.16b,v3.16b,v6.16b
     64 	b.ne	.Loop128
     65 
        // Eight byte-wise doublings have shifted the 0x01 constant out, so
        // fetch 0x1b (row 2) for the two remaining steps; the second of them
        // uses the doubled value produced by the shl below.
     66 	ld1	{v1.4s},[x3]
     67 
     68 	tbl	v6.16b,{v3.16b},v2.16b
     69 	ext	v5.16b,v0.16b,v3.16b,#12
     70 	st1	{v3.4s},[x2],#16
     71 	aese	v6.16b,v0.16b
     72 
     73 	eor	v3.16b,v3.16b,v5.16b
     74 	ext	v5.16b,v0.16b,v5.16b,#12
     75 	eor	v3.16b,v3.16b,v5.16b
     76 	ext	v5.16b,v0.16b,v5.16b,#12
     77 	eor	v6.16b,v6.16b,v1.16b
     78 	eor	v3.16b,v3.16b,v5.16b
     79 	shl	v1.16b,v1.16b,#1
     80 	eor	v3.16b,v3.16b,v6.16b
     81 
     82 	tbl	v6.16b,{v3.16b},v2.16b
     83 	ext	v5.16b,v0.16b,v3.16b,#12
     84 	st1	{v3.4s},[x2],#16
     85 	aese	v6.16b,v0.16b
     86 
     87 	eor	v3.16b,v3.16b,v5.16b
     88 	ext	v5.16b,v0.16b,v5.16b,#12
     89 	eor	v3.16b,v3.16b,v5.16b
     90 	ext	v5.16b,v0.16b,v5.16b,#12
     91 	eor	v6.16b,v6.16b,v1.16b
     92 	eor	v3.16b,v3.16b,v5.16b
     93 	eor	v3.16b,v3.16b,v6.16b
        // The last round key is stored at offset 160 without post-increment;
        // adding 0x50 leaves x2 at offset 240, where .Ldone writes the count.
     94 	st1	{v3.4s},[x2]
     95 	add	x2,x2,#0x50
     96 
     97 	mov	w12,#10
     98 	b	.Ldone
     99 
    100 .align	4
        // 192-bit key: v3 holds the first 16 key bytes and the low half of v4
        // the remaining 8. Subtracting 8 from every mask byte makes tbl pick
        // bytes 5,6,7,4, i.e. the second word of that low half.
    101 .L192:
    102 	ld1	{v4.8b},[x0],#8
    103 	movi	v6.16b,#8			// borrow v6.16b
    104 	st1	{v3.4s},[x2],#16
    105 	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
    106 
        // Eight iterations (w1 is still 8), each storing 8 + 16 = 24 bytes.
    107 .Loop192:
    108 	tbl	v6.16b,{v4.16b},v2.16b
    109 	ext	v5.16b,v0.16b,v3.16b,#12
    110 	st1	{v4.8b},[x2],#8
    111 	aese	v6.16b,v0.16b
    112 	subs	w1,w1,#1
    113 
    114 	eor	v3.16b,v3.16b,v5.16b
    115 	ext	v5.16b,v0.16b,v5.16b,#12
    116 	eor	v3.16b,v3.16b,v5.16b
    117 	ext	v5.16b,v0.16b,v5.16b,#12
    118 	eor	v3.16b,v3.16b,v5.16b
    119 
    120 	dup	v5.4s,v3.s[3]
    121 	eor	v5.16b,v5.16b,v4.16b
    122 	eor	v6.16b,v6.16b,v1.16b
    123 	ext	v4.16b,v0.16b,v4.16b,#12
    124 	shl	v1.16b,v1.16b,#1
    125 	eor	v4.16b,v4.16b,v5.16b
    126 	eor	v3.16b,v3.16b,v6.16b
    127 	eor	v4.16b,v4.16b,v6.16b
    128 	st1	{v3.4s},[x2],#16
    129 	b.ne	.Loop192
    130 
        // 16 + 8*24 = 208 bytes have been written; step x2 on to offset 240.
    131 	mov	w12,#12
    132 	add	x2,x2,#0x20
    133 	b	.Ldone
    134 
    135 .align	4
        // 256-bit key: v3 and v4 hold the two 16-byte halves. w1 = 7 passes,
        // each storing two round keys; the loop is left from its middle
        // (b.eq .Ldone) with x2 already at offset 240.
    136 .L256:
    137 	ld1	{v4.16b},[x0]
    138 	mov	w1,#7
    139 	mov	w12,#14
    140 	st1	{v3.4s},[x2],#16
    141 
    142 .Loop256:
    143 	tbl	v6.16b,{v4.16b},v2.16b
    144 	ext	v5.16b,v0.16b,v3.16b,#12
    145 	st1	{v4.4s},[x2],#16
    146 	aese	v6.16b,v0.16b
    147 	subs	w1,w1,#1
    148 
    149 	eor	v3.16b,v3.16b,v5.16b
    150 	ext	v5.16b,v0.16b,v5.16b,#12
    151 	eor	v3.16b,v3.16b,v5.16b
    152 	ext	v5.16b,v0.16b,v5.16b,#12
    153 	eor	v6.16b,v6.16b,v1.16b
    154 	eor	v3.16b,v3.16b,v5.16b
    155 	shl	v1.16b,v1.16b,#1
    156 	eor	v3.16b,v3.16b,v6.16b
    157 	st1	{v3.4s},[x2],#16
    158 	b.eq	.Ldone
    159 
        // Second half of the step: S-box of the last word of v3 with no
        // rotation and no round constant, folded into v4.
    160 	dup	v6.4s,v3.s[3]		// just splat
    161 	ext	v5.16b,v0.16b,v4.16b,#12
    162 	aese	v6.16b,v0.16b
    163 
    164 	eor	v4.16b,v4.16b,v5.16b
    165 	ext	v5.16b,v0.16b,v5.16b,#12
    166 	eor	v4.16b,v4.16b,v5.16b
    167 	ext	v5.16b,v0.16b,v5.16b,#12
    168 	eor	v4.16b,v4.16b,v5.16b
    169 
    170 	eor	v4.16b,v4.16b,v6.16b
    171 	b	.Loop256
    172 
        // Common success exit: store the round count at offset 240, return 0.
    173 .Ldone:
    174 	str	w12,[x2]
    175 	mov	x3,#0
    176 
        // Only x29 is reloaded: nothing in this routine writes x30, so it
        // still holds the return address saved in the prologue.
    177 .Lenc_key_abort:
    178 	mov	x0,x3			// return value
    179 	ldr	x29,[sp],#16
    180 	ret
    181 .size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
    182 
    183 .globl	aes_hw_set_decrypt_key
    184 .hidden	aes_hw_set_decrypt_key
    185 .type	aes_hw_set_decrypt_key,%function
    186 .align	5
        // aes_hw_set_decrypt_key: build a decryption key schedule.
        //
        // Takes the same arguments as aes_hw_set_encrypt_key (x0 = user key,
        // w1 = bits, x2 = schedule) and first runs that expansion through
        // .Lenc_key. A non-zero result from it is returned unchanged.
        // Otherwise the round keys are reversed in place and every key except
        // the first and the last is passed through aesimc. Returns 0 in x0.
        // Scratch beyond what .Lenc_key uses: x4, v0, v1.
    187 aes_hw_set_decrypt_key:
    188 	stp	x29,x30,[sp,#-16]!
    189 	add	x29,sp,#0
    190 	bl	.Lenc_key
    191 
    192 	cmp	x0,#0
    193 	b.ne	.Ldec_key_abort
    194 
        // .Lenc_key leaves x2 at offset 240 and the round count in w12.
        // x2 walks up from the first round key, x0 walks down from the last
        // one (x4 = -16 is the post-index step for x0).
    195 	sub	x2,x2,#240		// restore original x2
    196 	mov	x4,#-16
    197 	add	x0,x2,x12,lsl#4	// end of key schedule
    198 
        // Swap the first and the last round key without transforming them.
    199 	ld1	{v0.4s},[x2]
    200 	ld1	{v1.4s},[x0]
    201 	st1	{v0.4s},[x0],x4
    202 	st1	{v1.4s},[x2],#16
    203 
        // Swap the remaining pairs, applying aesimc to both keys, until the
        // two pointers meet.
    204 .Loop_imc:
    205 	ld1	{v0.4s},[x2]
    206 	ld1	{v1.4s},[x0]
    207 	aesimc	v0.16b,v0.16b
    208 	aesimc	v1.16b,v1.16b
    209 	st1	{v0.4s},[x0],x4
    210 	st1	{v1.4s},[x2],#16
    211 	cmp	x0,x2
    212 	b.hi	.Loop_imc
    213 
        // The round count is even (10, 12 or 14), so x0 == x2 here and the
        // middle key is transformed in place.
    214 	ld1	{v0.4s},[x2]
    215 	aesimc	v0.16b,v0.16b
    216 	st1	{v0.4s},[x0]
    217 
    218 	eor	x0,x0,x0		// return value
        // x30 was overwritten by the bl above, so both x29 and x30 are reloaded.
    219 .Ldec_key_abort:
    220 	ldp	x29,x30,[sp],#16
    221 	ret
    222 .size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
    223 .globl	aes_hw_encrypt
    224 .hidden	aes_hw_encrypt
    225 .type	aes_hw_encrypt,%function
    226 .align	5
        // aes_hw_encrypt: encrypt a single 16-byte block.
        //
        // In:  x0 = pointer to the input block
        //      x1 = pointer to the output block
        //      x2 = key schedule, with the round count at offset 240
        // Leaf routine with no stack frame. Uses w3, v0-v2 and the flags, and
        // advances x2 through the schedule.
    227 aes_hw_encrypt:
    228 	ldr	w3,[x2,#240]
    229 	ld1	{v0.4s},[x2],#16
    230 	ld1	{v2.16b},[x0]
        // w3 = rounds - 2: the last two rounds are done after the loop.
    231 	sub	w3,w3,#2
    232 	ld1	{v1.4s},[x2],#16
    233 
        // Two rounds per iteration; v0/v1 alternate as the current round key
        // and are reloaded while the other one is in use.
    234 .Loop_enc:
    235 	aese	v2.16b,v0.16b
    236 	aesmc	v2.16b,v2.16b
    237 	ld1	{v0.4s},[x2],#16
    238 	subs	w3,w3,#2
    239 	aese	v2.16b,v1.16b
    240 	aesmc	v2.16b,v2.16b
    241 	ld1	{v1.4s},[x2],#16
    242 	b.gt	.Loop_enc
    243 
        // Final two rounds: the last one has no aesmc and ends with an xor of
        // the final round key, loaded here into v0.
    244 	aese	v2.16b,v0.16b
    245 	aesmc	v2.16b,v2.16b
    246 	ld1	{v0.4s},[x2]
    247 	aese	v2.16b,v1.16b
    248 	eor	v2.16b,v2.16b,v0.16b
    249 
    250 	st1	{v2.16b},[x1]
    251 	ret
    252 .size	aes_hw_encrypt,.-aes_hw_encrypt
    253 .globl	aes_hw_decrypt
    254 .hidden	aes_hw_decrypt
    255 .type	aes_hw_decrypt,%function
    256 .align	5
        // aes_hw_decrypt: decrypt a single 16-byte block.
        //
        // In:  x0 = pointer to the input block
        //      x1 = pointer to the output block
        //      x2 = key schedule, with the round count at offset 240
        //           (presumably one produced by aes_hw_set_decrypt_key; the
        //           code itself does not check)
        // Leaf routine with no stack frame. Uses w3, v0-v2 and the flags, and
        // advances x2 through the schedule.
    257 aes_hw_decrypt:
    258 	ldr	w3,[x2,#240]
    259 	ld1	{v0.4s},[x2],#16
    260 	ld1	{v2.16b},[x0]
        // w3 = rounds - 2: the last two rounds are done after the loop.
    261 	sub	w3,w3,#2
    262 	ld1	{v1.4s},[x2],#16
    263 
        // Two rounds per iteration; v0/v1 alternate as the current round key
        // and are reloaded while the other one is in use.
    264 .Loop_dec:
    265 	aesd	v2.16b,v0.16b
    266 	aesimc	v2.16b,v2.16b
    267 	ld1	{v0.4s},[x2],#16
    268 	subs	w3,w3,#2
    269 	aesd	v2.16b,v1.16b
    270 	aesimc	v2.16b,v2.16b
    271 	ld1	{v1.4s},[x2],#16
    272 	b.gt	.Loop_dec
    273 
        // Final two rounds: the last one has no aesimc and ends with an xor of
        // the final round key, loaded here into v0.
    274 	aesd	v2.16b,v0.16b
    275 	aesimc	v2.16b,v2.16b
    276 	ld1	{v0.4s},[x2]
    277 	aesd	v2.16b,v1.16b
    278 	eor	v2.16b,v2.16b,v0.16b
    279 
    280 	st1	{v2.16b},[x1]
    281 	ret
    282 .size	aes_hw_decrypt,.-aes_hw_decrypt
    283 .globl	aes_hw_cbc_encrypt
    284 .hidden	aes_hw_cbc_encrypt
    285 .type	aes_hw_cbc_encrypt,%function
    286 .align	5
        // aes_hw_cbc_encrypt: AES-CBC encryption or decryption of whole
        // 16-byte blocks.
        //
        // In:   x0 = input pointer
        //       x1 = output pointer
        //       x2 = length in bytes. Fewer than 16 returns at once without
        //            touching the output or the IV; a trailing partial block
        //            is ignored (the length is rounded down to a multiple
        //            of 16).
        //       x3 = key schedule, with the round count at offset 240
        //       x4 = 16-byte IV, read on entry and overwritten at .Lcbc_done
        //            with the last ciphertext block
        //       w5 = zero to decrypt, non-zero to encrypt
        //
        // Register roles shared by both directions:
        //       v16,v17 = round keys 0 and 1
        //       v18-v23,v7 = the last seven round keys (v7 is the final one)
        //       v6  = chaining value (IV, then the previous ciphertext block)
        //       x8  = post-index step for input loads: 16, or 0 once the block
        //             being loaded is the last one
        // The only stack use is the x29/x30 frame record.
    287 aes_hw_cbc_encrypt:
    288 	stp	x29,x30,[sp,#-16]!
    289 	add	x29,sp,#0
    290 	subs	x2,x2,#16
    291 	mov	x8,#16
    292 	b.lo	.Lcbc_abort
    293 	csel	x8,xzr,x8,eq
    294 
        // The flags from this compare survive until "b.eq .Lcbc_dec" below;
        // no instruction in between writes the flags.
    295 	cmp	w5,#0			// en- or decrypting?
    296 	ldr	w5,[x3,#240]
    297 	and	x2,x2,#-16
    298 	ld1	{v6.16b},[x4]
    299 	ld1	{v0.16b},[x0],x8
    300 
    301 	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
    302 	sub	w5,w5,#6
    303 	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
        // w5 = rounds - 8 from here on.
    304 	sub	w5,w5,#2
    305 	ld1	{v18.4s,v19.4s},[x7],#32
    306 	ld1	{v20.4s,v21.4s},[x7],#32
    307 	ld1	{v22.4s,v23.4s},[x7],#32
    308 	ld1	{v7.4s},[x7]
    309 
    310 	add	x7,x3,#32
    311 	mov	w6,w5
    312 	b.eq	.Lcbc_dec
    313 
        // Encryption. w5 == 2 means a 10-round schedule, which has its own
        // loop at .Lcbc_enc128.
        // v5 = rndkey[0] ^ last round key. The state in v0 is kept without
        // the final key xor, and each next plaintext block is pre-xored with
        // v5, so the first aese of the next block (which xors its key operand
        // into the state) yields ciphertext ^ plaintext ^ rndkey[0].
    314 	cmp	w5,#2
    315 	eor	v0.16b,v0.16b,v6.16b
    316 	eor	v5.16b,v16.16b,v7.16b
    317 	b.eq	.Lcbc_enc128
    318 
        // 12- and 14-round schedules: v2,v3 = round keys 2 and 3; x7, x6, x12,
        // x14 and x3 become pointers to round keys 1, 4, 5, 6 and 7.
    319 	ld1	{v2.4s,v3.4s},[x7]
    320 	add	x7,x3,#16
    321 	add	x6,x3,#16*4
    322 	add	x12,x3,#16*5
    323 	aese	v0.16b,v16.16b
    324 	aesmc	v0.16b,v0.16b
    325 	add	x14,x3,#16*6
    326 	add	x3,x3,#16*7
    327 	b	.Lenter_cbc_enc
    328 
    329 .align	4
        // On entry v16 = next plaintext ^ v5 and v6 = the ciphertext block
        // that was just produced and is stored here.
    330 .Loop_cbc_enc:
    331 	aese	v0.16b,v16.16b
    332 	aesmc	v0.16b,v0.16b
    333 	st1	{v6.16b},[x1],#16
    334 .Lenter_cbc_enc:
    335 	aese	v0.16b,v17.16b
    336 	aesmc	v0.16b,v0.16b
    337 	aese	v0.16b,v2.16b
    338 	aesmc	v0.16b,v0.16b
    339 	ld1	{v16.4s},[x6]
        // w5 == 4 means a 12-round schedule, which skips the two extra rounds.
    340 	cmp	w5,#4
    341 	aese	v0.16b,v3.16b
    342 	aesmc	v0.16b,v0.16b
    343 	ld1	{v17.4s},[x12]
    344 	b.eq	.Lcbc_enc192
    345 
    346 	aese	v0.16b,v16.16b
    347 	aesmc	v0.16b,v0.16b
    348 	ld1	{v16.4s},[x14]
    349 	aese	v0.16b,v17.16b
    350 	aesmc	v0.16b,v0.16b
    351 	ld1	{v17.4s},[x3]
    352 	nop
    353 
    354 .Lcbc_enc192:
    355 	aese	v0.16b,v16.16b
    356 	aesmc	v0.16b,v0.16b
        // Count down the remaining length; the resulting flags drive both the
        // csel below and the b.hs at the end of the loop.
    357 	subs	x2,x2,#16
    358 	aese	v0.16b,v17.16b
    359 	aesmc	v0.16b,v0.16b
    360 	csel	x8,xzr,x8,eq
    361 	aese	v0.16b,v18.16b
    362 	aesmc	v0.16b,v0.16b
    363 	aese	v0.16b,v19.16b
    364 	aesmc	v0.16b,v0.16b
    365 	ld1	{v16.16b},[x0],x8
    366 	aese	v0.16b,v20.16b
    367 	aesmc	v0.16b,v0.16b
    368 	eor	v16.16b,v16.16b,v5.16b
    369 	aese	v0.16b,v21.16b
    370 	aesmc	v0.16b,v0.16b
    371 	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
    372 	aese	v0.16b,v22.16b
    373 	aesmc	v0.16b,v0.16b
    374 	aese	v0.16b,v23.16b
        // v6 = finished ciphertext block (state ^ final round key).
    375 	eor	v6.16b,v0.16b,v7.16b
    376 	b.hs	.Loop_cbc_enc
    377 
    378 	st1	{v6.16b},[x1],#16
    379 	b	.Lcbc_done
    380 
    381 .align	5
        // 10-round variant of the loop above: round keys 2 and 3 stay in
        // v2/v3, so only the plaintext is loaded inside the loop.
    382 .Lcbc_enc128:
    383 	ld1	{v2.4s,v3.4s},[x7]
    384 	aese	v0.16b,v16.16b
    385 	aesmc	v0.16b,v0.16b
    386 	b	.Lenter_cbc_enc128
    387 .Loop_cbc_enc128:
    388 	aese	v0.16b,v16.16b
    389 	aesmc	v0.16b,v0.16b
    390 	st1	{v6.16b},[x1],#16
    391 .Lenter_cbc_enc128:
    392 	aese	v0.16b,v17.16b
    393 	aesmc	v0.16b,v0.16b
    394 	subs	x2,x2,#16
    395 	aese	v0.16b,v2.16b
    396 	aesmc	v0.16b,v0.16b
    397 	csel	x8,xzr,x8,eq
    398 	aese	v0.16b,v3.16b
    399 	aesmc	v0.16b,v0.16b
    400 	aese	v0.16b,v18.16b
    401 	aesmc	v0.16b,v0.16b
    402 	aese	v0.16b,v19.16b
    403 	aesmc	v0.16b,v0.16b
    404 	ld1	{v16.16b},[x0],x8
    405 	aese	v0.16b,v20.16b
    406 	aesmc	v0.16b,v0.16b
    407 	aese	v0.16b,v21.16b
    408 	aesmc	v0.16b,v0.16b
    409 	aese	v0.16b,v22.16b
    410 	aesmc	v0.16b,v0.16b
    411 	eor	v16.16b,v16.16b,v5.16b
    412 	aese	v0.16b,v23.16b
    413 	eor	v6.16b,v0.16b,v7.16b
    414 	b.hs	.Loop_cbc_enc128
    415 
    416 	st1	{v6.16b},[x1],#16
    417 	b	.Lcbc_done
    418 .align	5
        // Decryption. v0 already holds the first block; a second one is loaded
        // into v18 (the same block again when x8 was 0, i.e. only one block).
        // After the bias x2 = bytes beyond the first three blocks, so it is
        // negative when fewer than three blocks are available.
        // w6 = rounds - 6 counts the rounds done inside the inner loops.
        // v3/v19 keep untouched copies of ciphertext for the chaining xor.
    419 .Lcbc_dec:
    420 	ld1	{v18.16b},[x0],#16
    421 	subs	x2,x2,#32		// bias
    422 	add	w6,w5,#2
    423 	orr	v3.16b,v0.16b,v0.16b
    424 	orr	v1.16b,v0.16b,v0.16b
    425 	orr	v19.16b,v18.16b,v18.16b
    426 	b.lo	.Lcbc_dec_tail
    427 
        // At least three blocks: working copies in v0, v1, v18 and the
        // untouched ciphertext in v2, v3, v19.
    428 	orr	v1.16b,v18.16b,v18.16b
    429 	ld1	{v18.16b},[x0],#16
    430 	orr	v2.16b,v0.16b,v0.16b
    431 	orr	v3.16b,v1.16b,v1.16b
    432 	orr	v19.16b,v18.16b,v18.16b
    433 
        // Three blocks per pass; two rounds per iteration of this inner loop.
    434 .Loop3x_cbc_dec:
    435 	aesd	v0.16b,v16.16b
    436 	aesimc	v0.16b,v0.16b
    437 	aesd	v1.16b,v16.16b
    438 	aesimc	v1.16b,v1.16b
    439 	aesd	v18.16b,v16.16b
    440 	aesimc	v18.16b,v18.16b
    441 	ld1	{v16.4s},[x7],#16
    442 	subs	w6,w6,#2
    443 	aesd	v0.16b,v17.16b
    444 	aesimc	v0.16b,v0.16b
    445 	aesd	v1.16b,v17.16b
    446 	aesimc	v1.16b,v1.16b
    447 	aesd	v18.16b,v17.16b
    448 	aesimc	v18.16b,v18.16b
    449 	ld1	{v17.4s},[x7],#16
    450 	b.gt	.Loop3x_cbc_dec
    451 
        // Remaining rounds. v4, v5 and v17 are set to (previous ciphertext ^
        // final round key) for the three blocks, so one xor per block applies
        // both the final key and the CBC chaining.
        // The flags from "subs x2,x2,#0x30" are used by the csel right after
        // it and by the b.hs at the end of the pass.
    452 	aesd	v0.16b,v16.16b
    453 	aesimc	v0.16b,v0.16b
    454 	aesd	v1.16b,v16.16b
    455 	aesimc	v1.16b,v1.16b
    456 	aesd	v18.16b,v16.16b
    457 	aesimc	v18.16b,v18.16b
    458 	eor	v4.16b,v6.16b,v7.16b
    459 	subs	x2,x2,#0x30
    460 	eor	v5.16b,v2.16b,v7.16b
    461 	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
    462 	aesd	v0.16b,v17.16b
    463 	aesimc	v0.16b,v0.16b
    464 	aesd	v1.16b,v17.16b
    465 	aesimc	v1.16b,v1.16b
    466 	aesd	v18.16b,v17.16b
    467 	aesimc	v18.16b,v18.16b
    468 	eor	v17.16b,v3.16b,v7.16b
    469 	add	x0,x0,x6		// x0 is adjusted in such way that
    470 					// at exit from the loop v1.16b-v18.16b
    471 					// are loaded with last "words"
        // v6 = last ciphertext block of this pass, the chaining value for the
        // next one.
    472 	orr	v6.16b,v19.16b,v19.16b
    473 	mov	x7,x3
    474 	aesd	v0.16b,v20.16b
    475 	aesimc	v0.16b,v0.16b
    476 	aesd	v1.16b,v20.16b
    477 	aesimc	v1.16b,v1.16b
    478 	aesd	v18.16b,v20.16b
    479 	aesimc	v18.16b,v18.16b
    480 	ld1	{v2.16b},[x0],#16
    481 	aesd	v0.16b,v21.16b
    482 	aesimc	v0.16b,v0.16b
    483 	aesd	v1.16b,v21.16b
    484 	aesimc	v1.16b,v1.16b
    485 	aesd	v18.16b,v21.16b
    486 	aesimc	v18.16b,v18.16b
    487 	ld1	{v3.16b},[x0],#16
    488 	aesd	v0.16b,v22.16b
    489 	aesimc	v0.16b,v0.16b
    490 	aesd	v1.16b,v22.16b
    491 	aesimc	v1.16b,v1.16b
    492 	aesd	v18.16b,v22.16b
    493 	aesimc	v18.16b,v18.16b
    494 	ld1	{v19.16b},[x0],#16
    495 	aesd	v0.16b,v23.16b
    496 	aesd	v1.16b,v23.16b
    497 	aesd	v18.16b,v23.16b
    498 	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
    499 	add	w6,w5,#2
    500 	eor	v4.16b,v4.16b,v0.16b
    501 	eor	v5.16b,v5.16b,v1.16b
    502 	eor	v18.16b,v18.16b,v17.16b
    503 	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
    504 	st1	{v4.16b},[x1],#16
    505 	orr	v0.16b,v2.16b,v2.16b
    506 	st1	{v5.16b},[x1],#16
    507 	orr	v1.16b,v3.16b,v3.16b
    508 	st1	{v18.16b},[x1],#16
    509 	orr	v18.16b,v19.16b,v19.16b
    510 	b.hs	.Loop3x_cbc_dec
    511 
        // x2 == -0x30 means the passes consumed everything; otherwise one or
        // two blocks are left for the tail.
    512 	cmn	x2,#0x30
    513 	b.eq	.Lcbc_done
    514 	nop
    515 
        // Tail: decrypt v1 and v18 together. With a single block left only
        // the v18 result is used (see .Lcbc_dec_one).
    516 .Lcbc_dec_tail:
    517 	aesd	v1.16b,v16.16b
    518 	aesimc	v1.16b,v1.16b
    519 	aesd	v18.16b,v16.16b
    520 	aesimc	v18.16b,v18.16b
    521 	ld1	{v16.4s},[x7],#16
    522 	subs	w6,w6,#2
    523 	aesd	v1.16b,v17.16b
    524 	aesimc	v1.16b,v1.16b
    525 	aesd	v18.16b,v17.16b
    526 	aesimc	v18.16b,v18.16b
    527 	ld1	{v17.4s},[x7],#16
    528 	b.gt	.Lcbc_dec_tail
    529 
    530 	aesd	v1.16b,v16.16b
    531 	aesimc	v1.16b,v1.16b
    532 	aesd	v18.16b,v16.16b
    533 	aesimc	v18.16b,v18.16b
    534 	aesd	v1.16b,v17.16b
    535 	aesimc	v1.16b,v1.16b
    536 	aesd	v18.16b,v17.16b
    537 	aesimc	v18.16b,v18.16b
    538 	aesd	v1.16b,v20.16b
    539 	aesimc	v1.16b,v1.16b
    540 	aesd	v18.16b,v20.16b
    541 	aesimc	v18.16b,v18.16b
        // x2 == -0x20 means exactly one block is left; the flags are consumed
        // by "b.eq .Lcbc_dec_one" below.
    542 	cmn	x2,#0x20
    543 	aesd	v1.16b,v21.16b
    544 	aesimc	v1.16b,v1.16b
    545 	aesd	v18.16b,v21.16b
    546 	aesimc	v18.16b,v18.16b
    547 	eor	v5.16b,v6.16b,v7.16b
    548 	aesd	v1.16b,v22.16b
    549 	aesimc	v1.16b,v1.16b
    550 	aesd	v18.16b,v22.16b
    551 	aesimc	v18.16b,v18.16b
    552 	eor	v17.16b,v3.16b,v7.16b
    553 	aesd	v1.16b,v23.16b
    554 	aesd	v18.16b,v23.16b
    555 	b.eq	.Lcbc_dec_one
        // Two blocks left: v19 (the last ciphertext block) becomes the new IV.
    556 	eor	v5.16b,v5.16b,v1.16b
    557 	eor	v17.16b,v17.16b,v18.16b
    558 	orr	v6.16b,v19.16b,v19.16b
    559 	st1	{v5.16b},[x1],#16
    560 	st1	{v17.16b},[x1],#16
    561 	b	.Lcbc_done
    562 
    563 .Lcbc_dec_one:
    564 	eor	v5.16b,v5.16b,v18.16b
    565 	orr	v6.16b,v19.16b,v19.16b
    566 	st1	{v5.16b},[x1],#16
    567 
        // Write the chaining value back to the caller's IV buffer.
    568 .Lcbc_done:
    569 	st1	{v6.16b},[x4]
        // Only x29 is reloaded: this routine makes no calls, so x30 still
        // holds the return address.
    570 .Lcbc_abort:
    571 	ldr	x29,[sp],#16
    572 	ret
    573 .size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
    574 .globl	aes_hw_ctr32_encrypt_blocks
    575 .hidden	aes_hw_ctr32_encrypt_blocks
    576 .type	aes_hw_ctr32_encrypt_blocks,%function
    577 .align	5
        // aes_hw_ctr32_encrypt_blocks: AES-CTR over whole blocks with a 32-bit
        // big-endian counter in the last word of the counter block.
        //
        // In:   x0 = input pointer
        //       x1 = output pointer
        //       x2 = number of 16-byte blocks; zero returns immediately
        //       x3 = key schedule, with the round count at offset 240
        //       x4 = 16-byte counter block; it is only read, the advanced
        //            counter is not written back
        //
        // Register roles:
        //       v16,v17 = round keys 0 and 1 (reloaded through x7)
        //       v20-v23,v7 = the last five round keys (v7 is the final one)
        //       v6  = copy of the initial counter block, used as the template
        //             into which new counter words are inserted
        //       w8  = counter in host byte order
        //       x12 = post-index step for the first load in the tail (0 when a
        //             single block remains, so the second load re-reads it)
        // The only stack use is the x29/x30 frame record.
    578 aes_hw_ctr32_encrypt_blocks:
    579 	stp	x29,x30,[sp,#-16]!
    580 	add	x29,sp,#0
        // Without this guard a zero block count would reach .Lctr32_tail,
        // which loads one input block and stores two output blocks.
        	cbz	x2,.Lctr32_done
    581 	ldr	w5,[x3,#240]
    582 
    583 	ldr	w8, [x4, #12]
    584 	ld1	{v0.4s},[x4]
    585 
    586 	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
    587 	sub	w5,w5,#4
    588 	mov	x12,#16
        // The flags from this compare feed the csel and the b.ls below; no
        // instruction in between writes the flags.
    589 	cmp	x2,#2
    590 	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
        // w5 = rounds - 6 from here on; w6 is the working copy.
    591 	sub	w5,w5,#2
    592 	ld1	{v20.4s,v21.4s},[x7],#32
    593 	ld1	{v22.4s,v23.4s},[x7],#32
    594 	ld1	{v7.4s},[x7]
    595 	add	x7,x3,#32
    596 	mov	w6,w5
    597 	csel	x12,xzr,x12,lo
    598 #ifndef __ARMEB__
    599 	rev	w8, w8
    600 #endif
        // v0 = counter, v1 = counter + 1; v18 (counter + 2) is completed only
        // on the three-block path. One or two blocks go straight to the tail.
    601 	orr	v1.16b,v0.16b,v0.16b
    602 	add	w10, w8, #1
    603 	orr	v18.16b,v0.16b,v0.16b
    604 	add	w8, w8, #2
    605 	orr	v6.16b,v0.16b,v0.16b
    606 	rev	w10, w10
    607 	mov	v1.s[3],w10
    608 	b.ls	.Lctr32_tail
    609 	rev	w12, w8
    610 	sub	x2,x2,#3		// bias
    611 	mov	v18.s[3],w12
    612 	b	.Loop3x_ctr32
    613 
    614 .align	4
        // Three counter blocks per pass; two rounds per iteration of this
        // inner loop.
    615 .Loop3x_ctr32:
    616 	aese	v0.16b,v16.16b
    617 	aesmc	v0.16b,v0.16b
    618 	aese	v1.16b,v16.16b
    619 	aesmc	v1.16b,v1.16b
    620 	aese	v18.16b,v16.16b
    621 	aesmc	v18.16b,v18.16b
    622 	ld1	{v16.4s},[x7],#16
    623 	subs	w6,w6,#2
    624 	aese	v0.16b,v17.16b
    625 	aesmc	v0.16b,v0.16b
    626 	aese	v1.16b,v17.16b
    627 	aesmc	v1.16b,v1.16b
    628 	aese	v18.16b,v17.16b
    629 	aesmc	v18.16b,v18.16b
    630 	ld1	{v17.4s},[x7],#16
    631 	b.gt	.Loop3x_ctr32
    632 
        // Remaining rounds. The in-flight states move to v4, v5 and v17 so
        // that v0, v1 and v18 can be rebuilt from the template in v6 with the
        // next three counter values. The input blocks (v2, v3, v19) are
        // pre-xored with the final round key v7, so one xor with the state
        // finishes each block.
    633 	aese	v0.16b,v16.16b
    634 	aesmc	v4.16b,v0.16b
    635 	aese	v1.16b,v16.16b
    636 	aesmc	v5.16b,v1.16b
    637 	ld1	{v2.16b},[x0],#16
    638 	orr	v0.16b,v6.16b,v6.16b
    639 	aese	v18.16b,v16.16b
    640 	aesmc	v18.16b,v18.16b
    641 	ld1	{v3.16b},[x0],#16
    642 	orr	v1.16b,v6.16b,v6.16b
    643 	aese	v4.16b,v17.16b
    644 	aesmc	v4.16b,v4.16b
    645 	aese	v5.16b,v17.16b
    646 	aesmc	v5.16b,v5.16b
    647 	ld1	{v19.16b},[x0],#16
    648 	mov	x7,x3
    649 	aese	v18.16b,v17.16b
    650 	aesmc	v17.16b,v18.16b
    651 	orr	v18.16b,v6.16b,v6.16b
    652 	add	w9,w8,#1
    653 	aese	v4.16b,v20.16b
    654 	aesmc	v4.16b,v4.16b
    655 	aese	v5.16b,v20.16b
    656 	aesmc	v5.16b,v5.16b
    657 	eor	v2.16b,v2.16b,v7.16b
    658 	add	w10,w8,#2
    659 	aese	v17.16b,v20.16b
    660 	aesmc	v17.16b,v17.16b
    661 	eor	v3.16b,v3.16b,v7.16b
    662 	add	w8,w8,#3
    663 	aese	v4.16b,v21.16b
    664 	aesmc	v4.16b,v4.16b
    665 	aese	v5.16b,v21.16b
    666 	aesmc	v5.16b,v5.16b
    667 	eor	v19.16b,v19.16b,v7.16b
    668 	rev	w9,w9
    669 	aese	v17.16b,v21.16b
    670 	aesmc	v17.16b,v17.16b
    671 	mov	v0.s[3], w9
    672 	rev	w10,w10
    673 	aese	v4.16b,v22.16b
    674 	aesmc	v4.16b,v4.16b
    675 	aese	v5.16b,v22.16b
    676 	aesmc	v5.16b,v5.16b
    677 	mov	v1.s[3], w10
    678 	rev	w12,w8
    679 	aese	v17.16b,v22.16b
    680 	aesmc	v17.16b,v17.16b
    681 	mov	v18.s[3], w12
        // Count down by three blocks; the flags are consumed by the b.hs at
        // the end of the pass.
    682 	subs	x2,x2,#3
    683 	aese	v4.16b,v23.16b
    684 	aese	v5.16b,v23.16b
    685 	aese	v17.16b,v23.16b
    686 
    687 	eor	v2.16b,v2.16b,v4.16b
    688 	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
    689 	st1	{v2.16b},[x1],#16
    690 	eor	v3.16b,v3.16b,v5.16b
    691 	mov	w6,w5
    692 	st1	{v3.16b},[x1],#16
    693 	eor	v19.16b,v19.16b,v17.16b
    694 	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
    695 	st1	{v19.16b},[x1],#16
    696 	b.hs	.Loop3x_ctr32
    697 
        // Undo the bias: x2 = blocks still to do (0, 1 or 2). x12 was used as
        // scratch in the loop, so it is set up again for the tail.
    698 	adds	x2,x2,#3
    699 	b.eq	.Lctr32_done
    700 	cmp	x2,#1
    701 	mov	x12,#16
    702 	csel	x12,xzr,x12,eq
    703 
        // Tail: one or two blocks, using the counter blocks in v0 and v1.
    704 .Lctr32_tail:
    705 	aese	v0.16b,v16.16b
    706 	aesmc	v0.16b,v0.16b
    707 	aese	v1.16b,v16.16b
    708 	aesmc	v1.16b,v1.16b
    709 	ld1	{v16.4s},[x7],#16
    710 	subs	w6,w6,#2
    711 	aese	v0.16b,v17.16b
    712 	aesmc	v0.16b,v0.16b
    713 	aese	v1.16b,v17.16b
    714 	aesmc	v1.16b,v1.16b
    715 	ld1	{v17.4s},[x7],#16
    716 	b.gt	.Lctr32_tail
    717 
    718 	aese	v0.16b,v16.16b
    719 	aesmc	v0.16b,v0.16b
    720 	aese	v1.16b,v16.16b
    721 	aesmc	v1.16b,v1.16b
    722 	aese	v0.16b,v17.16b
    723 	aesmc	v0.16b,v0.16b
    724 	aese	v1.16b,v17.16b
    725 	aesmc	v1.16b,v1.16b
        // With one block left x12 is 0, so the second load below reads the
        // same block again instead of reading past the input.
    726 	ld1	{v2.16b},[x0],x12
    727 	aese	v0.16b,v20.16b
    728 	aesmc	v0.16b,v0.16b
    729 	aese	v1.16b,v20.16b
    730 	aesmc	v1.16b,v1.16b
    731 	ld1	{v3.16b},[x0]
    732 	aese	v0.16b,v21.16b
    733 	aesmc	v0.16b,v0.16b
    734 	aese	v1.16b,v21.16b
    735 	aesmc	v1.16b,v1.16b
    736 	eor	v2.16b,v2.16b,v7.16b
    737 	aese	v0.16b,v22.16b
    738 	aesmc	v0.16b,v0.16b
    739 	aese	v1.16b,v22.16b
    740 	aesmc	v1.16b,v1.16b
    741 	eor	v3.16b,v3.16b,v7.16b
    742 	aese	v0.16b,v23.16b
    743 	aese	v1.16b,v23.16b
    744 
        // Store the second block only when two blocks were left.
    745 	cmp	x2,#1
    746 	eor	v2.16b,v2.16b,v0.16b
    747 	eor	v3.16b,v3.16b,v1.16b
    748 	st1	{v2.16b},[x1],#16
    749 	b.eq	.Lctr32_done
    750 	st1	{v3.16b},[x1]
    751 
        // Only x29 is reloaded: this routine makes no calls, so x30 still
        // holds the return address.
    752 .Lctr32_done:
    753 	ldr	x29,[sp],#16
    754 	ret
    755 .size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
    756 #endif
    757 #endif
    758