Home | History | Annotate | Download | only in aes
      1 #include "arm_arch.h"
      2 
      3 #if __ARM_MAX_ARCH__>=7
      4 .text
      5 #if !defined(__clang__)
      6 .arch	armv8-a+crypto
      7 #endif
      8 .align	5
        // Constant table used by the key expansion in aes_v8_set_encrypt_key.
        // The first two rows are loaded together into v1 and v2; the third row
        // is loaded into v1 later, after the 8-iteration 128 bit loop.
      9 .Lrcon:
     10 .long	0x01,0x01,0x01,0x01	// starting round constant, one copy per word
     11 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
     12 .long	0x1b,0x1b,0x1b,0x1b	// round constant that replaces the doubled value in v1
     13 
     14 .globl	aes_v8_set_encrypt_key
     15 .type	aes_v8_set_encrypt_key,%function
     16 .align	5
        // aes_v8_set_encrypt_key: expand a 128, 192 or 256 bit AES key.
        //   x0 = pointer to the user key bytes
        //   w1 = key length in bits
        //   x2 = pointer to the output key schedule
        // Result in x0: 0 on success, -1 if x0 or x2 is null, -2 if w1 is
        // below 128, above 256 or not a multiple of 64.
        // On success the round count (10, 12 or 14) is stored 240 bytes past
        // the start of the schedule. w12 and x2 are left holding that count
        // and that address, which aes_v8_set_decrypt_key relies on.
        // Scratch: v0-v6, w1, x3, w12. v0 is kept at zero throughout.
     17 aes_v8_set_encrypt_key:
     18 .Lenc_key:
     19 	stp	x29,x30,[sp,#-16]!
     20 	add	x29,sp,#0
     21 	mov	x3,#-1	// result if either pointer is null
     22 	cmp	x0,#0
     23 	b.eq	.Lenc_key_abort
     24 	cmp	x2,#0
     25 	b.eq	.Lenc_key_abort
     26 	mov	x3,#-2	// result if the bit length is rejected
     27 	cmp	w1,#128
     28 	b.lt	.Lenc_key_abort
     29 	cmp	w1,#256
     30 	b.gt	.Lenc_key_abort
     31 	tst	w1,#0x3f	// only multiples of 64 are accepted
     32 	b.ne	.Lenc_key_abort
     33 
     34 	adr	x3,.Lrcon	// x3 is reused as the constant table pointer
     35 	cmp	w1,#192	// flags feed the three-way branch below
     36 
     37 	eor	v0.16b,v0.16b,v0.16b	// v0 = 0
     38 	ld1	{v3.16b},[x0],#16	// first 16 bytes of the user key
     39 	mov	w1,#8		// reuse w1 as the iteration counter
     40 	ld1	{v1.4s,v2.4s},[x3],#32	// v1 = round constant, v2 = tbl mask
     41 
        // None of the instructions since the compare against 192 modify the
        // flags, so the branches below still see that comparison.
     42 	b.lt	.Loop128
     43 	b.eq	.L192
     44 	b	.L256
     45 
     46 .align	4
        // 128 bit key: each iteration stores the current round key from v3
        // and derives the next one in v3.
     47 .Loop128:
     48 	tbl	v6.16b,{v3.16b},v2.16b	// rotated last word copied into all four words
     49 	ext	v5.16b,v0.16b,v3.16b,#12	// v5 = v3 moved up one word, zero filled
     50 	st1	{v3.4s},[x2],#16	// emit the current round key
     51 	aese	v6.16b,v0.16b	// AES substitution step with an all-zero key
     52 	subs	w1,w1,#1	// flags are consumed by b.ne at the bottom
     53 
     54 	eor	v3.16b,v3.16b,v5.16b
     55 	ext	v5.16b,v0.16b,v5.16b,#12
     56 	eor	v3.16b,v3.16b,v5.16b
     57 	ext	v5.16b,v0.16b,v5.16b,#12
     58 	eor	v6.16b,v6.16b,v1.16b	// fold in the round constant
     59 	eor	v3.16b,v3.16b,v5.16b	// each word is now xored with all lower words
     60 	shl	v1.16b,v1.16b,#1	// double the round constant for the next round
     61 	eor	v3.16b,v3.16b,v6.16b	// v3 = next round key
     62 	b.ne	.Loop128
     63 
        // After eight doublings the 0x01 constant has been shifted out of its
        // byte lanes, so the third table row (0x1b) is loaded instead.
     64 	ld1	{v1.4s},[x3]
     65 
        // Two more rounds, unrolled, with the same steps as the loop body.
     66 	tbl	v6.16b,{v3.16b},v2.16b
     67 	ext	v5.16b,v0.16b,v3.16b,#12
     68 	st1	{v3.4s},[x2],#16
     69 	aese	v6.16b,v0.16b
     70 
     71 	eor	v3.16b,v3.16b,v5.16b
     72 	ext	v5.16b,v0.16b,v5.16b,#12
     73 	eor	v3.16b,v3.16b,v5.16b
     74 	ext	v5.16b,v0.16b,v5.16b,#12
     75 	eor	v6.16b,v6.16b,v1.16b
     76 	eor	v3.16b,v3.16b,v5.16b
     77 	shl	v1.16b,v1.16b,#1
     78 	eor	v3.16b,v3.16b,v6.16b
     79 
     80 	tbl	v6.16b,{v3.16b},v2.16b
     81 	ext	v5.16b,v0.16b,v3.16b,#12
     82 	st1	{v3.4s},[x2],#16
     83 	aese	v6.16b,v0.16b
     84 
     85 	eor	v3.16b,v3.16b,v5.16b
     86 	ext	v5.16b,v0.16b,v5.16b,#12
     87 	eor	v3.16b,v3.16b,v5.16b
     88 	ext	v5.16b,v0.16b,v5.16b,#12
     89 	eor	v6.16b,v6.16b,v1.16b
     90 	eor	v3.16b,v3.16b,v5.16b
     91 	eor	v3.16b,v3.16b,v6.16b
     92 	st1	{v3.4s},[x2]	// eleventh and last round key, at offset 160
     93 	add	x2,x2,#0x50	// x2 = schedule start + 240
     94 
     95 	mov	w12,#10	// round count for a 128 bit key
     96 	b	.Ldone
     97 
     98 .align	4
        // 192 bit key: v3 holds the first 16 key bytes, v4 the remaining 8.
        // Each iteration emits 24 bytes of schedule (8 from v4, 16 from v3).
     99 .L192:
    100 	ld1	{v4.8b},[x0],#8	// last 8 bytes of the user key
    101 	movi	v6.16b,#8			// borrow v6.16b
    102 	st1	{v3.4s},[x2],#16
    103 	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
    104 
    105 .Loop192:
    106 	tbl	v6.16b,{v4.16b},v2.16b
    107 	ext	v5.16b,v0.16b,v3.16b,#12
    108 	st1	{v4.8b},[x2],#8
    109 	aese	v6.16b,v0.16b
    110 	subs	w1,w1,#1	// flags are consumed by b.ne at the bottom
    111 
    112 	eor	v3.16b,v3.16b,v5.16b
    113 	ext	v5.16b,v0.16b,v5.16b,#12
    114 	eor	v3.16b,v3.16b,v5.16b
    115 	ext	v5.16b,v0.16b,v5.16b,#12
    116 	eor	v3.16b,v3.16b,v5.16b
    117 
    118 	dup	v5.4s,v3.s[3]
    119 	eor	v5.16b,v5.16b,v4.16b
    120 	eor	v6.16b,v6.16b,v1.16b
    121 	ext	v4.16b,v0.16b,v4.16b,#12
    122 	shl	v1.16b,v1.16b,#1
    123 	eor	v4.16b,v4.16b,v5.16b
    124 	eor	v3.16b,v3.16b,v6.16b
    125 	eor	v4.16b,v4.16b,v6.16b
    126 	st1	{v3.4s},[x2],#16
    127 	b.ne	.Loop192
    128 
    129 	mov	w12,#12	// round count for a 192 bit key
    130 	add	x2,x2,#0x20	// 16 + 8*24 + 32 = 240 bytes past the schedule start
    131 	b	.Ldone
    132 
    133 .align	4
        // 256 bit key: v3 and v4 hold the two 16 byte halves of the key.
        // The loop alternates between updating v3 and v4 and leaves through
        // the b.eq in the middle after seven passes.
    134 .L256:
    135 	ld1	{v4.16b},[x0]	// second 16 bytes of the user key
    136 	mov	w1,#7
    137 	mov	w12,#14	// round count for a 256 bit key
    138 	st1	{v3.4s},[x2],#16
    139 
    140 .Loop256:
    141 	tbl	v6.16b,{v4.16b},v2.16b
    142 	ext	v5.16b,v0.16b,v3.16b,#12
    143 	st1	{v4.4s},[x2],#16
    144 	aese	v6.16b,v0.16b
    145 	subs	w1,w1,#1	// flags are consumed by b.eq below
    146 
    147 	eor	v3.16b,v3.16b,v5.16b
    148 	ext	v5.16b,v0.16b,v5.16b,#12
    149 	eor	v3.16b,v3.16b,v5.16b
    150 	ext	v5.16b,v0.16b,v5.16b,#12
    151 	eor	v6.16b,v6.16b,v1.16b
    152 	eor	v3.16b,v3.16b,v5.16b
    153 	shl	v1.16b,v1.16b,#1
    154 	eor	v3.16b,v3.16b,v6.16b
    155 	st1	{v3.4s},[x2],#16
    156 	b.eq	.Ldone	// x2 is 16 + 7*32 = 240 bytes past the start here
    157 
    158 	dup	v6.4s,v3.s[3]		// just splat
    159 	ext	v5.16b,v0.16b,v4.16b,#12
    160 	aese	v6.16b,v0.16b
    161 
    162 	eor	v4.16b,v4.16b,v5.16b
    163 	ext	v5.16b,v0.16b,v5.16b,#12
    164 	eor	v4.16b,v4.16b,v5.16b
    165 	ext	v5.16b,v0.16b,v5.16b,#12
    166 	eor	v4.16b,v4.16b,v5.16b
    167 
    168 	eor	v4.16b,v4.16b,v6.16b
    169 	b	.Loop256
    170 
    171 .Ldone:
    172 	str	w12,[x2]	// round count at offset 240 of the schedule
    173 	mov	x3,#0	// success
    174 
    175 .Lenc_key_abort:
    176 	mov	x0,x3			// return value
        // x30 is never modified in this routine, so only x29 is reloaded.
    177 	ldr	x29,[sp],#16
    178 	ret
    179 .size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
    180 
    181 .globl	aes_v8_set_decrypt_key
    182 .type	aes_v8_set_decrypt_key,%function
    183 .align	5
        // aes_v8_set_decrypt_key: build a decryption key schedule.
        // Takes the same x0, w1 and x2 arguments as aes_v8_set_encrypt_key,
        // which it calls through the local entry .Lenc_key.
        // A nonzero result from that call is returned unchanged in x0.
        // Otherwise the round keys are reversed in place, aesimc is applied
        // to every key except the first and the last, and 0 is returned.
        // Scratch in addition to the callee: v0, v1, x4.
    184 aes_v8_set_decrypt_key:
    185 	stp	x29,x30,[sp,#-16]!
    186 	add	x29,sp,#0
    187 	bl	.Lenc_key	// build the encryption schedule first
    188 
    189 	cmp	x0,#0
    190 	b.ne	.Ldec_key_abort	// pass the error code through in x0
    191 
        // The callee leaves x2 at schedule + 240 and the round count in w12.
    192 	sub	x2,x2,#240		// restore original x2
    193 	mov	x4,#-16	// post-index step that walks x0 downwards
    194 	add	x0,x2,x12,lsl#4	// end of key schedule
    195 
        // Swap the first and last round keys without transforming them.
    196 	ld1	{v0.4s},[x2]
    197 	ld1	{v1.4s},[x0]
    198 	st1	{v0.4s},[x0],x4
    199 	st1	{v1.4s},[x2],#16
    200 
        // Swap the remaining keys pairwise from both ends, applying aesimc
        // to each, until the two pointers meet.
    201 .Loop_imc:
    202 	ld1	{v0.4s},[x2]
    203 	ld1	{v1.4s},[x0]
    204 	aesimc	v0.16b,v0.16b
    205 	aesimc	v1.16b,v1.16b
    206 	st1	{v0.4s},[x0],x4
    207 	st1	{v1.4s},[x2],#16
    208 	cmp	x0,x2
    209 	b.hi	.Loop_imc
    210 
        // Remaining key between the two pointers: transformed once.
    211 	ld1	{v0.4s},[x2]
    212 	aesimc	v0.16b,v0.16b
    213 	st1	{v0.4s},[x0]
    214 
    215 	eor	x0,x0,x0		// return value
    216 .Ldec_key_abort:
    217 	ldp	x29,x30,[sp],#16	// x30 was overwritten by bl, so reload both
    218 	ret
    219 .size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
    220 .globl	aes_v8_encrypt
    221 .type	aes_v8_encrypt,%function
    222 .align	5
        // aes_v8_encrypt: encrypt one 16 byte block.
        //   x0 = pointer to the input block
        //   x1 = pointer to the output block
        //   x2 = pointer to the key schedule, round count read at offset 240
        // Leaf routine with no stack use. Scratch: w3, x2, v0-v2.
    223 aes_v8_encrypt:
    224 	ldr	w3,[x2,#240]	// w3 = round count
    225 	ld1	{v0.4s},[x2],#16	// v0 = rndkey[0]
    226 	ld1	{v2.16b},[x0]	// v2 = block being processed
    227 	sub	w3,w3,#2	// the last two rounds are done after the loop
    228 	ld1	{v1.4s},[x2],#16	// v1 = rndkey[1]
    229 
        // Two rounds per iteration; the next pair of round keys is loaded
        // while the current pair is in use.
    230 .Loop_enc:
    231 	aese	v2.16b,v0.16b
    232 	aesmc	v2.16b,v2.16b
    233 	ld1	{v0.4s},[x2],#16
    234 	subs	w3,w3,#2	// flags are consumed by b.gt below
    235 	aese	v2.16b,v1.16b
    236 	aesmc	v2.16b,v2.16b
    237 	ld1	{v1.4s},[x2],#16
    238 	b.gt	.Loop_enc
    239 
    240 	aese	v2.16b,v0.16b
    241 	aesmc	v2.16b,v2.16b
    242 	ld1	{v0.4s},[x2]	// last round key
    243 	aese	v2.16b,v1.16b	// final round has no aesmc
    244 	eor	v2.16b,v2.16b,v0.16b	// xor with the last round key
    245 
    246 	st1	{v2.16b},[x1]
    247 	ret
    248 .size	aes_v8_encrypt,.-aes_v8_encrypt
    249 .globl	aes_v8_decrypt
    250 .type	aes_v8_decrypt,%function
    251 .align	5
        // aes_v8_decrypt: decrypt one 16 byte block.
        //   x0 = pointer to the input block
        //   x1 = pointer to the output block
        //   x2 = pointer to the key schedule, round count read at offset 240
        // Same structure as aes_v8_encrypt with aesd and aesimc in place of
        // aese and aesmc. Presumably expects a schedule prepared by
        // aes_v8_set_decrypt_key - confirm against callers.
        // Leaf routine with no stack use. Scratch: w3, x2, v0-v2.
    252 aes_v8_decrypt:
    253 	ldr	w3,[x2,#240]	// w3 = round count
    254 	ld1	{v0.4s},[x2],#16	// v0 = first round key
    255 	ld1	{v2.16b},[x0]	// v2 = block being processed
    256 	sub	w3,w3,#2	// the last two rounds are done after the loop
    257 	ld1	{v1.4s},[x2],#16	// v1 = second round key
    258 
        // Two rounds per iteration; the next pair of round keys is loaded
        // while the current pair is in use.
    259 .Loop_dec:
    260 	aesd	v2.16b,v0.16b
    261 	aesimc	v2.16b,v2.16b
    262 	ld1	{v0.4s},[x2],#16
    263 	subs	w3,w3,#2	// flags are consumed by b.gt below
    264 	aesd	v2.16b,v1.16b
    265 	aesimc	v2.16b,v2.16b
    266 	ld1	{v1.4s},[x2],#16
    267 	b.gt	.Loop_dec
    268 
    269 	aesd	v2.16b,v0.16b
    270 	aesimc	v2.16b,v2.16b
    271 	ld1	{v0.4s},[x2]	// last round key
    272 	aesd	v2.16b,v1.16b	// final round has no aesimc
    273 	eor	v2.16b,v2.16b,v0.16b	// xor with the last round key
    274 
    275 	st1	{v2.16b},[x1]
    276 	ret
    277 .size	aes_v8_decrypt,.-aes_v8_decrypt
    278 .globl	aes_v8_cbc_encrypt
    279 .type	aes_v8_cbc_encrypt,%function
    280 .align	5
        // aes_v8_cbc_encrypt: AES-CBC over whole 16 byte blocks.
        //   x0 = input pointer
        //   x1 = output pointer
        //   x2 = length in bytes
        //   x3 = key schedule, round count read at offset 240
        //   x4 = pointer to the 16 byte IV, loaded on entry and written on exit
        //   w5 = direction: zero selects decryption, nonzero encryption
        // Returns immediately when x2 is below 16; otherwise the length is
        // rounded down to a multiple of 16.
        // Register roles: v16 and v17 hold the round keys currently in use,
        // v18-v23 and v7 hold the last seven round keys, v6 carries the
        // chaining value that is stored back through x4.
        // x8 is the post-index step for input loads: 16, or 0 once the last
        // block has been reached so the look-ahead load does not advance.
    281 aes_v8_cbc_encrypt:
    282 	stp	x29,x30,[sp,#-16]!
    283 	add	x29,sp,#0
    284 	subs	x2,x2,#16
    285 	mov	x8,#16
    286 	b.lo	.Lcbc_abort	// fewer than 16 bytes
    287 	csel	x8,xzr,x8,eq	// exactly one block: do not advance x0
    288 
    289 	cmp	w5,#0			// en- or decrypting?
    290 	ldr	w5,[x3,#240]	// w5 is reused for the round count
    291 	and	x2,x2,#-16
    292 	ld1	{v6.16b},[x4]	// v6 = IV
    293 	ld1	{v0.16b},[x0],x8	// first input block
    294 
    295 	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
    296 	sub	w5,w5,#6
    297 	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
    298 	sub	w5,w5,#2	// w5 = round count - 8
    299 	ld1	{v18.4s,v19.4s},[x7],#32
    300 	ld1	{v20.4s,v21.4s},[x7],#32
    301 	ld1	{v22.4s,v23.4s},[x7],#32
    302 	ld1	{v7.4s},[x7]	// v7 = last round key
    303 
    304 	add	x7,x3,#32	// x7 = address of rndkey[2]
    305 	mov	w6,w5
        // Nothing since the compare of w5 against zero has modified the flags.
    306 	b.eq	.Lcbc_dec
    307 
        // Encryption. v5 = rndkey[0] xor last round key is xored into each
        // following plaintext block before it is used as the aese key.
    308 	cmp	w5,#2	// equal when the round count is 10
    309 	eor	v0.16b,v0.16b,v6.16b	// first block xor IV
    310 	eor	v5.16b,v16.16b,v7.16b
    311 	b.eq	.Lcbc_enc128
    312 
        // Round counts other than 10: keep rndkey[2] and rndkey[3] in v2 and
        // v3 and precompute the addresses of rndkey[1] and rndkey[4..7].
    313 	ld1	{v2.4s,v3.4s},[x7]
    314 	add	x7,x3,#16
    315 	add	x6,x3,#16*4
    316 	add	x12,x3,#16*5
    317 	aese	v0.16b,v16.16b
    318 	aesmc	v0.16b,v0.16b
    319 	add	x14,x3,#16*6
    320 	add	x3,x3,#16*7
    321 	b	.Lenter_cbc_enc
    322 
    323 .align	4
    324 .Loop_cbc_enc:
    325 	aese	v0.16b,v16.16b
    326 	aesmc	v0.16b,v0.16b
    327 	st1	{v6.16b},[x1],#16	// store the previous ciphertext block
    328 .Lenter_cbc_enc:
    329 	aese	v0.16b,v17.16b
    330 	aesmc	v0.16b,v0.16b
    331 	aese	v0.16b,v2.16b
    332 	aesmc	v0.16b,v0.16b
    333 	ld1	{v16.4s},[x6]
    334 	cmp	w5,#4	// equal when the round count is 12
    335 	aese	v0.16b,v3.16b
    336 	aesmc	v0.16b,v0.16b
    337 	ld1	{v17.4s},[x12]
    338 	b.eq	.Lcbc_enc192
    339 
        // Two extra rounds executed only when the branch above is not taken.
    340 	aese	v0.16b,v16.16b
    341 	aesmc	v0.16b,v0.16b
    342 	ld1	{v16.4s},[x14]
    343 	aese	v0.16b,v17.16b
    344 	aesmc	v0.16b,v0.16b
    345 	ld1	{v17.4s},[x3]
    346 	nop
    347 
    348 .Lcbc_enc192:
    349 	aese	v0.16b,v16.16b
    350 	aesmc	v0.16b,v0.16b
    351 	subs	x2,x2,#16	// flags feed csel and the b.hs at the bottom
    352 	aese	v0.16b,v17.16b
    353 	aesmc	v0.16b,v0.16b
    354 	csel	x8,xzr,x8,eq	// last block reached: stop advancing x0
    355 	aese	v0.16b,v18.16b
    356 	aesmc	v0.16b,v0.16b
    357 	aese	v0.16b,v19.16b
    358 	aesmc	v0.16b,v0.16b
    359 	ld1	{v16.16b},[x0],x8	// look-ahead load of the next input block
    360 	aese	v0.16b,v20.16b
    361 	aesmc	v0.16b,v0.16b
    362 	eor	v16.16b,v16.16b,v5.16b
    363 	aese	v0.16b,v21.16b
    364 	aesmc	v0.16b,v0.16b
    365 	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
    366 	aese	v0.16b,v22.16b
    367 	aesmc	v0.16b,v0.16b
    368 	aese	v0.16b,v23.16b
    369 	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
    370 	b.hs	.Loop_cbc_enc
    371 
    372 	st1	{v6.16b},[x1],#16
    373 	b	.Lcbc_done
    374 
    375 .align	5
        // Encryption with a round count of 10: every round key stays in a
        // register (v16, v17, v2, v3, v18-v23, v7), so the loop reloads none.
    376 .Lcbc_enc128:
    377 	ld1	{v2.4s,v3.4s},[x7]
    378 	aese	v0.16b,v16.16b
    379 	aesmc	v0.16b,v0.16b
    380 	b	.Lenter_cbc_enc128
    381 .Loop_cbc_enc128:
    382 	aese	v0.16b,v16.16b
    383 	aesmc	v0.16b,v0.16b
    384 	st1	{v6.16b},[x1],#16	// store the previous ciphertext block
    385 .Lenter_cbc_enc128:
    386 	aese	v0.16b,v17.16b
    387 	aesmc	v0.16b,v0.16b
    388 	subs	x2,x2,#16	// flags feed csel and the b.hs at the bottom
    389 	aese	v0.16b,v2.16b
    390 	aesmc	v0.16b,v0.16b
    391 	csel	x8,xzr,x8,eq	// last block reached: stop advancing x0
    392 	aese	v0.16b,v3.16b
    393 	aesmc	v0.16b,v0.16b
    394 	aese	v0.16b,v18.16b
    395 	aesmc	v0.16b,v0.16b
    396 	aese	v0.16b,v19.16b
    397 	aesmc	v0.16b,v0.16b
    398 	ld1	{v16.16b},[x0],x8	// look-ahead load of the next input block
    399 	aese	v0.16b,v20.16b
    400 	aesmc	v0.16b,v0.16b
    401 	aese	v0.16b,v21.16b
    402 	aesmc	v0.16b,v0.16b
    403 	aese	v0.16b,v22.16b
    404 	aesmc	v0.16b,v0.16b
    405 	eor	v16.16b,v16.16b,v5.16b
    406 	aese	v0.16b,v23.16b
    407 	eor	v6.16b,v0.16b,v7.16b	// v6 = ciphertext block
    408 	b.hs	.Loop_cbc_enc128
    409 
    410 	st1	{v6.16b},[x1],#16
    411 	b	.Lcbc_done
    412 .align	5
        // Decryption. Blocks are processed three at a time in v0, v1 and v18
        // while v2, v3 and v19 keep copies of the input blocks for chaining.
        // Fewer than three blocks go straight to the tail.
    413 .Lcbc_dec:
    414 	ld1	{v18.16b},[x0],#16
    415 	subs	x2,x2,#32		// bias
    416 	add	w6,w5,#2
    417 	orr	v3.16b,v0.16b,v0.16b
    418 	orr	v1.16b,v0.16b,v0.16b
    419 	orr	v19.16b,v18.16b,v18.16b
    420 	b.lo	.Lcbc_dec_tail
    421 
    422 	orr	v1.16b,v18.16b,v18.16b
    423 	ld1	{v18.16b},[x0],#16
    424 	orr	v2.16b,v0.16b,v0.16b
    425 	orr	v3.16b,v1.16b,v1.16b
    426 	orr	v19.16b,v18.16b,v18.16b
    427 
    428 .Loop3x_cbc_dec:
    429 	aesd	v0.16b,v16.16b
    430 	aesimc	v0.16b,v0.16b
    431 	aesd	v1.16b,v16.16b
    432 	aesimc	v1.16b,v1.16b
    433 	aesd	v18.16b,v16.16b
    434 	aesimc	v18.16b,v18.16b
    435 	ld1	{v16.4s},[x7],#16
    436 	subs	w6,w6,#2	// flags are consumed by b.gt below
    437 	aesd	v0.16b,v17.16b
    438 	aesimc	v0.16b,v0.16b
    439 	aesd	v1.16b,v17.16b
    440 	aesimc	v1.16b,v1.16b
    441 	aesd	v18.16b,v17.16b
    442 	aesimc	v18.16b,v18.16b
    443 	ld1	{v17.4s},[x7],#16
    444 	b.gt	.Loop3x_cbc_dec
    445 
    446 	aesd	v0.16b,v16.16b
    447 	aesimc	v0.16b,v0.16b
    448 	aesd	v1.16b,v16.16b
    449 	aesimc	v1.16b,v1.16b
    450 	aesd	v18.16b,v16.16b
    451 	aesimc	v18.16b,v18.16b
    452 	eor	v4.16b,v6.16b,v7.16b	// chaining value xor last round key
    453 	subs	x2,x2,#0x30	// flags feed csel and the b.hs at the bottom
    454 	eor	v5.16b,v2.16b,v7.16b
    455 	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
    456 	aesd	v0.16b,v17.16b
    457 	aesimc	v0.16b,v0.16b
    458 	aesd	v1.16b,v17.16b
    459 	aesimc	v1.16b,v1.16b
    460 	aesd	v18.16b,v17.16b
    461 	aesimc	v18.16b,v18.16b
    462 	eor	v17.16b,v3.16b,v7.16b
    463 	add	x0,x0,x6		// x0 is adjusted in such way that
    464 					// at exit from the loop v1.16b-v18.16b
    465 					// are loaded with last "words"
    466 	orr	v6.16b,v19.16b,v19.16b	// new chaining value
    467 	mov	x7,x3	// rewind the round key pointer
    468 	aesd	v0.16b,v20.16b
    469 	aesimc	v0.16b,v0.16b
    470 	aesd	v1.16b,v20.16b
    471 	aesimc	v1.16b,v1.16b
    472 	aesd	v18.16b,v20.16b
    473 	aesimc	v18.16b,v18.16b
    474 	ld1	{v2.16b},[x0],#16
    475 	aesd	v0.16b,v21.16b
    476 	aesimc	v0.16b,v0.16b
    477 	aesd	v1.16b,v21.16b
    478 	aesimc	v1.16b,v1.16b
    479 	aesd	v18.16b,v21.16b
    480 	aesimc	v18.16b,v18.16b
    481 	ld1	{v3.16b},[x0],#16
    482 	aesd	v0.16b,v22.16b
    483 	aesimc	v0.16b,v0.16b
    484 	aesd	v1.16b,v22.16b
    485 	aesimc	v1.16b,v1.16b
    486 	aesd	v18.16b,v22.16b
    487 	aesimc	v18.16b,v18.16b
    488 	ld1	{v19.16b},[x0],#16
    489 	aesd	v0.16b,v23.16b
    490 	aesd	v1.16b,v23.16b
    491 	aesd	v18.16b,v23.16b
    492 	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
    493 	add	w6,w5,#2
    494 	eor	v4.16b,v4.16b,v0.16b
    495 	eor	v5.16b,v5.16b,v1.16b
    496 	eor	v18.16b,v18.16b,v17.16b
    497 	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
    498 	st1	{v4.16b},[x1],#16
    499 	orr	v0.16b,v2.16b,v2.16b
    500 	st1	{v5.16b},[x1],#16
    501 	orr	v1.16b,v3.16b,v3.16b
    502 	st1	{v18.16b},[x1],#16
    503 	orr	v18.16b,v19.16b,v19.16b
    504 	b.hs	.Loop3x_cbc_dec
    505 
    506 	cmn	x2,#0x30	// equal when no blocks remain
    507 	b.eq	.Lcbc_done
    508 	nop
    509 
        // Tail: one or two remaining blocks, processed in v1 and v18.
    510 .Lcbc_dec_tail:
    511 	aesd	v1.16b,v16.16b
    512 	aesimc	v1.16b,v1.16b
    513 	aesd	v18.16b,v16.16b
    514 	aesimc	v18.16b,v18.16b
    515 	ld1	{v16.4s},[x7],#16
    516 	subs	w6,w6,#2	// flags are consumed by b.gt below
    517 	aesd	v1.16b,v17.16b
    518 	aesimc	v1.16b,v1.16b
    519 	aesd	v18.16b,v17.16b
    520 	aesimc	v18.16b,v18.16b
    521 	ld1	{v17.4s},[x7],#16
    522 	b.gt	.Lcbc_dec_tail
    523 
    524 	aesd	v1.16b,v16.16b
    525 	aesimc	v1.16b,v1.16b
    526 	aesd	v18.16b,v16.16b
    527 	aesimc	v18.16b,v18.16b
    528 	aesd	v1.16b,v17.16b
    529 	aesimc	v1.16b,v1.16b
    530 	aesd	v18.16b,v17.16b
    531 	aesimc	v18.16b,v18.16b
    532 	aesd	v1.16b,v20.16b
    533 	aesimc	v1.16b,v1.16b
    534 	aesd	v18.16b,v20.16b
    535 	aesimc	v18.16b,v18.16b
    536 	cmn	x2,#0x20	// equal when exactly one block remains
    537 	aesd	v1.16b,v21.16b
    538 	aesimc	v1.16b,v1.16b
    539 	aesd	v18.16b,v21.16b
    540 	aesimc	v18.16b,v18.16b
    541 	eor	v5.16b,v6.16b,v7.16b
    542 	aesd	v1.16b,v22.16b
    543 	aesimc	v1.16b,v1.16b
    544 	aesd	v18.16b,v22.16b
    545 	aesimc	v18.16b,v18.16b
    546 	eor	v17.16b,v3.16b,v7.16b
    547 	aesd	v1.16b,v23.16b
    548 	aesd	v18.16b,v23.16b
    549 	b.eq	.Lcbc_dec_one
        // Two blocks: v1 and v18 both produce output.
    550 	eor	v5.16b,v5.16b,v1.16b
    551 	eor	v17.16b,v17.16b,v18.16b
    552 	orr	v6.16b,v19.16b,v19.16b
    553 	st1	{v5.16b},[x1],#16
    554 	st1	{v17.16b},[x1],#16
    555 	b	.Lcbc_done
    556 
        // One block: only the result in v18 is stored.
    557 .Lcbc_dec_one:
    558 	eor	v5.16b,v5.16b,v18.16b
    559 	orr	v6.16b,v19.16b,v19.16b
    560 	st1	{v5.16b},[x1],#16
    561 
    562 .Lcbc_done:
    563 	st1	{v6.16b},[x4]	// write the chaining value back over the IV
    564 .Lcbc_abort:
        // x30 is never modified in this routine, so only x29 is reloaded.
    565 	ldr	x29,[sp],#16
    566 	ret
    567 .size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
    568 .globl	aes_v8_ctr32_encrypt_blocks
    569 .type	aes_v8_ctr32_encrypt_blocks,%function
    570 .align	5
        // aes_v8_ctr32_encrypt_blocks: AES counter mode with a 32 bit counter.
        //   x0 = input pointer
        //   x1 = output pointer
        //   x2 = number of 16 byte blocks
        //   x3 = key schedule, round count read at offset 240
        //   x4 = pointer to the 16 byte counter block; the word at byte
        //        offset 12 is the counter
        // The counter is advanced only in w8, w9 and w10; nothing is stored
        // through x4.
        // Register roles: v6 keeps the original counter block as a template,
        // v0, v1 and v18 are the counter blocks being encrypted, v16 and v17
        // hold the round keys currently in use, v20-v23 and v7 hold the last
        // five round keys.
    571 aes_v8_ctr32_encrypt_blocks:
    572 	stp	x29,x30,[sp,#-16]!
    573 	add	x29,sp,#0
    574 	ldr	w5,[x3,#240]	// w5 = round count
    575 
    576 	ldr	w8, [x4, #12]	// w8 = counter word as stored in memory
    577 	ld1	{v0.4s},[x4]
    578 
    579 	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
    580 	sub	w5,w5,#4
    581 	mov	x12,#16	// step between the two input loads in the tail
    582 	cmp	x2,#2	// flags feed csel and b.ls below
    583 	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
    584 	sub	w5,w5,#2	// w5 = round count - 6
    585 	ld1	{v20.4s,v21.4s},[x7],#32
    586 	ld1	{v22.4s,v23.4s},[x7],#32
    587 	ld1	{v7.4s},[x7]	// v7 = last round key
    588 	add	x7,x3,#32	// x7 = address of rndkey[2]
    589 	mov	w6,w5
    590 	csel	x12,xzr,x12,lo	// fewer than two blocks: tail reloads the same block
    591 #ifndef __ARMEB__
    592 	rev	w8, w8
    593 #endif
    594 	orr	v1.16b,v0.16b,v0.16b
    595 	add	w10, w8, #1
    596 	orr	v18.16b,v0.16b,v0.16b
    597 	add	w8, w8, #2
    598 	orr	v6.16b,v0.16b,v0.16b	// template copy of the counter block
    599 	rev	w10, w10
    600 	mov	v1.s[3],w10	// v1 = counter + 1
        // Nothing since the compare of x2 against 2 has modified the flags.
    601 	b.ls	.Lctr32_tail	// one or two blocks
    602 	rev	w12, w8	// overwrites x12, which is set again before the tail
    603 	sub	x2,x2,#3		// bias
    604 	mov	v18.s[3],w12	// v18 = counter + 2
    605 	b	.Loop3x_ctr32
    606 
    607 .align	4
        // Main loop: three counter blocks per pass.
    608 .Loop3x_ctr32:
    609 	aese	v0.16b,v16.16b
    610 	aesmc	v0.16b,v0.16b
    611 	aese	v1.16b,v16.16b
    612 	aesmc	v1.16b,v1.16b
    613 	aese	v18.16b,v16.16b
    614 	aesmc	v18.16b,v18.16b
    615 	ld1	{v16.4s},[x7],#16
    616 	subs	w6,w6,#2	// flags are consumed by b.gt below
    617 	aese	v0.16b,v17.16b
    618 	aesmc	v0.16b,v0.16b
    619 	aese	v1.16b,v17.16b
    620 	aesmc	v1.16b,v1.16b
    621 	aese	v18.16b,v17.16b
    622 	aesmc	v18.16b,v18.16b
    623 	ld1	{v17.4s},[x7],#16
    624 	b.gt	.Loop3x_ctr32
    625 
        // The three states move to v4, v5 and v17 here so that v0, v1 and v18
        // can be refilled from the template in v6 for the next pass.
    626 	aese	v0.16b,v16.16b
    627 	aesmc	v4.16b,v0.16b
    628 	aese	v1.16b,v16.16b
    629 	aesmc	v5.16b,v1.16b
    630 	ld1	{v2.16b},[x0],#16
    631 	orr	v0.16b,v6.16b,v6.16b
    632 	aese	v18.16b,v16.16b
    633 	aesmc	v18.16b,v18.16b
    634 	ld1	{v3.16b},[x0],#16
    635 	orr	v1.16b,v6.16b,v6.16b
    636 	aese	v4.16b,v17.16b
    637 	aesmc	v4.16b,v4.16b
    638 	aese	v5.16b,v17.16b
    639 	aesmc	v5.16b,v5.16b
    640 	ld1	{v19.16b},[x0],#16
    641 	mov	x7,x3	// rewind the round key pointer
    642 	aese	v18.16b,v17.16b
    643 	aesmc	v17.16b,v18.16b
    644 	orr	v18.16b,v6.16b,v6.16b
    645 	add	w9,w8,#1
    646 	aese	v4.16b,v20.16b
    647 	aesmc	v4.16b,v4.16b
    648 	aese	v5.16b,v20.16b
    649 	aesmc	v5.16b,v5.16b
    650 	eor	v2.16b,v2.16b,v7.16b	// input xor last round key
    651 	add	w10,w8,#2
    652 	aese	v17.16b,v20.16b
    653 	aesmc	v17.16b,v17.16b
    654 	eor	v3.16b,v3.16b,v7.16b
    655 	add	w8,w8,#3
    656 	aese	v4.16b,v21.16b
    657 	aesmc	v4.16b,v4.16b
    658 	aese	v5.16b,v21.16b
    659 	aesmc	v5.16b,v5.16b
    660 	eor	v19.16b,v19.16b,v7.16b
    661 	rev	w9,w9
    662 	aese	v17.16b,v21.16b
    663 	aesmc	v17.16b,v17.16b
    664 	mov	v0.s[3], w9
    665 	rev	w10,w10
    666 	aese	v4.16b,v22.16b
    667 	aesmc	v4.16b,v4.16b
    668 	aese	v5.16b,v22.16b
    669 	aesmc	v5.16b,v5.16b
    670 	mov	v1.s[3], w10
    671 	rev	w12,w8
    672 	aese	v17.16b,v22.16b
    673 	aesmc	v17.16b,v17.16b
    674 	mov	v18.s[3], w12
    675 	subs	x2,x2,#3	// flags are consumed by b.hs at the bottom
    676 	aese	v4.16b,v23.16b
    677 	aese	v5.16b,v23.16b
    678 	aese	v17.16b,v23.16b
    679 
    680 	eor	v2.16b,v2.16b,v4.16b
    681 	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
    682 	st1	{v2.16b},[x1],#16
    683 	eor	v3.16b,v3.16b,v5.16b
    684 	mov	w6,w5
    685 	st1	{v3.16b},[x1],#16
    686 	eor	v19.16b,v19.16b,v17.16b
    687 	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
    688 	st1	{v19.16b},[x1],#16
    689 	b.hs	.Loop3x_ctr32
    690 
    691 	adds	x2,x2,#3	// undo the bias: x2 = blocks still to do
    692 	b.eq	.Lctr32_done
    693 	cmp	x2,#1
    694 	mov	x12,#16
    695 	csel	x12,xzr,x12,eq	// one block left: tail reloads the same block
    696 
        // Tail: one or two blocks, using the counter blocks in v0 and v1.
    697 .Lctr32_tail:
    698 	aese	v0.16b,v16.16b
    699 	aesmc	v0.16b,v0.16b
    700 	aese	v1.16b,v16.16b
    701 	aesmc	v1.16b,v1.16b
    702 	ld1	{v16.4s},[x7],#16
    703 	subs	w6,w6,#2	// flags are consumed by b.gt below
    704 	aese	v0.16b,v17.16b
    705 	aesmc	v0.16b,v0.16b
    706 	aese	v1.16b,v17.16b
    707 	aesmc	v1.16b,v1.16b
    708 	ld1	{v17.4s},[x7],#16
    709 	b.gt	.Lctr32_tail
    710 
    711 	aese	v0.16b,v16.16b
    712 	aesmc	v0.16b,v0.16b
    713 	aese	v1.16b,v16.16b
    714 	aesmc	v1.16b,v1.16b
    715 	aese	v0.16b,v17.16b
    716 	aesmc	v0.16b,v0.16b
    717 	aese	v1.16b,v17.16b
    718 	aesmc	v1.16b,v1.16b
    719 	ld1	{v2.16b},[x0],x12	// x12 is 0 when only one block is left
    720 	aese	v0.16b,v20.16b
    721 	aesmc	v0.16b,v0.16b
    722 	aese	v1.16b,v20.16b
    723 	aesmc	v1.16b,v1.16b
    724 	ld1	{v3.16b},[x0]
    725 	aese	v0.16b,v21.16b
    726 	aesmc	v0.16b,v0.16b
    727 	aese	v1.16b,v21.16b
    728 	aesmc	v1.16b,v1.16b
    729 	eor	v2.16b,v2.16b,v7.16b
    730 	aese	v0.16b,v22.16b
    731 	aesmc	v0.16b,v0.16b
    732 	aese	v1.16b,v22.16b
    733 	aesmc	v1.16b,v1.16b
    734 	eor	v3.16b,v3.16b,v7.16b
    735 	aese	v0.16b,v23.16b
    736 	aese	v1.16b,v23.16b
    737 
    738 	cmp	x2,#1
    739 	eor	v2.16b,v2.16b,v0.16b
    740 	eor	v3.16b,v3.16b,v1.16b
    741 	st1	{v2.16b},[x1],#16
    742 	b.eq	.Lctr32_done	// second block is stored only when x2 is not 1
    743 	st1	{v3.16b},[x1]
    744 
    745 .Lctr32_done:
        // x30 is never modified in this routine, so only x29 is reloaded.
    746 	ldr	x29,[sp],#16
    747 	ret
    748 .size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
    749 #endif
    750