// AES key setup, single-block, CBC and CTR32 routines for AArch64 using the ARMv8 Cryptography Extensions.
      1 #include "arm_arch.h"
      2 
      3 #if __ARM_ARCH__>=7
      4 .text
      5 .arch	armv8-a+crypto
      6 .align	5
      7 rcon:
      8 .long	0x01,0x01,0x01,0x01
      9 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
     10 .long	0x1b,0x1b,0x1b,0x1b
     11 
     12 .globl	aes_v8_set_encrypt_key
     13 .type	aes_v8_set_encrypt_key,%function
     14 .align	5
     15 aes_v8_set_encrypt_key:
     16 .Lenc_key:
     17 	stp	x29,x30,[sp,#-16]!
     18 	add	x29,sp,#0
     19 	adr	x3,rcon
     20 	cmp	w1,#192
     21 
     22 	eor	v0.16b,v0.16b,v0.16b
     23 	ld1	{v3.16b},[x0],#16
     24 	mov	w1,#8		// reuse w1
     25 	ld1	{v1.4s,v2.4s},[x3],#32
     26 
     27 	b.lt	.Loop128
     28 	b.eq	.L192
     29 	b	.L256
     30 
     31 .align	4
     32 .Loop128:
     33 	tbl	v6.16b,{v3.16b},v2.16b
     34 	ext	v5.16b,v0.16b,v3.16b,#12
     35 	st1	{v3.4s},[x2],#16
     36 	aese	v6.16b,v0.16b
     37 	subs	w1,w1,#1
     38 
     39 	eor	v3.16b,v3.16b,v5.16b
     40 	ext	v5.16b,v0.16b,v5.16b,#12
     41 	eor	v3.16b,v3.16b,v5.16b
     42 	ext	v5.16b,v0.16b,v5.16b,#12
     43 	 eor	v6.16b,v6.16b,v1.16b
     44 	eor	v3.16b,v3.16b,v5.16b
     45 	shl	v1.16b,v1.16b,#1
     46 	eor	v3.16b,v3.16b,v6.16b
     47 	b.ne	.Loop128
     48 
     49 	ld1	{v1.4s},[x3]
     50 
     51 	tbl	v6.16b,{v3.16b},v2.16b
     52 	ext	v5.16b,v0.16b,v3.16b,#12
     53 	st1	{v3.4s},[x2],#16
     54 	aese	v6.16b,v0.16b
     55 
     56 	eor	v3.16b,v3.16b,v5.16b
     57 	ext	v5.16b,v0.16b,v5.16b,#12
     58 	eor	v3.16b,v3.16b,v5.16b
     59 	ext	v5.16b,v0.16b,v5.16b,#12
     60 	 eor	v6.16b,v6.16b,v1.16b
     61 	eor	v3.16b,v3.16b,v5.16b
     62 	shl	v1.16b,v1.16b,#1
     63 	eor	v3.16b,v3.16b,v6.16b
     64 
     65 	tbl	v6.16b,{v3.16b},v2.16b
     66 	ext	v5.16b,v0.16b,v3.16b,#12
     67 	st1	{v3.4s},[x2],#16
     68 	aese	v6.16b,v0.16b
     69 
     70 	eor	v3.16b,v3.16b,v5.16b
     71 	ext	v5.16b,v0.16b,v5.16b,#12
     72 	eor	v3.16b,v3.16b,v5.16b
     73 	ext	v5.16b,v0.16b,v5.16b,#12
     74 	 eor	v6.16b,v6.16b,v1.16b
     75 	eor	v3.16b,v3.16b,v5.16b
     76 	eor	v3.16b,v3.16b,v6.16b
     77 	st1	{v3.4s},[x2]
     78 	add	x2,x2,#0x50
     79 
     80 	mov	w12,#10
     81 	b	.Ldone
     82 
     83 .align	4
     84 .L192:
     85 	ld1	{v4.8b},[x0],#8
     86 	movi	v6.16b,#8			// borrow v6.16b
     87 	st1	{v3.4s},[x2],#16
     88 	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
     89 
     90 .Loop192:
     91 	tbl	v6.16b,{v4.16b},v2.16b
     92 	ext	v5.16b,v0.16b,v3.16b,#12
     93 	st1	{v4.8b},[x2],#8
     94 	aese	v6.16b,v0.16b
     95 	subs	w1,w1,#1
     96 
     97 	eor	v3.16b,v3.16b,v5.16b
     98 	ext	v5.16b,v0.16b,v5.16b,#12
     99 	eor	v3.16b,v3.16b,v5.16b
    100 	ext	v5.16b,v0.16b,v5.16b,#12
    101 	eor	v3.16b,v3.16b,v5.16b
    102 
    103 	dup	v5.4s,v3.s[3]
    104 	eor	v5.16b,v5.16b,v4.16b
    105 	 eor	v6.16b,v6.16b,v1.16b
    106 	ext	v4.16b,v0.16b,v4.16b,#12
    107 	shl	v1.16b,v1.16b,#1
    108 	eor	v4.16b,v4.16b,v5.16b
    109 	eor	v3.16b,v3.16b,v6.16b
    110 	eor	v4.16b,v4.16b,v6.16b
    111 	st1	{v3.4s},[x2],#16
    112 	b.ne	.Loop192
    113 
    114 	mov	w12,#12
    115 	add	x2,x2,#0x20
    116 	b	.Ldone
    117 
    118 .align	4
    119 .L256:
    120 	ld1	{v4.16b},[x0]
    121 	mov	w1,#7
    122 	mov	w12,#14
    123 	st1	{v3.4s},[x2],#16
    124 
    125 .Loop256:
    126 	tbl	v6.16b,{v4.16b},v2.16b
    127 	ext	v5.16b,v0.16b,v3.16b,#12
    128 	st1	{v4.4s},[x2],#16
    129 	aese	v6.16b,v0.16b
    130 	subs	w1,w1,#1
    131 
    132 	eor	v3.16b,v3.16b,v5.16b
    133 	ext	v5.16b,v0.16b,v5.16b,#12
    134 	eor	v3.16b,v3.16b,v5.16b
    135 	ext	v5.16b,v0.16b,v5.16b,#12
    136 	 eor	v6.16b,v6.16b,v1.16b
    137 	eor	v3.16b,v3.16b,v5.16b
    138 	shl	v1.16b,v1.16b,#1
    139 	eor	v3.16b,v3.16b,v6.16b
    140 	st1	{v3.4s},[x2],#16
    141 	b.eq	.Ldone
    142 
    143 	dup	v6.4s,v3.s[3]		// just splat
    144 	ext	v5.16b,v0.16b,v4.16b,#12
    145 	aese	v6.16b,v0.16b
    146 
    147 	eor	v4.16b,v4.16b,v5.16b
    148 	ext	v5.16b,v0.16b,v5.16b,#12
    149 	eor	v4.16b,v4.16b,v5.16b
    150 	ext	v5.16b,v0.16b,v5.16b,#12
    151 	eor	v4.16b,v4.16b,v5.16b
    152 
    153 	eor	v4.16b,v4.16b,v6.16b
    154 	b	.Loop256
    155 
    156 .Ldone:
    157 	str	w12,[x2]
    158 
    159 	eor	x0,x0,x0		// return value
    160 	ldr	x29,[sp],#16
    161 	ret
    162 .size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
    163 
    164 .globl	aes_v8_set_decrypt_key
    165 .type	aes_v8_set_decrypt_key,%function
    166 .align	5
    167 aes_v8_set_decrypt_key:
    168 	stp	x29,x30,[sp,#-16]!
    169 	add	x29,sp,#0
    170 	bl	.Lenc_key
    171 
    172 	sub	x2,x2,#240		// restore original x2
    173 	mov	x4,#-16
    174 	add	x0,x2,x12,lsl#4	// end of key schedule
    175 
    176 	ld1	{v0.4s},[x2]
    177 	ld1	{v1.4s},[x0]
    178 	st1	{v0.4s},[x0],x4
    179 	st1	{v1.4s},[x2],#16
    180 
    181 .Loop_imc:
    182 	ld1	{v0.4s},[x2]
    183 	ld1	{v1.4s},[x0]
    184 	aesimc	v0.16b,v0.16b
    185 	aesimc	v1.16b,v1.16b
    186 	st1	{v0.4s},[x0],x4
    187 	st1	{v1.4s},[x2],#16
    188 	cmp	x0,x2
    189 	b.hi	.Loop_imc
    190 
    191 	ld1	{v0.4s},[x2]
    192 	aesimc	v0.16b,v0.16b
    193 	st1	{v0.4s},[x0]
    194 
    195 	eor	x0,x0,x0		// return value
    196 	ldp	x29,x30,[sp],#16
    197 	ret
    198 .size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
    199 .globl	aes_v8_encrypt
    200 .type	aes_v8_encrypt,%function
    201 .align	5
    202 aes_v8_encrypt:
    203 	ldr	w3,[x2,#240]
    204 	ld1	{v0.4s},[x2],#16
    205 	ld1	{v2.16b},[x0]
    206 	sub	w3,w3,#2
    207 	ld1	{v1.4s},[x2],#16
    208 
    209 .Loop_enc:
    210 	aese	v2.16b,v0.16b
    211 	ld1	{v0.4s},[x2],#16
    212 	aesmc	v2.16b,v2.16b
    213 	subs	w3,w3,#2
    214 	aese	v2.16b,v1.16b
    215 	ld1	{v1.4s},[x2],#16
    216 	aesmc	v2.16b,v2.16b
    217 	b.gt	.Loop_enc
    218 
    219 	aese	v2.16b,v0.16b
    220 	ld1	{v0.4s},[x2]
    221 	aesmc	v2.16b,v2.16b
    222 	aese	v2.16b,v1.16b
    223 	eor	v2.16b,v2.16b,v0.16b
    224 
    225 	st1	{v2.16b},[x1]
    226 	ret
    227 .size	aes_v8_encrypt,.-aes_v8_encrypt
    228 .globl	aes_v8_decrypt
    229 .type	aes_v8_decrypt,%function
    230 .align	5
    231 aes_v8_decrypt:
    232 	ldr	w3,[x2,#240]
    233 	ld1	{v0.4s},[x2],#16
    234 	ld1	{v2.16b},[x0]
    235 	sub	w3,w3,#2
    236 	ld1	{v1.4s},[x2],#16
    237 
    238 .Loop_dec:
    239 	aesd	v2.16b,v0.16b
    240 	ld1	{v0.4s},[x2],#16
    241 	aesimc	v2.16b,v2.16b
    242 	subs	w3,w3,#2
    243 	aesd	v2.16b,v1.16b
    244 	ld1	{v1.4s},[x2],#16
    245 	aesimc	v2.16b,v2.16b
    246 	b.gt	.Loop_dec
    247 
    248 	aesd	v2.16b,v0.16b
    249 	ld1	{v0.4s},[x2]
    250 	aesimc	v2.16b,v2.16b
    251 	aesd	v2.16b,v1.16b
    252 	eor	v2.16b,v2.16b,v0.16b
    253 
    254 	st1	{v2.16b},[x1]
    255 	ret
    256 .size	aes_v8_decrypt,.-aes_v8_decrypt
    257 .globl	aes_v8_cbc_encrypt
    258 .type	aes_v8_cbc_encrypt,%function
    259 .align	5
    260 aes_v8_cbc_encrypt:
    261 	stp	x29,x30,[sp,#-16]!
    262 	add	x29,sp,#0
    263 	subs	x2,x2,#16
    264 	mov	x8,#16
    265 	b.lo	.Lcbc_abort
    266 	csel	x8,xzr,x8,eq
    267 
    268 	cmp	w5,#0			// en- or decrypting?
    269 	ldr	w5,[x3,#240]
    270 	and	x2,x2,#-16
    271 	ld1	{v6.16b},[x4]
    272 	ld1	{v0.16b},[x0],x8
    273 
    274 	ld1	{v16.4s-v17.4s},[x3]		// load key schedule...
    275 	sub	w5,w5,#6
    276 	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
    277 	sub	w5,w5,#2
    278 	ld1	{v18.4s-v19.4s},[x7],#32
    279 	ld1	{v20.4s-v21.4s},[x7],#32
    280 	ld1	{v22.4s-v23.4s},[x7],#32
    281 	ld1	{v7.4s},[x7]
    282 
    283 	add	x7,x3,#32
    284 	mov	w6,w5
    285 	b.eq	.Lcbc_dec
    286 
    287 	cmp	w5,#2
    288 	eor	v0.16b,v0.16b,v6.16b
    289 	eor	v5.16b,v16.16b,v7.16b
    290 	b.eq	.Lcbc_enc128
    291 
    292 .Loop_cbc_enc:
    293 	aese	v0.16b,v16.16b
    294 	ld1	{v16.4s},[x7],#16
    295 	aesmc	v0.16b,v0.16b
    296 	subs	w6,w6,#2
    297 	aese	v0.16b,v17.16b
    298 	ld1	{v17.4s},[x7],#16
    299 	aesmc	v0.16b,v0.16b
    300 	b.gt	.Loop_cbc_enc
    301 
    302 	aese	v0.16b,v16.16b
    303 	aesmc	v0.16b,v0.16b
    304 	 subs	x2,x2,#16
    305 	aese	v0.16b,v17.16b
    306 	aesmc	v0.16b,v0.16b
    307 	 csel	x8,xzr,x8,eq
    308 	aese	v0.16b,v18.16b
    309 	aesmc	v0.16b,v0.16b
    310 	 add	x7,x3,#16
    311 	aese	v0.16b,v19.16b
    312 	aesmc	v0.16b,v0.16b
    313 	 ld1	{v16.16b},[x0],x8
    314 	aese	v0.16b,v20.16b
    315 	aesmc	v0.16b,v0.16b
    316 	 eor	v16.16b,v16.16b,v5.16b
    317 	aese	v0.16b,v21.16b
    318 	aesmc	v0.16b,v0.16b
    319 	 ld1 {v17.4s},[x7],#16	// re-pre-load rndkey[1]
    320 	aese	v0.16b,v22.16b
    321 	aesmc	v0.16b,v0.16b
    322 	aese	v0.16b,v23.16b
    323 
    324 	 mov	w6,w5
    325 	eor	v6.16b,v0.16b,v7.16b
    326 	st1	{v6.16b},[x1],#16
    327 	b.hs	.Loop_cbc_enc
    328 
    329 	b	.Lcbc_done
    330 
    331 .align	5
    332 .Lcbc_enc128:
    333 	ld1	{v2.4s-v3.4s},[x7]
    334 	aese	v0.16b,v16.16b
    335 	aesmc	v0.16b,v0.16b
    336 	b	.Lenter_cbc_enc128
    337 .Loop_cbc_enc128:
    338 	aese	v0.16b,v16.16b
    339 	aesmc	v0.16b,v0.16b
    340 	 st1	{v6.16b},[x1],#16
    341 .Lenter_cbc_enc128:
    342 	aese	v0.16b,v17.16b
    343 	aesmc	v0.16b,v0.16b
    344 	 subs	x2,x2,#16
    345 	aese	v0.16b,v2.16b
    346 	aesmc	v0.16b,v0.16b
    347 	 csel	x8,xzr,x8,eq
    348 	aese	v0.16b,v3.16b
    349 	aesmc	v0.16b,v0.16b
    350 	aese	v0.16b,v18.16b
    351 	aesmc	v0.16b,v0.16b
    352 	aese	v0.16b,v19.16b
    353 	aesmc	v0.16b,v0.16b
    354 	 ld1	{v16.16b},[x0],x8
    355 	aese	v0.16b,v20.16b
    356 	aesmc	v0.16b,v0.16b
    357 	aese	v0.16b,v21.16b
    358 	aesmc	v0.16b,v0.16b
    359 	aese	v0.16b,v22.16b
    360 	aesmc	v0.16b,v0.16b
    361 	 eor	v16.16b,v16.16b,v5.16b
    362 	aese	v0.16b,v23.16b
    363 	eor	v6.16b,v0.16b,v7.16b
    364 	b.hs	.Loop_cbc_enc128
    365 
    366 	st1	{v6.16b},[x1],#16
    367 	b	.Lcbc_done
    368 
    369 .align	5
    370 .Lcbc_dec128:
    371 	ld1	{v4.4s-v5.4s},[x7]
    372 	eor	v6.16b,v6.16b,v7.16b
    373 	eor	v2.16b,v0.16b,v7.16b
    374 	mov	x12,x8
    375 
    376 .Loop2x_cbc_dec128:
    377 	aesd	v0.16b,v16.16b
    378 	aesd	v1.16b,v16.16b
    379 	aesimc	v0.16b,v0.16b
    380 	aesimc	v1.16b,v1.16b
    381 	 subs	x2,x2,#32
    382 	aesd	v0.16b,v17.16b
    383 	aesd	v1.16b,v17.16b
    384 	aesimc	v0.16b,v0.16b
    385 	aesimc	v1.16b,v1.16b
    386 	 csel	x8,xzr,x8,lo
    387 	aesd	v0.16b,v4.16b
    388 	aesd	v1.16b,v4.16b
    389 	aesimc	v0.16b,v0.16b
    390 	aesimc	v1.16b,v1.16b
    391 	 csel	x12,xzr,x12,ls
    392 	aesd	v0.16b,v5.16b
    393 	aesd	v1.16b,v5.16b
    394 	aesimc	v0.16b,v0.16b
    395 	aesimc	v1.16b,v1.16b
    396 	aesd	v0.16b,v18.16b
    397 	aesd	v1.16b,v18.16b
    398 	aesimc	v0.16b,v0.16b
    399 	aesimc	v1.16b,v1.16b
    400 	aesd	v0.16b,v19.16b
    401 	aesd	v1.16b,v19.16b
    402 	aesimc	v0.16b,v0.16b
    403 	aesimc	v1.16b,v1.16b
    404 	aesd	v0.16b,v20.16b
    405 	aesd	v1.16b,v20.16b
    406 	aesimc	v0.16b,v0.16b
    407 	aesimc	v1.16b,v1.16b
    408 	aesd	v0.16b,v21.16b
    409 	aesd	v1.16b,v21.16b
    410 	aesimc	v0.16b,v0.16b
    411 	aesimc	v1.16b,v1.16b
    412 	aesd	v0.16b,v22.16b
    413 	aesd	v1.16b,v22.16b
    414 	aesimc	v0.16b,v0.16b
    415 	aesimc	v1.16b,v1.16b
    416 	aesd	v0.16b,v23.16b
    417 	aesd	v1.16b,v23.16b
    418 
    419 	eor	v6.16b,v6.16b,v0.16b
    420 	ld1	{v0.16b},[x0],x8
    421 	eor	v2.16b,v2.16b,v1.16b
    422 	ld1	{v1.16b},[x0],x12
    423 	st1	{v6.16b},[x1],#16
    424 	eor	v6.16b,v3.16b,v7.16b
    425 	st1	{v2.16b},[x1],#16
    426 	eor	v2.16b,v0.16b,v7.16b
    427 	orr	v3.16b,v1.16b,v1.16b
    428 	b.hs	.Loop2x_cbc_dec128
    429 
    430 	adds	x2,x2,#32
    431 	eor	v6.16b,v6.16b,v7.16b
    432 	b.eq	.Lcbc_done
    433 	eor	v2.16b,v2.16b,v7.16b
    434 	b	.Lcbc_dec_tail
    435 
    436 .align	5
    437 .Lcbc_dec:
    438 	subs	x2,x2,#16
    439 	orr	v2.16b,v0.16b,v0.16b
    440 	b.lo	.Lcbc_dec_tail
    441 
    442 	csel	x8,xzr,x8,eq
    443 	cmp	w5,#2
    444 	ld1	{v1.16b},[x0],x8
    445 	orr	v3.16b,v1.16b,v1.16b
    446 	b.eq	.Lcbc_dec128
    447 
    448 .Loop2x_cbc_dec:
    449 	aesd	v0.16b,v16.16b
    450 	aesd	v1.16b,v16.16b
    451 	ld1	{v16.4s},[x7],#16
    452 	aesimc	v0.16b,v0.16b
    453 	aesimc	v1.16b,v1.16b
    454 	subs	w6,w6,#2
    455 	aesd	v0.16b,v17.16b
    456 	aesd	v1.16b,v17.16b
    457 	ld1	{v17.4s},[x7],#16
    458 	aesimc	v0.16b,v0.16b
    459 	aesimc	v1.16b,v1.16b
    460 	b.gt	.Loop2x_cbc_dec
    461 
    462 	aesd	v0.16b,v16.16b
    463 	aesd	v1.16b,v16.16b
    464 	aesimc	v0.16b,v0.16b
    465 	aesimc	v1.16b,v1.16b
    466 	 eor	v4.16b,v6.16b,v7.16b
    467 	 eor	v5.16b,v2.16b,v7.16b
    468 	aesd	v0.16b,v17.16b
    469 	aesd	v1.16b,v17.16b
    470 	aesimc	v0.16b,v0.16b
    471 	aesimc	v1.16b,v1.16b
    472 	 orr	v6.16b,v3.16b,v3.16b
    473 	 subs	x2,x2,#32
    474 	aesd	v0.16b,v18.16b
    475 	aesd	v1.16b,v18.16b
    476 	aesimc	v0.16b,v0.16b
    477 	 csel	x8,xzr,x8,lo
    478 	aesimc	v1.16b,v1.16b
    479 	 mov	x7,x3
    480 	aesd	v0.16b,v19.16b
    481 	aesd	v1.16b,v19.16b
    482 	aesimc	v0.16b,v0.16b
    483 	 ld1	{v2.16b},[x0],x8
    484 	aesimc	v1.16b,v1.16b
    485 	 csel	x8,xzr,x8,ls
    486 	aesd	v0.16b,v20.16b
    487 	aesd	v1.16b,v20.16b
    488 	aesimc	v0.16b,v0.16b
    489 	aesimc	v1.16b,v1.16b
    490 	 ld1	{v3.16b},[x0],x8
    491 	aesd	v0.16b,v21.16b
    492 	aesd	v1.16b,v21.16b
    493 	aesimc	v0.16b,v0.16b
    494 	aesimc	v1.16b,v1.16b
    495 	 ld1 {v16.4s},[x7],#16	// re-pre-load rndkey[0]
    496 	aesd	v0.16b,v22.16b
    497 	aesd	v1.16b,v22.16b
    498 	aesimc	v0.16b,v0.16b
    499 	aesimc	v1.16b,v1.16b
    500 	 ld1 {v17.4s},[x7],#16	// re-pre-load rndkey[1]
    501 	aesd	v0.16b,v23.16b
    502 	aesd	v1.16b,v23.16b
    503 
    504 	 mov	w6,w5
    505 	eor	v4.16b,v4.16b,v0.16b
    506 	eor	v5.16b,v5.16b,v1.16b
    507 	 orr	v0.16b,v2.16b,v2.16b
    508 	st1	{v4.16b},[x1],#16
    509 	 orr	v1.16b,v3.16b,v3.16b
    510 	st1	{v5.16b},[x1],#16
    511 	b.hs	.Loop2x_cbc_dec
    512 
    513 	adds	x2,x2,#32
    514 	b.eq	.Lcbc_done
    515 
    516 .Lcbc_dec_tail:
    517 	aesd	v0.16b,v16.16b
    518 	ld1	{v16.4s},[x7],#16
    519 	aesimc	v0.16b,v0.16b
    520 	subs	w6,w6,#2
    521 	aesd	v0.16b,v17.16b
    522 	ld1	{v17.4s},[x7],#16
    523 	aesimc	v0.16b,v0.16b
    524 	b.gt	.Lcbc_dec_tail
    525 
    526 	aesd	v0.16b,v16.16b
    527 	aesimc	v0.16b,v0.16b
    528 	aesd	v0.16b,v17.16b
    529 	aesimc	v0.16b,v0.16b
    530 	 eor	v4.16b,v6.16b,v7.16b
    531 	aesd	v0.16b,v18.16b
    532 	aesimc	v0.16b,v0.16b
    533 	 orr	v6.16b,v2.16b,v2.16b
    534 	aesd	v0.16b,v19.16b
    535 	aesimc	v0.16b,v0.16b
    536 	aesd	v0.16b,v20.16b
    537 	aesimc	v0.16b,v0.16b
    538 	aesd	v0.16b,v21.16b
    539 	aesimc	v0.16b,v0.16b
    540 	aesd	v0.16b,v22.16b
    541 	aesimc	v0.16b,v0.16b
    542 	aesd	v0.16b,v23.16b
    543 
    544 	eor	v4.16b,v4.16b,v0.16b
    545 	st1	{v4.16b},[x1],#16
    546 
    547 .Lcbc_done:
    548 	st1	{v6.16b},[x4]
    549 .Lcbc_abort:
    550 	ldr	x29,[sp],#16
    551 	ret
    552 .size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
    553 .globl	aes_v8_ctr32_encrypt_blocks
    554 .type	aes_v8_ctr32_encrypt_blocks,%function
    555 .align	5
    556 aes_v8_ctr32_encrypt_blocks:
    557 	stp		x29,x30,[sp,#-16]!
    558 	add		x29,sp,#0
    559 	ldr		w5,[x3,#240]
    560 
    561 	ldr		w8, [x4, #12]
    562 	ld1		{v0.4s},[x4]
    563 
    564 	ld1		{v16.4s-v17.4s},[x3]		// load key schedule...
    565 	sub		w5,w5,#6
    566 	add		x7,x3,x5,lsl#4	// pointer to last 7 round keys
    567 	sub		w5,w5,#2
    568 	ld1		{v18.4s-v19.4s},[x7],#32
    569 	ld1		{v20.4s-v21.4s},[x7],#32
    570 	ld1		{v22.4s-v23.4s},[x7],#32
    571 	ld1		{v7.4s},[x7]
    572 
    573 	add		x7,x3,#32
    574 	mov		w6,w5
    575 
    576 	subs		x2,x2,#2
    577 	b.lo		.Lctr32_tail
    578 
    579 #ifndef __ARMEB__
    580 	rev		w8, w8
    581 #endif
    582 	orr		v1.16b,v0.16b,v0.16b
    583 	add		w8, w8, #1
    584 	orr		v6.16b,v0.16b,v0.16b
    585 	rev		w10, w8
    586 	cmp		w5,#2
    587 	mov		v1.s[3],w10
    588 	b.eq		.Lctr32_128
    589 
    590 .Loop2x_ctr32:
    591 	aese		v0.16b,v16.16b
    592 	aese		v1.16b,v16.16b
    593 	ld1		{v16.4s},[x7],#16
    594 	aesmc		v0.16b,v0.16b
    595 	aesmc		v1.16b,v1.16b
    596 	subs		w6,w6,#2
    597 	aese		v0.16b,v17.16b
    598 	aese		v1.16b,v17.16b
    599 	ld1		{v17.4s},[x7],#16
    600 	aesmc		v0.16b,v0.16b
    601 	aesmc		v1.16b,v1.16b
    602 	b.gt		.Loop2x_ctr32
    603 
    604 	aese		v0.16b,v16.16b
    605 	aese		v1.16b,v16.16b
    606 	aesmc		v4.16b,v0.16b
    607 	 orr		v0.16b,v6.16b,v6.16b
    608 	aesmc		v5.16b,v1.16b
    609 	 orr		v1.16b,v6.16b,v6.16b
    610 	aese		v4.16b,v17.16b
    611 	aese		v5.16b,v17.16b
    612 	 ld1		{v2.16b},[x0],#16
    613 	aesmc		v4.16b,v4.16b
    614 	 ld1		{v3.16b},[x0],#16
    615 	aesmc		v5.16b,v5.16b
    616 	 add		w8,w8,#1
    617 	aese		v4.16b,v18.16b
    618 	aese		v5.16b,v18.16b
    619 	 rev		w9,w8
    620 	aesmc		v4.16b,v4.16b
    621 	aesmc		v5.16b,v5.16b
    622 	 add		w8,w8,#1
    623 	aese		v4.16b,v19.16b
    624 	aese		v5.16b,v19.16b
    625 	 eor		v2.16b,v2.16b,v7.16b
    626 	 rev		w10,w8
    627 	aesmc		v4.16b,v4.16b
    628 	aesmc		v5.16b,v5.16b
    629 	 eor		v3.16b,v3.16b,v7.16b
    630 	 mov		x7,x3
    631 	aese		v4.16b,v20.16b
    632 	aese		v5.16b,v20.16b
    633 	 subs		x2,x2,#2
    634 	aesmc		v4.16b,v4.16b
    635 	aesmc		v5.16b,v5.16b
    636 	 ld1	 {v16.4s-v17.4s},[x7],#32	// re-pre-load rndkey[0-1]
    637 	aese		v4.16b,v21.16b
    638 	aese		v5.16b,v21.16b
    639 	aesmc		v4.16b,v4.16b
    640 	aesmc		v5.16b,v5.16b
    641 	aese		v4.16b,v22.16b
    642 	aese		v5.16b,v22.16b
    643 	 mov	v0.s[3], w9
    644 	aesmc		v4.16b,v4.16b
    645 	 mov	v1.s[3], w10
    646 	aesmc		v5.16b,v5.16b
    647 	aese		v4.16b,v23.16b
    648 	aese		v5.16b,v23.16b
    649 
    650 	 mov		w6,w5
    651 	eor		v2.16b,v2.16b,v4.16b
    652 	eor		v3.16b,v3.16b,v5.16b
    653 	st1		{v2.16b},[x1],#16
    654 	st1		{v3.16b},[x1],#16
    655 	b.hs		.Loop2x_ctr32
    656 
    657 	adds		x2,x2,#2
    658 	b.eq		.Lctr32_done
    659 	b		.Lctr32_tail
    660 
    661 .Lctr32_128:
    662 	ld1		{v4.4s-v5.4s},[x7]
    663 
    664 .Loop2x_ctr32_128:
    665 	aese		v0.16b,v16.16b
    666 	aese		v1.16b,v16.16b
    667 	aesmc		v0.16b,v0.16b
    668 	 ld1		{v2.16b},[x0],#16
    669 	aesmc		v1.16b,v1.16b
    670 	 ld1		{v3.16b},[x0],#16
    671 	aese		v0.16b,v17.16b
    672 	aese		v1.16b,v17.16b
    673 	 add		w8,w8,#1
    674 	aesmc		v0.16b,v0.16b
    675 	aesmc		v1.16b,v1.16b
    676 	 rev		w9,w8
    677 	aese		v0.16b,v4.16b
    678 	aese		v1.16b,v4.16b
    679 	 add		w8,w8,#1
    680 	aesmc		v0.16b,v0.16b
    681 	aesmc		v1.16b,v1.16b
    682 	 rev		w10,w8
    683 	aese		v0.16b,v5.16b
    684 	aese		v1.16b,v5.16b
    685 	 subs		x2,x2,#2
    686 	aesmc		v0.16b,v0.16b
    687 	aesmc		v1.16b,v1.16b
    688 	aese		v0.16b,v18.16b
    689 	aese		v1.16b,v18.16b
    690 	aesmc		v0.16b,v0.16b
    691 	aesmc		v1.16b,v1.16b
    692 	aese		v0.16b,v19.16b
    693 	aese		v1.16b,v19.16b
    694 	aesmc		v0.16b,v0.16b
    695 	aesmc		v1.16b,v1.16b
    696 	aese		v0.16b,v20.16b
    697 	aese		v1.16b,v20.16b
    698 	aesmc		v0.16b,v0.16b
    699 	aesmc		v1.16b,v1.16b
    700 	aese		v0.16b,v21.16b
    701 	aese		v1.16b,v21.16b
    702 	aesmc		v0.16b,v0.16b
    703 	aesmc		v1.16b,v1.16b
    704 	aese		v0.16b,v22.16b
    705 	aese		v1.16b,v22.16b
    706 	aesmc		v0.16b,v0.16b
    707 	aesmc		v1.16b,v1.16b
    708 	 eor		v2.16b,v2.16b,v7.16b
    709 	aese		v0.16b,v23.16b
    710 	 eor		v3.16b,v3.16b,v7.16b
    711 	aese		v1.16b,v23.16b
    712 
    713 	eor		v2.16b,v2.16b,v0.16b
    714 	orr		v0.16b,v6.16b,v6.16b
    715 	eor		v3.16b,v3.16b,v1.16b
    716 	orr		v1.16b,v6.16b,v6.16b
    717 	st1		{v2.16b},[x1],#16
    718 	mov		v0.s[3], w9
    719 	st1		{v3.16b},[x1],#16
    720 	mov		v1.s[3], w10
    721 	b.hs		.Loop2x_ctr32_128
    722 
    723 	adds		x2,x2,#2
    724 	b.eq		.Lctr32_done
    725 
    726 .Lctr32_tail:
    727 	aese		v0.16b,v16.16b
    728 	ld1		{v16.4s},[x7],#16
    729 	aesmc		v0.16b,v0.16b
    730 	subs		w6,w6,#2
    731 	aese		v0.16b,v17.16b
    732 	ld1		{v17.4s},[x7],#16
    733 	aesmc		v0.16b,v0.16b
    734 	b.gt		.Lctr32_tail
    735 
    736 	aese		v0.16b,v16.16b
    737 	aesmc		v0.16b,v0.16b
    738 	aese		v0.16b,v17.16b
    739 	aesmc		v0.16b,v0.16b
    740 	 ld1		{v2.16b},[x0]
    741 	aese		v0.16b,v18.16b
    742 	aesmc		v0.16b,v0.16b
    743 	aese		v0.16b,v19.16b
    744 	aesmc		v0.16b,v0.16b
    745 	aese		v0.16b,v20.16b
    746 	aesmc		v0.16b,v0.16b
    747 	aese		v0.16b,v21.16b
    748 	aesmc		v0.16b,v0.16b
    749 	aese		v0.16b,v22.16b
    750 	aesmc		v0.16b,v0.16b
    751 	 eor		v2.16b,v2.16b,v7.16b
    752 	aese		v0.16b,v23.16b
    753 
    754 	eor		v2.16b,v2.16b,v0.16b
    755 	st1		{v2.16b},[x1]
    756 
    757 .Lctr32_done:
    758 	ldr		x29,[sp],#16
    759 	ret
    760 .size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
    761 #endif
    762