Home | History | Annotate | Download | only in aes
      1 #include "arm_arch.h"
      2 
      3 #if __ARM_MAX_ARCH__>=7
      4 .text
      5 .arch	armv7-a
      6 .fpu	neon
      7 .code	32
      8 .align	5
      9 .Lrcon:
        @ Constant table read through r3 by the key-expansion code below.
        @   row 0: initial round constant, loaded into q1 and shifted left by
        @          one bit after every expansion step
        @   row 1: vtbl index mask, loaded into q2; on a little-endian build
        @          the index bytes are 13,14,15,12 in every 32-bit lane
        @   row 2: round constant reloaded into q1 by the 128-bit path once
        @          its eight-pass loop has finished
     10 .long	0x01,0x01,0x01,0x01
     11 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
     12 .long	0x1b,0x1b,0x1b,0x1b
     13 
     14 .globl	aes_v8_set_encrypt_key
     15 .type	aes_v8_set_encrypt_key,%function
     16 .align	5
        @ aes_v8_set_encrypt_key -- expand a user key into a round-key schedule.
        @ In:  r0 = user key, r1 = key length in bits, r2 = key schedule
        @ Out: r0 =  0 on success
        @           -1 if r0 or r2 is zero
        @           -2 if r1 is not 128, 192 or 256
        @ On success the round keys are written upwards from r2 and the round
        @ count (10, 12 or 14) is stored at offset 240 of the schedule.  On
        @ exit r2 points at that offset and r12 holds the round count;
        @ aes_v8_set_decrypt_key, which enters through .Lenc_key, relies on both.
        @ Scratch: r1, r3, r12, q0-q3, q8-q10.  r0 is advanced past the key.
     17 aes_v8_set_encrypt_key:
     18 .Lenc_key:
        @ Argument checks; r3 carries the return code to .Lenc_key_abort.
     19 	mov	r3,#-1
     20 	cmp	r0,#0
     21 	beq	.Lenc_key_abort
     22 	cmp	r2,#0
     23 	beq	.Lenc_key_abort
     24 	mov	r3,#-2
     25 	cmp	r1,#128
     26 	blt	.Lenc_key_abort
     27 	cmp	r1,#256
     28 	bgt	.Lenc_key_abort
     29 	tst	r1,#0x3f
     30 	bne	.Lenc_key_abort
     31 
        @ The flags set by the compare against 192 are consumed by the blt/beq
        @ dispatch further down; none of the instructions in between change
        @ them.  q0 = 0, q3 = first 16 key bytes, q1 = round constant,
        @ q2 = vtbl mask, r1 = pass counter for the 128- and 192-bit loops.
     32 	adr	r3,.Lrcon
     33 	cmp	r1,#192
     34 
     35 	veor	q0,q0,q0
     36 	vld1.8	{q3},[r0]!
     37 	mov	r1,#8		@ reuse r1
     38 	vld1.32	{q1,q2},[r3]!
     39 
     40 	blt	.Loop128
     41 	beq	.L192
     42 	b	.L256
     43 
     44 .align	4
        @ 128-bit key.  Each pass stores the current round key q3 and derives
        @ the next one: q10 receives bytes of q3 selected by the mask in q2,
        @ is passed through aese with the all-zero q0 and XORed with the round
        @ constant q1, which is then shifted left by one bit.  The three
        @ vext/veor pairs XOR q3 with copies of itself moved up by 4, 8 and
        @ 12 bytes (zero filled from q0) before q10 is folded in.
     45 .Loop128:
     46 	vtbl.8	d20,{q3},d4
     47 	vtbl.8	d21,{q3},d5
     48 	vext.8	q9,q0,q3,#12
     49 	vst1.32	{q3},[r2]!
     50 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     51 	subs	r1,r1,#1
     52 
     53 	veor	q3,q3,q9
     54 	vext.8	q9,q0,q9,#12
     55 	veor	q3,q3,q9
     56 	vext.8	q9,q0,q9,#12
     57 	veor	q10,q10,q1
     58 	veor	q3,q3,q9
     59 	vshl.u8	q1,q1,#1
     60 	veor	q3,q3,q10
     61 	bne	.Loop128
     62 
        @ Two more unrolled steps using the round constant from the third
        @ table row (r3 was advanced past the first two rows by the load above).
     63 	vld1.32	{q1},[r3]
     64 
     65 	vtbl.8	d20,{q3},d4
     66 	vtbl.8	d21,{q3},d5
     67 	vext.8	q9,q0,q3,#12
     68 	vst1.32	{q3},[r2]!
     69 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     70 
     71 	veor	q3,q3,q9
     72 	vext.8	q9,q0,q9,#12
     73 	veor	q3,q3,q9
     74 	vext.8	q9,q0,q9,#12
     75 	veor	q10,q10,q1
     76 	veor	q3,q3,q9
     77 	vshl.u8	q1,q1,#1
     78 	veor	q3,q3,q10
     79 
     80 	vtbl.8	d20,{q3},d4
     81 	vtbl.8	d21,{q3},d5
     82 	vext.8	q9,q0,q3,#12
     83 	vst1.32	{q3},[r2]!
     84 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     85 
     86 	veor	q3,q3,q9
     87 	vext.8	q9,q0,q9,#12
     88 	veor	q3,q3,q9
     89 	vext.8	q9,q0,q9,#12
     90 	veor	q10,q10,q1
     91 	veor	q3,q3,q9
     92 	veor	q3,q3,q10
        @ Last round key is stored without write-back at offset 160; adding
        @ 0x50 moves r2 to offset 240 where .Ldone stores the round count.
     93 	vst1.32	{q3},[r2]
     94 	add	r2,r2,#0x50
     95 
     96 	mov	r12,#10
     97 	b	.Ldone
     98 
     99 .align	4
        @ 192-bit key.  The remaining 8 key bytes go to d16, and 8 is
        @ subtracted from every mask byte so that vtbl indexes into d16.
        @ Each of the 8 passes stores 8 bytes from d16 and 16 bytes from q3;
        @ with the initial 16-byte store that is 208 bytes, so adding 0x20
        @ afterwards moves r2 to offset 240.
    100 .L192:
    101 	vld1.8	{d16},[r0]!
    102 	vmov.i8	q10,#8			@ borrow q10
    103 	vst1.32	{q3},[r2]!
    104 	vsub.i8	q2,q2,q10	@ adjust the mask
    105 
    106 .Loop192:
    107 	vtbl.8	d20,{q8},d4
    108 	vtbl.8	d21,{q8},d5
    109 	vext.8	q9,q0,q3,#12
    110 	vst1.32	{d16},[r2]!
    111 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    112 	subs	r1,r1,#1
    113 
    114 	veor	q3,q3,q9
    115 	vext.8	q9,q0,q9,#12
    116 	veor	q3,q3,q9
    117 	vext.8	q9,q0,q9,#12
    118 	veor	q3,q3,q9
    119 
    120 	vdup.32	q9,d7[1]
    121 	veor	q9,q9,q8
    122 	veor	q10,q10,q1
    123 	vext.8	q8,q0,q8,#12
    124 	vshl.u8	q1,q1,#1
    125 	veor	q8,q8,q9
    126 	veor	q3,q3,q10
    127 	veor	q8,q8,q10
    128 	vst1.32	{q3},[r2]!
    129 	bne	.Loop192
    130 
    131 	mov	r12,#12
    132 	add	r2,r2,#0x20
    133 	b	.Ldone
    134 
    135 .align	4
        @ 256-bit key.  The second 16 key bytes go to q8.  Each of the 7 passes
        @ stores q8 and the newly derived q3 (32 bytes); together with the
        @ initial 16-byte store r2 reaches offset 240 on the final pass, which
        @ leaves through beq using the flags from the subs on r1.
    136 .L256:
    137 	vld1.8	{q8},[r0]
    138 	mov	r1,#7
    139 	mov	r12,#14
    140 	vst1.32	{q3},[r2]!
    141 
    142 .Loop256:
    143 	vtbl.8	d20,{q8},d4
    144 	vtbl.8	d21,{q8},d5
    145 	vext.8	q9,q0,q3,#12
    146 	vst1.32	{q8},[r2]!
    147 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    148 	subs	r1,r1,#1
    149 
    150 	veor	q3,q3,q9
    151 	vext.8	q9,q0,q9,#12
    152 	veor	q3,q3,q9
    153 	vext.8	q9,q0,q9,#12
    154 	veor	q10,q10,q1
    155 	veor	q3,q3,q9
    156 	vshl.u8	q1,q1,#1
    157 	veor	q3,q3,q10
    158 	vst1.32	{q3},[r2]!
    159 	beq	.Ldone
    160 
        @ Second half of the pass: q10 is the last word of the new q3 copied to
        @ all lanes and passed through aese with no rotation and no round
        @ constant; it is folded into q8 after the same shifted-XOR sequence.
    161 	vdup.32	q10,d7[1]
    162 	vext.8	q9,q0,q8,#12
    163 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    164 
    165 	veor	q8,q8,q9
    166 	vext.8	q9,q0,q9,#12
    167 	veor	q8,q8,q9
    168 	vext.8	q9,q0,q9,#12
    169 	veor	q8,q8,q9
    170 
    171 	veor	q8,q8,q10
    172 	b	.Loop256
    173 
        @ Common exit: r2 is at offset 240 of the schedule on every path.
    174 .Ldone:
    175 	str	r12,[r2]
    176 	mov	r3,#0
    177 
    178 .Lenc_key_abort:
    179 	mov	r0,r3			@ return value
    180 
    181 	bx	lr
    182 .size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
    183 
    184 .globl	aes_v8_set_decrypt_key
    185 .type	aes_v8_set_decrypt_key,%function
    186 .align	5
        @ aes_v8_set_decrypt_key -- build a decryption key schedule.
        @ In:  r0, r1, r2 are passed unchanged to .Lenc_key (user key, key
        @      length in bits, key schedule).
        @ Out: r0 = 0 on success, otherwise the non-zero code from .Lenc_key.
        @ The encryption schedule is generated first and then rewritten in
        @ place: round keys are swapped end for end, and every key except the
        @ first and last of the original order is passed through aesimc.
        @ r4 and lr are saved on the stack; q0, q1 and r12 are scratch.
    187 aes_v8_set_decrypt_key:
    188 	stmdb	sp!,{r4,lr}
    189 	bl	.Lenc_key
    190 
    191 	cmp	r0,#0
    192 	bne	.Ldec_key_abort
    193 
        @ .Lenc_key returns with r2 at schedule offset 240 and r12 = rounds.
        @ r2 walks upwards from the first round key, r0 walks downwards from
        @ the last one using the post-index step of -16 held in r4.
    194 	sub	r2,r2,#240		@ restore original r2
    195 	mov	r4,#-16
    196 	add	r0,r2,r12,lsl#4	@ end of key schedule
    197 
        @ Outermost pair: swapped only, no aesimc.
    198 	vld1.32	{q0},[r2]
    199 	vld1.32	{q1},[r0]
    200 	vst1.32	{q0},[r0],r4
    201 	vst1.32	{q1},[r2]!
    202 
        @ Inner pairs: swapped with aesimc applied to both, until the two
        @ pointers meet (unsigned compare).
    203 .Loop_imc:
    204 	vld1.32	{q0},[r2]
    205 	vld1.32	{q1},[r0]
    206 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    207 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    208 	vst1.32	{q0},[r0],r4
    209 	vst1.32	{q1},[r2]!
    210 	cmp	r0,r2
    211 	bhi	.Loop_imc
    212 
        @ Remaining key: aesimc applied and stored back through r0.
    213 	vld1.32	{q0},[r2]
    214 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    215 	vst1.32	{q0},[r0]
    216 
    217 	eor	r0,r0,r0		@ return value
    218 .Ldec_key_abort:
    219 	ldmia	sp!,{r4,pc}
    220 .size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
    221 .globl	aes_v8_encrypt
    222 .type	aes_v8_encrypt,%function
    223 .align	5
        @ aes_v8_encrypt -- encrypt one 16-byte block.
        @ In:  r0 = input block, r1 = output block, r2 = key schedule
        @ The round count is read from offset 240 of the schedule.
        @ Scratch: r3 (round counter), q0/q1 (alternating round keys),
        @ q2 (state).  r2 is advanced through the schedule.
    224 aes_v8_encrypt:
    225 	ldr	r3,[r2,#240]
    226 	vld1.32	{q0},[r2]!
    227 	vld1.8	{q2},[r0]
    228 	sub	r3,r3,#2
    229 	vld1.32	{q1},[r2]!
    230 
        @ Two rounds per pass; the next two round keys are loaded while the
        @ current ones are in use.  The loop ends when r3 reaches zero.
    231 .Loop_enc:
    232 .byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
    233 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    234 	vld1.32	{q0},[r2]!
    235 	subs	r3,r3,#2
    236 .byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
    237 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    238 	vld1.32	{q1},[r2]!
    239 	bgt	.Loop_enc
    240 
        @ Last two rounds: the final aese is not followed by aesmc, and the
        @ last round key (loaded without write-back) is applied with veor.
    241 .byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
    242 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    243 	vld1.32	{q0},[r2]
    244 .byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
    245 	veor	q2,q2,q0
    246 
    247 	vst1.8	{q2},[r1]
    248 	bx	lr
    249 .size	aes_v8_encrypt,.-aes_v8_encrypt
    250 .globl	aes_v8_decrypt
    251 .type	aes_v8_decrypt,%function
    252 .align	5
        @ aes_v8_decrypt -- decrypt one 16-byte block.
        @ In:  r0 = input block, r1 = output block, r2 = key schedule
        @ The round count is read from offset 240 of the schedule.
        @ Same structure as aes_v8_encrypt with aesd/aesimc in place of
        @ aese/aesmc.  Scratch: r3, q0-q2.  r2 is advanced through the
        @ schedule.
    253 aes_v8_decrypt:
    254 	ldr	r3,[r2,#240]
    255 	vld1.32	{q0},[r2]!
    256 	vld1.8	{q2},[r0]
    257 	sub	r3,r3,#2
    258 	vld1.32	{q1},[r2]!
    259 
        @ Two rounds per pass, round keys alternating between q0 and q1.
    260 .Loop_dec:
    261 .byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
    262 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    263 	vld1.32	{q0},[r2]!
    264 	subs	r3,r3,#2
    265 .byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
    266 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    267 	vld1.32	{q1},[r2]!
    268 	bgt	.Loop_dec
    269 
        @ Last two rounds: the final aesd is not followed by aesimc, and the
        @ last round key (loaded without write-back) is applied with veor.
    270 .byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
    271 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    272 	vld1.32	{q0},[r2]
    273 .byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
    274 	veor	q2,q2,q0
    275 
    276 	vst1.8	{q2},[r1]
    277 	bx	lr
    278 .size	aes_v8_decrypt,.-aes_v8_decrypt
    279 .globl	aes_v8_cbc_encrypt
    280 .type	aes_v8_cbc_encrypt,%function
    281 .align	5
        @ aes_v8_cbc_encrypt -- CBC-mode encryption or decryption.
        @ In:  r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule
        @      [sp]    = pointer to the 16-byte IV (loaded into r4)
        @      [sp,#4] = direction flag (loaded into r5): zero selects the
        @                decrypt path, non-zero the encrypt path
        @ If r2 is below 16 the routine returns without reading or writing any
        @ data.  Otherwise whole 16-byte blocks are processed (a trailing
        @ partial block is ignored) and the final chaining value in q6 is
        @ stored back through r4.
        @ r4-r8, lr and d8-d15 are saved on entry and restored on exit.
    282 aes_v8_cbc_encrypt:
    283 	mov	ip,sp
    284 	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
    285 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
    286 	ldmia	ip,{r4,r5}		@ load remaining args
        @ r8 is the post-increment applied to r0 after a block is fetched; it
        @ is forced to zero when the block being fetched is the last one, so
        @ the look-ahead load does not move past the input.
    287 	subs	r2,r2,#16
    288 	mov	r8,#16
    289 	blo	.Lcbc_abort
    290 	moveq	r8,#0
    291 
        @ The flags from this compare select the direction at the beq to
        @ .Lcbc_dec below; nothing in between modifies them.  r5 is then
        @ reused for the round count read from offset 240 of the schedule.
    292 	cmp	r5,#0			@ en- or decrypting?
    293 	ldr	r5,[r3,#240]
    294 	and	r2,r2,#-16
    295 	vld1.8	{q6},[r4]
    296 	vld1.8	{q0},[r0],r8
    297 
        @ Register plan: q8,q9 = first two round keys; q10-q15 and q7 = last
        @ seven round keys; r5 = rounds-8; r7 = address of the third round key.
    298 	vld1.32	{q8,q9},[r3]		@ load key schedule...
    299 	sub	r5,r5,#6
    300 	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
    301 	sub	r5,r5,#2
    302 	vld1.32	{q10,q11},[r7]!
    303 	vld1.32	{q12,q13},[r7]!
    304 	vld1.32	{q14,q15},[r7]!
    305 	vld1.32	{q7},[r7]
    306 
    307 	add	r7,r3,#32
    308 	mov	r6,r5
    309 	beq	.Lcbc_dec
    310 
        @ Encrypt path.  q0 = first block XOR IV.  q5 = first round key XOR
        @ last round key: the state in q0 is never XORed with the last round
        @ key, so XORing the next input block with q5 and feeding the result
        @ to aese as the key applies the chaining XOR and the first round key
        @ in a single step.  r5 == 2 selects the shorter .Lcbc_enc128 path.
    311 	cmp	r5,#2
    312 	veor	q0,q0,q6
    313 	veor	q5,q8,q7
    314 	beq	.Lcbc_enc128
    315 
        @ Longer schedules: q2,q3 = round keys 2 and 3; r7, r6, r12, r14 and
        @ r3 are set to the addresses of round keys 1, 4, 5, 6 and 7.
    316 	vld1.32	{q2,q3},[r7]
    317 	add	r7,r3,#16
    318 	add	r6,r3,#16*4
    319 	add	r12,r3,#16*5
    320 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    321 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    322 	add	r14,r3,#16*6
    323 	add	r3,r3,#16*7
    324 	b	.Lenter_cbc_enc
    325 
    326 .align	4
        @ One block per pass.  The previous output block q6 is stored at the
        @ top of the loop; the last one is stored after the loop exits.
    327 .Loop_cbc_enc:
    328 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    329 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    330 	vst1.8	{q6},[r1]!
    331 .Lenter_cbc_enc:
    332 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    333 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    334 .byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
    335 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    336 	vld1.32	{q8},[r6]
        @ r5 == 4 skips the two extra rounds below via .Lcbc_enc192.
    337 	cmp	r5,#4
    338 .byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
    339 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    340 	vld1.32	{q9},[r12]
    341 	beq	.Lcbc_enc192
    342 
    343 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    344 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    345 	vld1.32	{q8},[r14]
    346 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    347 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    348 	vld1.32	{q9},[r3]
    349 	nop
    350 
    351 .Lcbc_enc192:
    352 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    353 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
        @ The flags from this subs drive both the moveq below and the bhs at
        @ the bottom of the loop.
    354 	subs	r2,r2,#16
    355 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    356 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    357 	moveq	r8,#0
    358 .byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
    359 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    360 .byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
    361 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    362 	vld1.8	{q8},[r0],r8
    363 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    364 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    365 	veor	q8,q8,q5
    366 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    367 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    368 	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
    369 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    370 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    371 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    372 	veor	q6,q0,q7
    373 	bhs	.Loop_cbc_enc
    374 
    375 	vst1.8	{q6},[r1]!
    376 	b	.Lcbc_done
    377 
    378 .align	5
        @ Encrypt path for r5 == 2: all round keys stay in registers (q8, q9,
        @ q2, q3, q10-q15, q7), so the loop reloads only the input block.
    379 .Lcbc_enc128:
    380 	vld1.32	{q2,q3},[r7]
    381 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    382 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    383 	b	.Lenter_cbc_enc128
    384 .Loop_cbc_enc128:
    385 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    386 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    387 	vst1.8	{q6},[r1]!
    388 .Lenter_cbc_enc128:
    389 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    390 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    391 	subs	r2,r2,#16
    392 .byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
    393 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    394 	moveq	r8,#0
    395 .byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
    396 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    397 .byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
    398 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    399 .byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
    400 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    401 	vld1.8	{q8},[r0],r8
    402 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    403 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    404 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    405 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    406 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    407 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    408 	veor	q8,q8,q5
    409 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    410 	veor	q6,q0,q7
    411 	bhs	.Loop_cbc_enc128
    412 
    413 	vst1.8	{q6},[r1]!
    414 	b	.Lcbc_done
    415 .align	5
        @ Decrypt path.  Up to three blocks are processed per pass in q0, q1
        @ and q10.  q2, q3 and q11 keep copies of the input blocks: they are
        @ XORed into the following outputs and q11 becomes the next chaining
        @ value in q6.  r2 is biased by a further 32; a borrow here means at
        @ most two blocks are available and only the tail code runs.
    416 .Lcbc_dec:
    417 	vld1.8	{q10},[r0]!
    418 	subs	r2,r2,#32		@ bias
    419 	add	r6,r5,#2
    420 	vorr	q3,q0,q0
    421 	vorr	q1,q0,q0
    422 	vorr	q11,q10,q10
    423 	blo	.Lcbc_dec_tail
    424 
    425 	vorr	q1,q10,q10
    426 	vld1.8	{q10},[r0]!
    427 	vorr	q2,q0,q0
    428 	vorr	q3,q1,q1
    429 	vorr	q11,q10,q10
    430 
        @ Inner loop: two rounds per pass on all three blocks, round keys
        @ streamed through q8/q9 from r7 until r6 reaches zero.
    431 .Loop3x_cbc_dec:
    432 .byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
    433 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    434 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    435 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    436 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    437 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    438 	vld1.32	{q8},[r7]!
    439 	subs	r6,r6,#2
    440 .byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
    441 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    442 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    443 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    444 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    445 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    446 	vld1.32	{q9},[r7]!
    447 	bgt	.Loop3x_cbc_dec
    448 
    449 .byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
    450 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    451 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    452 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    453 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    454 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
        @ q4, q5 and q9 are the chaining values pre-XORed with the last round
        @ key q7.  The flags from the subs below are consumed by the movlo
        @ that follows and by the bhs at the bottom of the three-block loop.
    455 	veor	q4,q6,q7
    456 	subs	r2,r2,#0x30
    457 	veor	q5,q2,q7
    458 	movlo	r6,r2			@ r6 is zero at this point; on borrow it takes the negative r2
    459 .byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
    460 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    461 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    462 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    463 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    464 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    465 	veor	q9,q3,q7
    466 	add	r0,r0,r6		@ r0 is adjusted in such a way that
    467 					@ at exit from the loop q1 and q10
    468 					@ are loaded with the last blocks
    469 	vorr	q6,q11,q11
    470 	mov	r7,r3
    471 .byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
    472 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    473 .byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
    474 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    475 .byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
    476 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    477 	vld1.8	{q2},[r0]!
    478 .byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
    479 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    480 .byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
    481 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    482 .byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
    483 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    484 	vld1.8	{q3},[r0]!
    485 .byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
    486 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    487 .byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
    488 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    489 .byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
    490 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    491 	vld1.8	{q11},[r0]!
    492 .byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
    493 .byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
    494 .byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
    495 	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
    496 	add	r6,r5,#2
    497 	veor	q4,q4,q0
    498 	veor	q5,q5,q1
    499 	veor	q10,q10,q9
    500 	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
    501 	vst1.8	{q4},[r1]!
    502 	vorr	q0,q2,q2
    503 	vst1.8	{q5},[r1]!
    504 	vorr	q1,q3,q3
    505 	vst1.8	{q10},[r1]!
    506 	vorr	q10,q11,q11
    507 	bhs	.Loop3x_cbc_dec
    508 
        @ r2 equal to -0x30 here means no blocks are left.
    509 	cmn	r2,#0x30
    510 	beq	.Lcbc_done
    511 	nop
    512 
        @ Tail: one or two remaining blocks, processed in q1 and q10.
    513 .Lcbc_dec_tail:
    514 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    515 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    516 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    517 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    518 	vld1.32	{q8},[r7]!
    519 	subs	r6,r6,#2
    520 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    521 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    522 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    523 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    524 	vld1.32	{q9},[r7]!
    525 	bgt	.Lcbc_dec_tail
    526 
    527 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    528 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    529 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    530 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    531 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    532 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    533 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    534 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    535 .byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
    536 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    537 .byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
    538 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
        @ The flags from this cmn are consumed by the beq to .Lcbc_dec_one:
        @ equal means a single block is left, otherwise two.
    539 	cmn	r2,#0x20
    540 .byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
    541 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    542 .byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
    543 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    544 	veor	q5,q6,q7
    545 .byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
    546 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    547 .byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
    548 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    549 	veor	q9,q3,q7
    550 .byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
    551 .byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
    552 	beq	.Lcbc_dec_one
    553 	veor	q5,q5,q1
    554 	veor	q9,q9,q10
    555 	vorr	q6,q11,q11
    556 	vst1.8	{q5},[r1]!
    557 	vst1.8	{q9},[r1]!
    558 	b	.Lcbc_done
    559 
    560 .Lcbc_dec_one:
    561 	veor	q5,q5,q10
    562 	vorr	q6,q11,q11
    563 	vst1.8	{q5},[r1]!
    564 
        @ Write the final chaining value back through the IV pointer.
    565 .Lcbc_done:
    566 	vst1.8	{q6},[r4]
    567 .Lcbc_abort:
    568 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    569 	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
    570 .size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
    571 .globl	aes_v8_ctr32_encrypt_blocks
    572 .type	aes_v8_ctr32_encrypt_blocks,%function
    573 .align	5
        @ aes_v8_ctr32_encrypt_blocks -- CTR mode with a 32-bit counter.
        @ In:  r0 = input, r1 = output, r2 = number of 16-byte blocks,
        @      r3 = key schedule, [sp] = pointer to the 16-byte counter block
        @      (loaded into r4)
        @ Each input block is XORed with the encryption of the counter block.
        @ Only the last 32-bit word of the counter block is incremented: it is
        @ kept in r8 and inserted into lane 3 of each working copy after rev.
        @ The counter block in memory is not written back.
        @ r4-r10, lr and d8-d15 are saved on entry and restored on exit.
        @ NOTE(review): a block count of zero is not special-cased and appears
        @ to reach the two-block store in the tail; presumably callers never
        @ pass zero -- confirm.
    574 aes_v8_ctr32_encrypt_blocks:
    575 	mov	ip,sp
    576 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
    577 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
    578 	ldr	r4, [ip]		@ load remaining arg
    579 	ldr	r5,[r3,#240]
    580 
    581 	ldr	r8, [r4, #12]
    582 	vld1.32	{q0},[r4]
    583 
        @ Register plan: q8,q9 = first two round keys; q12-q15 and q7 = last
        @ five round keys; r5 = rounds-6; r7 = address of the third round key.
        @ The flags from the compare of r2 against 2 are consumed by the movlo
        @ and the bls further down; nothing in between modifies them.
    584 	vld1.32	{q8,q9},[r3]		@ load key schedule...
    585 	sub	r5,r5,#4
    586 	mov	r12,#16
    587 	cmp	r2,#2
    588 	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
    589 	sub	r5,r5,#2
    590 	vld1.32	{q12,q13},[r7]!
    591 	vld1.32	{q14,q15},[r7]!
    592 	vld1.32	{q7},[r7]
    593 	add	r7,r3,#32
    594 	mov	r6,r5
    595 	movlo	r12,#0
    596 #ifndef __ARMEB__
    597 	rev	r8, r8
    598 #endif
        @ q0, q1 and q10 become counter blocks n, n+1 and n+2; q6 keeps an
        @ unmodified copy of the counter block used to seed later passes.
    599 	vorr	q1,q0,q0
    600 	add	r10, r8, #1
    601 	vorr	q10,q0,q0
    602 	add	r8, r8, #2
    603 	vorr	q6,q0,q0
    604 	rev	r10, r10
    605 	vmov.32	d3[1],r10
    606 	bls	.Lctr32_tail
    607 	rev	r12, r8
    608 	sub	r2,r2,#3		@ bias
    609 	vmov.32	d21[1],r12
    610 	b	.Loop3x_ctr32
    611 
    612 .align	4
        @ Three blocks per pass.  Inner loop: two rounds per iteration on
        @ q0, q1 and q10 with round keys streamed through q8/q9 from r7.
    613 .Loop3x_ctr32:
    614 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    615 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    616 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    617 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    618 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
    619 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    620 	vld1.32	{q8},[r7]!
    621 	subs	r6,r6,#2
    622 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    623 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    624 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    625 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    626 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
    627 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    628 	vld1.32	{q9},[r7]!
    629 	bgt	.Loop3x_ctr32
    630 
        @ The running states move to q4, q5 and q9 so that q0, q1 and q10 can
        @ be rebuilt from q6 with the next three counter values (r9, r10 and
        @ r8 after its increment by 3) while the last rounds finish.  The
        @ input blocks in q2, q3 and q11 are XORed with the last round key q7
        @ ahead of the final XOR with the states.
    631 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    632 .byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
    633 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    634 .byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
    635 	vld1.8	{q2},[r0]!
    636 	vorr	q0,q6,q6
    637 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
    638 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    639 	vld1.8	{q3},[r0]!
    640 	vorr	q1,q6,q6
    641 .byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
    642 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    643 .byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
    644 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    645 	vld1.8	{q11},[r0]!
    646 	mov	r7,r3
    647 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
    648 .byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
    649 	vorr	q10,q6,q6
    650 	add	r9,r8,#1
    651 .byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
    652 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    653 .byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
    654 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    655 	veor	q2,q2,q7
    656 	add	r10,r8,#2
    657 .byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
    658 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    659 	veor	q3,q3,q7
    660 	add	r8,r8,#3
    661 .byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
    662 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    663 .byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
    664 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    665 	veor	q11,q11,q7
    666 	rev	r9,r9
    667 .byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
    668 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    669 	vmov.32	d1[1], r9
    670 	rev	r10,r10
    671 .byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
    672 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    673 .byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
    674 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    675 	vmov.32	d3[1], r10
    676 	rev	r12,r8
    677 .byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
    678 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    679 	vmov.32	d21[1], r12
        @ The flags from this subs are consumed by the bhs at the bottom of
        @ the loop; the NEON instructions and the mov in between leave them
        @ unchanged.
    680 	subs	r2,r2,#3
    681 .byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
    682 .byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
    683 .byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
    684 
    685 	veor	q2,q2,q4
    686 	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
    687 	vst1.8	{q2},[r1]!
    688 	veor	q3,q3,q5
    689 	mov	r6,r5
    690 	vst1.8	{q3},[r1]!
    691 	veor	q11,q11,q9
    692 	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
    693 	vst1.8	{q11},[r1]!
    694 	bhs	.Loop3x_ctr32
    695 
        @ Undo the bias: r2 is now the number of blocks left.  Zero means
        @ done.  r12 is the step between the two tail input loads and is
        @ zero when only one block is left, so the second load re-reads the
        @ same block instead of reading past the input.
    696 	adds	r2,r2,#3
    697 	beq	.Lctr32_done
    698 	cmp	r2,#1
    699 	mov	r12,#16
    700 	moveq	r12,#0
    701 
        @ Tail: two counter blocks (q0, q1) are always encrypted; the second
        @ result is stored only when r2 is not 1.
    702 .Lctr32_tail:
    703 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    704 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    705 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    706 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    707 	vld1.32	{q8},[r7]!
    708 	subs	r6,r6,#2
    709 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    710 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    711 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    712 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    713 	vld1.32	{q9},[r7]!
    714 	bgt	.Lctr32_tail
    715 
    716 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    717 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    718 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    719 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    720 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    721 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    722 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    723 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    724 	vld1.8	{q2},[r0],r12
    725 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    726 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    727 .byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
    728 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    729 	vld1.8	{q3},[r0]
    730 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    731 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    732 .byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
    733 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    734 	veor	q2,q2,q7
    735 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    736 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    737 .byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
    738 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    739 	veor	q3,q3,q7
    740 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    741 .byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
    742 
    743 	cmp	r2,#1
    744 	veor	q2,q2,q0
    745 	veor	q3,q3,q1
    746 	vst1.8	{q2},[r1]!
    747 	beq	.Lctr32_done
    748 	vst1.8	{q3},[r1]
    749 
    750 .Lctr32_done:
    751 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    752 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
    753 .size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
    754 #endif
    755