Home | History | Annotate | Download | only in aes
      1 #if defined(__arm__)
      2 #include <openssl/arm_arch.h>
      3 
      4 #if __ARM_MAX_ARCH__>=7
      5 .text
      6 .arch	armv7-a
      7 .fpu	neon
      8 .code	32
      9 .align	5
     10 .Lrcon:
        @ Constant table for the key expansion in aes_v8_set_encrypt_key, which
        @ addresses it through r3 (adr r3,.Lrcon).
        @ Row 0: starting round constant 0x01 in every 32-bit lane. It is loaded
        @        into q1 and doubled with vshl.u8 after each expansion step.
     11 .long	0x01,0x01,0x01,0x01
        @ Row 1: vtbl byte-index mask, loaded into q2 together with row 0.
     12 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
        @ Row 2: 0x1b in every lane. It replaces q1 once the 8-iteration 128-bit
        @        loop has finished, for the two remaining expansion steps.
     13 .long	0x1b,0x1b,0x1b,0x1b
     14 
     15 .globl	aes_v8_set_encrypt_key
     16 .type	aes_v8_set_encrypt_key,%function
     17 .align	5
        @ aes_v8_set_encrypt_key: expand a user key into an encryption key schedule.
        @
        @ In:   r0 = user key (16, 24 or 32 bytes are read, depending on r1)
        @       r1 = key length in bits: 128, 192 or 256
        @       r2 = output key schedule; round keys are written from offset 0 and
        @            the round count (10, 12 or 14) is stored at offset 240
        @ Out:  r0 =  0 on success
        @            -1 if r0 or r2 is zero
        @            -2 if r1 is below 128, above 256, or not a multiple of 64
        @ Uses: r1, r3, r12, q0-q3, q8-q10 and the flags.
        @
        @ On the success path r2 is left at schedule+240 and r12 holds the round
        @ count. aes_v8_set_decrypt_key enters through .Lenc_key and depends on
        @ both of these.
     18 aes_v8_set_encrypt_key:
     19 .Lenc_key:
        @ Argument checks. r3 carries the error code to .Lenc_key_abort.
     20 	mov	r3,#-1
     21 	cmp	r0,#0
     22 	beq	.Lenc_key_abort
     23 	cmp	r2,#0
     24 	beq	.Lenc_key_abort
     25 	mov	r3,#-2
     26 	cmp	r1,#128
     27 	blt	.Lenc_key_abort
     28 	cmp	r1,#256
     29 	bgt	.Lenc_key_abort
     30 	tst	r1,#0x3f
     31 	bne	.Lenc_key_abort
     32 
        @ The flags from the cmp r1,#192 below select the key size at the
        @ blt/beq/b sequence further down. The NEON instructions and the plain
        @ mov in between leave the flags untouched.
     33 	adr	r3,.Lrcon
     34 	cmp	r1,#192
     35 
        @ q0 = zero, q3 = first 16 key bytes, q1 = round constant,
        @ q2 = rotate-n-splat mask, r1 = loop counter from here on.
     36 	veor	q0,q0,q0
     37 	vld1.8	{q3},[r0]!
     38 	mov	r1,#8		@ reuse r1
     39 	vld1.32	{q1,q2},[r3]!
     40 
     41 	blt	.Loop128
     42 	beq	.L192
     43 	b	.L256
     44 
     45 .align	4
        @ 128-bit key: each pass stores the current round key (q3) and derives the
        @ next one. vtbl rotates the last word of q3 and copies it to all four
        @ lanes of q10. aese against the all-zero q0 then substitutes its bytes;
        @ the ShiftRows part of aese has no effect because all four columns are
        @ identical. The vext/veor chain accumulates the running XOR of the
        @ previous words.
     46 .Loop128:
     47 	vtbl.8	d20,{q3},d4
     48 	vtbl.8	d21,{q3},d5
     49 	vext.8	q9,q0,q3,#12
     50 	vst1.32	{q3},[r2]!
     51 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     52 	subs	r1,r1,#1
     53 
     54 	veor	q3,q3,q9
     55 	vext.8	q9,q0,q9,#12
     56 	veor	q3,q3,q9
     57 	vext.8	q9,q0,q9,#12
     58 	veor	q10,q10,q1
     59 	veor	q3,q3,q9
     60 	vshl.u8	q1,q1,#1
     61 	veor	q3,q3,q10
     62 	bne	.Loop128
     63 
        @ The byte-wise doubling cannot produce the next round constant, so q1 is
        @ reloaded with the 0x1b row for the last two expansion steps.
     64 	vld1.32	{q1},[r3]
     65 
     66 	vtbl.8	d20,{q3},d4
     67 	vtbl.8	d21,{q3},d5
     68 	vext.8	q9,q0,q3,#12
     69 	vst1.32	{q3},[r2]!
     70 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     71 
     72 	veor	q3,q3,q9
     73 	vext.8	q9,q0,q9,#12
     74 	veor	q3,q3,q9
     75 	vext.8	q9,q0,q9,#12
     76 	veor	q10,q10,q1
     77 	veor	q3,q3,q9
     78 	vshl.u8	q1,q1,#1
     79 	veor	q3,q3,q10
     80 
     81 	vtbl.8	d20,{q3},d4
     82 	vtbl.8	d21,{q3},d5
     83 	vext.8	q9,q0,q3,#12
     84 	vst1.32	{q3},[r2]!
     85 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     86 
     87 	veor	q3,q3,q9
     88 	vext.8	q9,q0,q9,#12
     89 	veor	q3,q3,q9
     90 	vext.8	q9,q0,q9,#12
     91 	veor	q10,q10,q1
     92 	veor	q3,q3,q9
     93 	veor	q3,q3,q10
        @ The last round key is stored without writeback: r2 is at schedule+160
        @ here, so adding 0x50 brings it to schedule+240.
     94 	vst1.32	{q3},[r2]
     95 	add	r2,r2,#0x50
     96 
     97 	mov	r12,#10
     98 	b	.Ldone
     99 
    100 .align	4
        @ 192-bit key: the extra 8 key bytes live in d16. The mask is lowered by 8
        @ so that vtbl picks the last word of d16 instead of the last word of a
        @ full q register.
    101 .L192:
    102 	vld1.8	{d16},[r0]!
    103 	vmov.i8	q10,#8			@ borrow q10
    104 	vst1.32	{q3},[r2]!
    105 	vsub.i8	q2,q2,q10	@ adjust the mask
    106 
        @ Each of the 8 passes stores 8 + 16 bytes, i.e. 16 + 8*24 = 208 bytes in
        @ total including the store above. The add of 0x20 after the loop moves r2
        @ to schedule+240.
    107 .Loop192:
    108 	vtbl.8	d20,{q8},d4
    109 	vtbl.8	d21,{q8},d5
    110 	vext.8	q9,q0,q3,#12
    111 	vst1.32	{d16},[r2]!
    112 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    113 	subs	r1,r1,#1
    114 
    115 	veor	q3,q3,q9
    116 	vext.8	q9,q0,q9,#12
    117 	veor	q3,q3,q9
    118 	vext.8	q9,q0,q9,#12
    119 	veor	q3,q3,q9
    120 
    121 	vdup.32	q9,d7[1]
    122 	veor	q9,q9,q8
    123 	veor	q10,q10,q1
    124 	vext.8	q8,q0,q8,#12
    125 	vshl.u8	q1,q1,#1
    126 	veor	q8,q8,q9
    127 	veor	q3,q3,q10
    128 	veor	q8,q8,q10
    129 	vst1.32	{q3},[r2]!
    130 	bne	.Loop192
    131 
    132 	mov	r12,#12
    133 	add	r2,r2,#0x20
    134 	b	.Ldone
    135 
    136 .align	4
        @ 256-bit key: q3 and q8 hold the two 16-byte halves. The loop counter is
        @ 7; the loop leaves through the beq in its middle after the seventh q3
        @ store, at which point r2 is at schedule+240 (16 + 7*32 bytes written).
    137 .L256:
    138 	vld1.8	{q8},[r0]
    139 	mov	r1,#7
    140 	mov	r12,#14
    141 	vst1.32	{q3},[r2]!
    142 
    143 .Loop256:
    144 	vtbl.8	d20,{q8},d4
    145 	vtbl.8	d21,{q8},d5
    146 	vext.8	q9,q0,q3,#12
    147 	vst1.32	{q8},[r2]!
    148 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    149 	subs	r1,r1,#1
    150 
    151 	veor	q3,q3,q9
    152 	vext.8	q9,q0,q9,#12
    153 	veor	q3,q3,q9
    154 	vext.8	q9,q0,q9,#12
    155 	veor	q10,q10,q1
    156 	veor	q3,q3,q9
    157 	vshl.u8	q1,q1,#1
    158 	veor	q3,q3,q10
    159 	vst1.32	{q3},[r2]!
        @ Tests the flags of the subs above; the NEON instructions in between do
        @ not change them.
    160 	beq	.Ldone
    161 
        @ Second half of the step: the last word of the new q3 is splatted without
        @ rotation and no round constant is mixed in.
    162 	vdup.32	q10,d7[1]
    163 	vext.8	q9,q0,q8,#12
    164 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    165 
    166 	veor	q8,q8,q9
    167 	vext.8	q9,q0,q9,#12
    168 	veor	q8,q8,q9
    169 	vext.8	q9,q0,q9,#12
    170 	veor	q8,q8,q9
    171 
    172 	veor	q8,q8,q10
    173 	b	.Loop256
    174 
        @ Success: r2 == schedule+240 on every path, so this stores the round count
        @ at offset 240.
    175 .Ldone:
    176 	str	r12,[r2]
    177 	mov	r3,#0
    178 
    179 .Lenc_key_abort:
    180 	mov	r0,r3			@ return value
    181 
    182 	bx	lr
    183 .size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
    184 
    185 .globl	aes_v8_set_decrypt_key
    186 .type	aes_v8_set_decrypt_key,%function
    187 .align	5
        @ aes_v8_set_decrypt_key: build a decryption key schedule.
        @
        @ Takes the same r0/r1/r2 arguments as aes_v8_set_encrypt_key. If the
        @ expansion fails its error code is returned unchanged in r0; otherwise
        @ r0 is 0. The encryption schedule is produced first and then reversed
        @ in place. The first and last round keys are swapped as they are; every
        @ other round key is passed through aesimc.
    188 aes_v8_set_decrypt_key:
    189 	stmdb	sp!,{r4,lr}
        @ Enter the expansion code past its global label. On success it returns
        @ with r2 = schedule+240 and r12 = round count.
    190 	bl	.Lenc_key
    191 
    192 	cmp	r0,#0
    193 	bne	.Ldec_key_abort
    194 
    195 	sub	r2,r2,#240		@ restore original r2
        @ r4 = -16 is the post-index step that walks r0 backwards.
    196 	mov	r4,#-16
    197 	add	r0,r2,r12,lsl#4	@ end of key schedule
    198 
        @ Swap the outermost pair without transformation.
    199 	vld1.32	{q0},[r2]
    200 	vld1.32	{q1},[r0]
    201 	vst1.32	{q0},[r0],r4
    202 	vst1.32	{q1},[r2]!
    203 
        @ Swap the inner pairs, applying aesimc to both, while r0 is still above
        @ r2 (unsigned compare).
    204 .Loop_imc:
    205 	vld1.32	{q0},[r2]
    206 	vld1.32	{q1},[r0]
    207 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    208 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    209 	vst1.32	{q0},[r0],r4
    210 	vst1.32	{q1},[r2]!
    211 	cmp	r0,r2
    212 	bhi	.Loop_imc
    213 
        @ With the even round counts set by .Lenc_key (10, 12, 14), r0 and r2 now
        @ both point at the middle round key, which is transformed in place.
    214 	vld1.32	{q0},[r2]
    215 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    216 	vst1.32	{q0},[r0]
    217 
    218 	eor	r0,r0,r0		@ return value
    219 .Ldec_key_abort:
    220 	ldmia	sp!,{r4,pc}
    221 .size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
    222 .globl	aes_v8_encrypt
    223 .type	aes_v8_encrypt,%function
    224 .align	5
        @ aes_v8_encrypt: encrypt a single 16-byte block.
        @
        @ In:   r0 = input block, r1 = output block,
        @       r2 = key schedule (the round count is read from offset 240)
        @ Uses: r2, r3, q0-q2 and the flags.
    225 aes_v8_encrypt:
        @ q0/q1 alternate as the two round keys in flight, q2 is the state and
        @ r3 = rounds - 2 counts down by two per loop pass.
    226 	ldr	r3,[r2,#240]
    227 	vld1.32	{q0},[r2]!
    228 	vld1.8	{q2},[r0]
    229 	sub	r3,r3,#2
    230 	vld1.32	{q1},[r2]!
    231 
        @ Two full rounds (aese + aesmc) per pass.
    232 .Loop_enc:
    233 .byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
    234 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    235 	vld1.32	{q0},[r2]!
    236 	subs	r3,r3,#2
    237 .byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
    238 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    239 	vld1.32	{q1},[r2]!
    240 	bgt	.Loop_enc
    241 
        @ Last two rounds: the final aese is not followed by aesmc, and the last
        @ round key is applied with a plain XOR.
    242 .byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
    243 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    244 	vld1.32	{q0},[r2]
    245 .byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
    246 	veor	q2,q2,q0
    247 
    248 	vst1.8	{q2},[r1]
    249 	bx	lr
    250 .size	aes_v8_encrypt,.-aes_v8_encrypt
    251 .globl	aes_v8_decrypt
    252 .type	aes_v8_decrypt,%function
    253 .align	5
        @ aes_v8_decrypt: decrypt a single 16-byte block.
        @
        @ In:   r0 = input block, r1 = output block,
        @       r2 = key schedule (the round count is read from offset 240);
        @            presumably one prepared by aes_v8_set_decrypt_key - confirm
        @            against callers
        @ Uses: r2, r3, q0-q2 and the flags.
        @ The structure is identical to aes_v8_encrypt, with aesd/aesimc in place
        @ of aese/aesmc.
    254 aes_v8_decrypt:
    255 	ldr	r3,[r2,#240]
    256 	vld1.32	{q0},[r2]!
    257 	vld1.8	{q2},[r0]
    258 	sub	r3,r3,#2
    259 	vld1.32	{q1},[r2]!
    260 
        @ Two full rounds (aesd + aesimc) per pass; r3 counts down by two.
    261 .Loop_dec:
    262 .byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
    263 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    264 	vld1.32	{q0},[r2]!
    265 	subs	r3,r3,#2
    266 .byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
    267 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    268 	vld1.32	{q1},[r2]!
    269 	bgt	.Loop_dec
    270 
        @ Last two rounds: the final aesd is not followed by aesimc, and the last
        @ round key is applied with a plain XOR.
    271 .byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
    272 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    273 	vld1.32	{q0},[r2]
    274 .byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
    275 	veor	q2,q2,q0
    276 
    277 	vst1.8	{q2},[r1]
    278 	bx	lr
    279 .size	aes_v8_decrypt,.-aes_v8_decrypt
    280 .globl	aes_v8_cbc_encrypt
    281 .type	aes_v8_cbc_encrypt,%function
    282 .align	5
        @ aes_v8_cbc_encrypt: CBC-mode encryption or decryption.
        @
        @ In:   r0 = input, r1 = output, r2 = length in bytes, r3 = key schedule
        @       [sp]   = pointer to the 16-byte IV (read on entry, rewritten with
        @                the new chaining value at .Lcbc_done)
        @       [sp,4] = zero selects decryption, non-zero selects encryption
        @
        @ A length below 16 returns immediately without touching the output or the
        @ IV. Otherwise only whole 16-byte blocks are processed: the length is
        @ rounded down and a trailing partial block is ignored.
        @ r4-r8, lr and d8-d15 are saved on entry and restored on exit. lr is used
        @ as a scratch pointer on the encrypt path.
    283 aes_v8_cbc_encrypt:
    284 	mov	ip,sp
    285 	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
    286 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
    287 	ldmia	ip,{r4,r5}		@ load remaining args
        @ r8 is the post-index step for input loads: 16 normally, 0 when exactly
        @ one block is left, so the pointer is not advanced past the input.
    288 	subs	r2,r2,#16
    289 	mov	r8,#16
    290 	blo	.Lcbc_abort
    291 	moveq	r8,#0
    292 
        @ The flags of this cmp are consumed by the beq .Lcbc_dec further down.
        @ Nothing in between sets flags (ldr, and, sub, add and mov are all
        @ non-flag-setting forms).
    293 	cmp	r5,#0			@ en- or decrypting?
    294 	ldr	r5,[r3,#240]
    295 	and	r2,r2,#-16
    296 	vld1.8	{q6},[r4]
    297 	vld1.8	{q0},[r0],r8
    298 
        @ Register roles after this setup:
        @   q8,q9   = round keys 0 and 1
        @   q10-q15 = the six round keys before the last, q7 = last round key
        @   q6      = IV / chaining value, q0 = first input block
        @   r5      = rounds - 8 (2, 4 or 6), r7 = schedule + 32
    299 	vld1.32	{q8,q9},[r3]		@ load key schedule...
    300 	sub	r5,r5,#6
    301 	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
    302 	sub	r5,r5,#2
    303 	vld1.32	{q10,q11},[r7]!
    304 	vld1.32	{q12,q13},[r7]!
    305 	vld1.32	{q14,q15},[r7]!
    306 	vld1.32	{q7},[r7]
    307 
    308 	add	r7,r3,#32
    309 	mov	r6,r5
    310 	beq	.Lcbc_dec
    311 
        @ ---- Encryption ----
        @ q5 = round key 0 XOR last round key. Each ciphertext block is formed as
        @ q6 = q0 ^ q7, while q0 itself is left without the last round key. The
        @ next input block is XORed with q5 into q8, so the aese q0,q8 at the top
        @ of the loop applies the CBC chaining XOR and the first AddRoundKey at
        @ once (the two last-round-key terms cancel).
        @ r5 == 2 means 10 rounds, i.e. a 128-bit key.
    312 	cmp	r5,#2
    313 	veor	q0,q0,q6
    314 	veor	q5,q8,q7
    315 	beq	.Lcbc_enc128
    316 
        @ 192/256-bit keys: q2,q3 = round keys 2 and 3. r7, r6, r12, r14 and r3
        @ become pointers to round keys 1, 4, 5, 6 and 7, which are reloaded
        @ into q8/q9 on every block.
    317 	vld1.32	{q2,q3},[r7]
    318 	add	r7,r3,#16
    319 	add	r6,r3,#16*4
    320 	add	r12,r3,#16*5
    321 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    322 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    323 	add	r14,r3,#16*6
    324 	add	r3,r3,#16*7
    325 	b	.Lenter_cbc_enc
    326 
    327 .align	4
    328 .Loop_cbc_enc:
    329 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    330 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    331 	vst1.8	{q6},[r1]!
    332 .Lenter_cbc_enc:
    333 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    334 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    335 .byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
    336 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    337 	vld1.32	{q8},[r6]
        @ r5 == 4 means 12 rounds (192-bit key), which skips the two extra rounds
        @ below.
    338 	cmp	r5,#4
    339 .byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
    340 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    341 	vld1.32	{q9},[r12]
    342 	beq	.Lcbc_enc192
    343 
    344 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    345 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    346 	vld1.32	{q8},[r14]
    347 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    348 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    349 	vld1.32	{q9},[r3]
    350 	nop
    351 
    352 .Lcbc_enc192:
    353 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    354 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
        @ The flags of this subs drive both the moveq below and the bhs at the end
        @ of the loop body.
    355 	subs	r2,r2,#16
    356 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    357 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    358 	moveq	r8,#0
    359 .byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
    360 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    361 .byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
    362 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    363 	vld1.8	{q8},[r0],r8
    364 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    365 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    366 	veor	q8,q8,q5
    367 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    368 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    369 	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
    370 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    371 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    372 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    373 	veor	q6,q0,q7
    374 	bhs	.Loop_cbc_enc
    375 
    376 	vst1.8	{q6},[r1]!
    377 	b	.Lcbc_done
    378 
    379 .align	5
        @ 128-bit key: all eleven round keys stay in registers (q8 and q9 for the
        @ first block, then q9, q2, q3, q10-q15 and q7), so the loop reloads none.
    380 .Lcbc_enc128:
    381 	vld1.32	{q2,q3},[r7]
    382 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    383 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    384 	b	.Lenter_cbc_enc128
    385 .Loop_cbc_enc128:
    386 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    387 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    388 	vst1.8	{q6},[r1]!
    389 .Lenter_cbc_enc128:
    390 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    391 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    392 	subs	r2,r2,#16
    393 .byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
    394 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    395 	moveq	r8,#0
    396 .byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
    397 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    398 .byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
    399 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    400 .byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
    401 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    402 	vld1.8	{q8},[r0],r8
    403 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    404 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    405 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    406 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    407 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    408 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    409 	veor	q8,q8,q5
    410 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    411 	veor	q6,q0,q7
    412 	bhs	.Loop_cbc_enc128
    413 
    414 	vst1.8	{q6},[r1]!
    415 	b	.Lcbc_done
    416 .align	5
        @ ---- Decryption ----
        @ Blocks are processed three at a time in q0, q1 and q10. q2, q3 and q11
        @ keep copies of the ciphertext for the chaining XOR, and q6 holds the
        @ chaining value for the first block of each group. After the bias below,
        @ r2 is negative when fewer than three blocks are available.
    417 .Lcbc_dec:
    418 	vld1.8	{q10},[r0]!
    419 	subs	r2,r2,#32		@ bias
    420 	add	r6,r5,#2
    421 	vorr	q3,q0,q0
    422 	vorr	q1,q0,q0
    423 	vorr	q11,q10,q10
    424 	blo	.Lcbc_dec_tail
    425 
    426 	vorr	q1,q10,q10
    427 	vld1.8	{q10},[r0]!
    428 	vorr	q2,q0,q0
    429 	vorr	q3,q1,q1
    430 	vorr	q11,q10,q10
    431 
        @ Inner loop: two rounds per pass on all three blocks, with the round keys
        @ streamed through q8/q9 from r7. r6 is the countdown.
    432 .Loop3x_cbc_dec:
    433 .byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
    434 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    435 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    436 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    437 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    438 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    439 	vld1.32	{q8},[r7]!
    440 	subs	r6,r6,#2
    441 .byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
    442 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    443 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    444 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    445 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    446 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    447 	vld1.32	{q9},[r7]!
    448 	bgt	.Loop3x_cbc_dec
    449 
        @ Remaining rounds. The chaining values are pre-XORed with the last round
        @ key q7 (into q4, q5 and q9), so one veor per block later applies both
        @ the last round key and the CBC XOR. The flags of the subs r2 below are
        @ used by movlo and by the bhs that closes the outer loop.
    450 .byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
    451 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    452 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    453 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    454 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    455 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    456 	veor	q4,q6,q7
    457 	subs	r2,r2,#0x30
    458 	veor	q5,q2,q7
    459 	movlo	r6,r2			@ r6, r6, is zero at this point
    460 .byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
    461 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    462 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    463 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    464 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    465 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    466 	veor	q9,q3,q7
    467 	add	r0,r0,r6		@ r0 is adjusted in such way that
    468 					@ at exit from the loop q1-q10
    469 					@ are loaded with last "words"
    470 	vorr	q6,q11,q11
    471 	mov	r7,r3
    472 .byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
    473 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    474 .byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
    475 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    476 .byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
    477 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    478 	vld1.8	{q2},[r0]!
    479 .byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
    480 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    481 .byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
    482 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    483 .byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
    484 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    485 	vld1.8	{q3},[r0]!
    486 .byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
    487 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    488 .byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
    489 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    490 .byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
    491 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    492 	vld1.8	{q11},[r0]!
    493 .byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
    494 .byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
    495 .byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
    496 	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
    497 	add	r6,r5,#2
    498 	veor	q4,q4,q0
    499 	veor	q5,q5,q1
    500 	veor	q10,q10,q9
    501 	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
    502 	vst1.8	{q4},[r1]!
    503 	vorr	q0,q2,q2
    504 	vst1.8	{q5},[r1]!
    505 	vorr	q1,q3,q3
    506 	vst1.8	{q10},[r1]!
    507 	vorr	q10,q11,q11
    508 	bhs	.Loop3x_cbc_dec
    509 
        @ r2 == -0x30 here means no blocks are left over.
    510 	cmn	r2,#0x30
    511 	beq	.Lcbc_done
    512 	nop
    513 
        @ Tail: one or two blocks remain (r2 == -0x20 or -0x10). q1 and q10 are
        @ both decrypted; in the one-block case only the result in q10 is used.
    514 .Lcbc_dec_tail:
    515 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    516 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    517 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    518 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    519 	vld1.32	{q8},[r7]!
    520 	subs	r6,r6,#2
    521 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    522 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    523 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    524 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    525 	vld1.32	{q9},[r7]!
    526 	bgt	.Lcbc_dec_tail
    527 
    528 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    529 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    530 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    531 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    532 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    533 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    534 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    535 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    536 .byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
    537 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    538 .byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
    539 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
        @ The flags of this cmn are consumed by the beq .Lcbc_dec_one below:
        @ equal means exactly one block remains.
    540 	cmn	r2,#0x20
    541 .byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
    542 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    543 .byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
    544 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    545 	veor	q5,q6,q7
    546 .byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
    547 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    548 .byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
    549 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    550 	veor	q9,q3,q7
    551 .byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
    552 .byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
    553 	beq	.Lcbc_dec_one
        @ Two blocks: q5 and q9 already contain chaining value ^ last round key.
    554 	veor	q5,q5,q1
    555 	veor	q9,q9,q10
    556 	vorr	q6,q11,q11
    557 	vst1.8	{q5},[r1]!
    558 	vst1.8	{q9},[r1]!
    559 	b	.Lcbc_done
    560 
    561 .Lcbc_dec_one:
    562 	veor	q5,q5,q10
    563 	vorr	q6,q11,q11
    564 	vst1.8	{q5},[r1]!
    565 
        @ Write the final chaining value back through the IV pointer. The abort
        @ path below skips this store.
    566 .Lcbc_done:
    567 	vst1.8	{q6},[r4]
    568 .Lcbc_abort:
    569 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    570 	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
    571 .size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
    572 .globl	aes_v8_ctr32_encrypt_blocks
    573 .type	aes_v8_ctr32_encrypt_blocks,%function
    574 .align	5
        @ aes_v8_ctr32_encrypt_blocks: CTR mode with a 32-bit counter.
        @
        @ In:   r0 = input, r1 = output, r2 = number of 16-byte blocks,
        @       r3 = key schedule (the round count is read from offset 240)
        @       [sp] = pointer to the 16-byte counter block
        @
        @ Only the last 32-bit word of the counter block is incremented from block
        @ to block; the other twelve bytes are reused unchanged. The counter block
        @ in memory is only read here and is never written back.
        @ r4-r10, lr and d8-d15 are saved on entry and restored on exit.
        @
        @ NOTE(review): r2 == 0 is not special-cased. The tail path would still
        @ load input and store output. Presumably callers never pass zero - confirm.
    575 aes_v8_ctr32_encrypt_blocks:
    576 	mov	ip,sp
    577 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
    578 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
    579 	ldr	r4, [ip]		@ load remaining arg
    580 	ldr	r5,[r3,#240]
    581 
        @ r8 = counter word as stored in memory, q0 = whole counter block.
    582 	ldr	r8, [r4, #12]
    583 	vld1.32	{q0},[r4]
    584 
        @ q8,q9 = round keys 0 and 1. q12-q15 = the four round keys before the
        @ last, q7 = last round key. r5 = rounds - 6, r7 = schedule + 32.
        @ The flags of the cmp r2,#2 below are consumed by movlo and then by the
        @ bls .Lctr32_tail; nothing in between sets flags.
    585 	vld1.32	{q8,q9},[r3]		@ load key schedule...
    586 	sub	r5,r5,#4
    587 	mov	r12,#16
    588 	cmp	r2,#2
    589 	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
    590 	sub	r5,r5,#2
    591 	vld1.32	{q12,q13},[r7]!
    592 	vld1.32	{q14,q15},[r7]!
    593 	vld1.32	{q7},[r7]
    594 	add	r7,r3,#32
    595 	mov	r6,r5
        @ r12 is the input step for the tail path: 0 when fewer than two blocks
        @ are requested, so the second tail load re-reads the same block.
    596 	movlo	r12,#0
        @ NOTE(review): only this first rev is guarded by __ARMEB__. The later
        @ rev instructions that feed the vmov.32 lane writes are unconditional -
        @ confirm the intended big-endian behaviour.
    597 #ifndef __ARMEB__
    598 	rev	r8, r8
    599 #endif
        @ q1 = counter block with counter+1, q10 = counter block with counter+2
        @ (completed on the 3x path below). q6 keeps an unmodified copy of the
        @ block as a template for later counter values.
    600 	vorr	q1,q0,q0
    601 	add	r10, r8, #1
    602 	vorr	q10,q0,q0
    603 	add	r8, r8, #2
    604 	vorr	q6,q0,q0
    605 	rev	r10, r10
    606 	vmov.32	d3[1],r10
    607 	bls	.Lctr32_tail
    608 	rev	r12, r8
    609 	sub	r2,r2,#3		@ bias
    610 	vmov.32	d21[1],r12
    611 	b	.Loop3x_ctr32
    612 
    613 .align	4
        @ Three counter blocks (q0, q1, q10) are encrypted per outer pass. This
        @ inner loop runs two rounds per pass on all three, with the round keys
        @ streamed through q8/q9 from r7 and r6 as the countdown.
    614 .Loop3x_ctr32:
    615 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    616 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    617 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    618 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    619 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
    620 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    621 	vld1.32	{q8},[r7]!
    622 	subs	r6,r6,#2
    623 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    624 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    625 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    626 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    627 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
    628 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    629 	vld1.32	{q9},[r7]!
    630 	bgt	.Loop3x_ctr32
    631 
        @ Remaining rounds. The in-flight states move to q4, q5 and q9 (via the
        @ aesmc destination) so that q0, q1 and q10 can be rebuilt from the q6
        @ template with the next three counter values while these rounds finish.
        @ The three input blocks are loaded into q2, q3 and q11 and pre-XORed
        @ with the last round key q7.
    632 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    633 .byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
    634 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    635 .byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
    636 	vld1.8	{q2},[r0]!
    637 	vorr	q0,q6,q6
    638 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
    639 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    640 	vld1.8	{q3},[r0]!
    641 	vorr	q1,q6,q6
    642 .byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
    643 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    644 .byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
    645 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    646 	vld1.8	{q11},[r0]!
    647 	mov	r7,r3
    648 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
    649 .byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
    650 	vorr	q10,q6,q6
    651 	add	r9,r8,#1
    652 .byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
    653 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    654 .byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
    655 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    656 	veor	q2,q2,q7
    657 	add	r10,r8,#2
    658 .byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
    659 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    660 	veor	q3,q3,q7
    661 	add	r8,r8,#3
    662 .byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
    663 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    664 .byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
    665 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    666 	veor	q11,q11,q7
    667 	rev	r9,r9
    668 .byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
    669 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    670 	vmov.32	d1[1], r9
    671 	rev	r10,r10
    672 .byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
    673 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    674 .byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
    675 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    676 	vmov.32	d3[1], r10
    677 	rev	r12,r8
    678 .byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
    679 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    680 	vmov.32	d21[1], r12
        @ The flags of this subs are consumed by the bhs that closes the outer
        @ loop; the NEON instructions and the plain mov in between preserve them.
    681 	subs	r2,r2,#3
    682 .byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
    683 .byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
    684 .byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
    685 
    686 	veor	q2,q2,q4
    687 	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
    688 	vst1.8	{q2},[r1]!
    689 	veor	q3,q3,q5
    690 	mov	r6,r5
    691 	vst1.8	{q3},[r1]!
    692 	veor	q11,q11,q9
    693 	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
    694 	vst1.8	{q11},[r1]!
    695 	bhs	.Loop3x_ctr32
    696 
        @ Undo the bias: r2 is now the number of leftover blocks (0, 1 or 2).
        @ r12 was used as scratch above, so the tail input step is set up again:
        @ 16, or 0 when a single block is left.
    697 	adds	r2,r2,#3
    698 	beq	.Lctr32_done
    699 	cmp	r2,#1
    700 	mov	r12,#16
    701 	moveq	r12,#0
    702 
        @ Tail: encrypt the counter blocks in q0 and q1 for one or two remaining
        @ blocks.
    703 .Lctr32_tail:
    704 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    705 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    706 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    707 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    708 	vld1.32	{q8},[r7]!
    709 	subs	r6,r6,#2
    710 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    711 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    712 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    713 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    714 	vld1.32	{q9},[r7]!
    715 	bgt	.Lctr32_tail
    716 
    717 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    718 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    719 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    720 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    721 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    722 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    723 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    724 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
        @ With r12 == 0 the next load re-reads this same block instead of reading
        @ past the end of the input.
    725 	vld1.8	{q2},[r0],r12
    726 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    727 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    728 .byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
    729 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    730 	vld1.8	{q3},[r0]
    731 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    732 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    733 .byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
    734 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    735 	veor	q2,q2,q7
    736 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    737 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    738 .byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
    739 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    740 	veor	q3,q3,q7
    741 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    742 .byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
    743 
        @ The second output block is stored only when r2 != 1.
    744 	cmp	r2,#1
    745 	veor	q2,q2,q0
    746 	veor	q3,q3,q1
    747 	vst1.8	{q2},[r1]!
    748 	beq	.Lctr32_done
    749 	vst1.8	{q3},[r1]
    750 
    751 .Lctr32_done:
    752 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    753 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
    754 .size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
    755 #endif
    756 #endif