Home | History | Annotate | Download | only in fipsmodule
      1 #if defined(__arm__)
      2 #include <openssl/arm_arch.h>
      3 
      4 #if __ARM_MAX_ARCH__>=7
      5 .text
      6 .arch	armv7-a	@ don't confuse not-so-latest binutils with armv8 :-)
      7 .fpu	neon
      8 .code	32
      9 #undef	__thumb2__
     10 .align	5
        @ .Lrcon: constant table read by the key-expansion code in this file.
        @ Each row is one 16-byte vector. AES instructions in this file are
        @ emitted as raw .byte encodings with the mnemonic in the trailing comment.
     11 .Lrcon:
     12 .long	0x01,0x01,0x01,0x01	@ starting round constant, one copy per 32-bit lane
     13 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
     14 .long	0x1b,0x1b,0x1b,0x1b	@ round constant reloaded after eight doublings
     15 
     16 .globl	aes_hw_set_encrypt_key
     17 .hidden	aes_hw_set_encrypt_key
     18 .type	aes_hw_set_encrypt_key,%function
     19 .align	5
        @ aes_hw_set_encrypt_key: expand a user key into an AES encryption key schedule.
        @ In:   r0 = pointer to the user key bytes
        @       r1 = key length in bits; only 128, 192 and 256 pass the checks below
        @       r2 = pointer to the key schedule to fill
        @ Out:  r0 = 0 on success, -1 if r0 or r2 is NULL, -2 if r1 is rejected
        @       round keys are stored from [r2]; the round count (10, 12 or 14)
        @       is stored as a word at offset 240 of the schedule
        @ Writes: r1, r3, r12, q0-q3, q8-q10 and the flags.
        @ On success r2 = schedule + 240 and r12 = round count at return;
        @ aes_hw_set_decrypt_key enters at .Lenc_key and depends on both.
     20 aes_hw_set_encrypt_key:
     21 .Lenc_key:
     22 	mov	r3,#-1		@ r3 = pending return value, -1 = NULL pointer
     23 	cmp	r0,#0
     24 	beq	.Lenc_key_abort
     25 	cmp	r2,#0
     26 	beq	.Lenc_key_abort
     27 	mov	r3,#-2		@ from here on a failure means a bad key length
     28 	cmp	r1,#128
     29 	blt	.Lenc_key_abort
     30 	cmp	r1,#256
     31 	bgt	.Lenc_key_abort
     32 	tst	r1,#0x3f	@ length must be a multiple of 64 bits
     33 	bne	.Lenc_key_abort
     34 
     35 	adr	r3,.Lrcon	@ r3 now points at the constant table
     36 	cmp	r1,#192		@ flags stay live until the blt/beq below
     37 
     38 	veor	q0,q0,q0	@ q0 = 0, used as zero round key and shift-in value
     39 	vld1.8	{q3},[r0]!	@ q3 = key bytes 0..15
     40 	mov	r1,#8		@ reuse r1 as the loop counter
     41 	vld1.32	{q1,q2},[r3]!	@ q1 = round constant, q2 = rotate-n-splat mask
     42 
     43 	blt	.Loop128
     44 	beq	.L192
     45 	b	.L256
     46 
        @ 128-bit key: eight looped rounds, then two unrolled rounds using the
        @ 0x1b constant row. Each round stores q3 and derives the next round key.
     47 .align	4
     48 .Loop128:
     49 	vtbl.8	d20,{q3},d4	@ q10 = last word of q3, byte-rotated,
     50 	vtbl.8	d21,{q3},d5	@ replicated into all four lanes
     51 	vext.8	q9,q0,q3,#12	@ q9 = q3 moved up one word, zero shifted in
     52 	vst1.32	{q3},[r2]!	@ store the current round key
     53 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     54 	subs	r1,r1,#1
     55 
     56 	veor	q3,q3,q9	@ these three veor/vext steps accumulate the
     57 	vext.8	q9,q0,q9,#12	@ running XOR of the previous key words
     58 	veor	q3,q3,q9
     59 	vext.8	q9,q0,q9,#12
     60 	veor	q10,q10,q1	@ mix in the round constant
     61 	veor	q3,q3,q9
     62 	vshl.u8	q1,q1,#1	@ double the round constant for the next round
     63 	veor	q3,q3,q10	@ q3 = next round key
     64 	bne	.Loop128
     65 
     66 	vld1.32	{q1},[r3]	@ q1 = 0x1b row (r3 was advanced past two rows)
     67 
     68 	vtbl.8	d20,{q3},d4
     69 	vtbl.8	d21,{q3},d5
     70 	vext.8	q9,q0,q3,#12
     71 	vst1.32	{q3},[r2]!
     72 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     73 
     74 	veor	q3,q3,q9
     75 	vext.8	q9,q0,q9,#12
     76 	veor	q3,q3,q9
     77 	vext.8	q9,q0,q9,#12
     78 	veor	q10,q10,q1
     79 	veor	q3,q3,q9
     80 	vshl.u8	q1,q1,#1
     81 	veor	q3,q3,q10
     82 
     83 	vtbl.8	d20,{q3},d4
     84 	vtbl.8	d21,{q3},d5
     85 	vext.8	q9,q0,q3,#12
     86 	vst1.32	{q3},[r2]!
     87 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     88 
     89 	veor	q3,q3,q9
     90 	vext.8	q9,q0,q9,#12
     91 	veor	q3,q3,q9
     92 	vext.8	q9,q0,q9,#12
     93 	veor	q10,q10,q1
     94 	veor	q3,q3,q9
     95 	veor	q3,q3,q10
     96 	vst1.32	{q3},[r2]	@ last round key, no write-back
     97 	add	r2,r2,#0x50	@ 10 stores * 16 + 0x50 = schedule + 240
     98 
     99 	mov	r12,#10		@ round count for a 128-bit key
    100 	b	.Ldone
    101 
        @ 192-bit key: q3 holds key bytes 0..15, d16 holds bytes 16..23.
        @ Eight iterations, each storing 8 + 16 bytes of schedule.
    102 .align	4
    103 .L192:
    104 	vld1.8	{d16},[r0]!	@ d16 = key bytes 16..23
    105 	vmov.i8	q10,#8			@ borrow q10
    106 	vst1.32	{q3},[r2]!
    107 	vsub.i8	q2,q2,q10	@ adjust the mask
    108 
    109 .Loop192:
    110 	vtbl.8	d20,{q8},d4
    111 	vtbl.8	d21,{q8},d5
    112 	vext.8	q9,q0,q3,#12
    113 	vst1.32	{d16},[r2]!
    114 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    115 	subs	r1,r1,#1
    116 
    117 	veor	q3,q3,q9
    118 	vext.8	q9,q0,q9,#12
    119 	veor	q3,q3,q9
    120 	vext.8	q9,q0,q9,#12
    121 	veor	q3,q3,q9
    122 
    123 	vdup.32	q9,d7[1]	@ replicate the last word of q3
    124 	veor	q9,q9,q8
    125 	veor	q10,q10,q1
    126 	vext.8	q8,q0,q8,#12
    127 	vshl.u8	q1,q1,#1
    128 	veor	q8,q8,q9
    129 	veor	q3,q3,q10
    130 	veor	q8,q8,q10
    131 	vst1.32	{q3},[r2]!
    132 	bne	.Loop192	@ flags come from the subs above
    133 
    134 	mov	r12,#12		@ round count for a 192-bit key
    135 	add	r2,r2,#0x20	@ 16 + 8*24 + 0x20 = schedule + 240
    136 	b	.Ldone
    137 
        @ 256-bit key: q3 = key bytes 0..15, q8 = key bytes 16..31.
        @ Seven iterations; the loop is left from its middle via beq .Ldone.
    138 .align	4
    139 .L256:
    140 	vld1.8	{q8},[r0]	@ r0 was already advanced by 16 above
    141 	mov	r1,#7
    142 	mov	r12,#14		@ round count for a 256-bit key
    143 	vst1.32	{q3},[r2]!
    144 
    145 .Loop256:
    146 	vtbl.8	d20,{q8},d4
    147 	vtbl.8	d21,{q8},d5
    148 	vext.8	q9,q0,q3,#12
    149 	vst1.32	{q8},[r2]!
    150 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    151 	subs	r1,r1,#1
    152 
    153 	veor	q3,q3,q9
    154 	vext.8	q9,q0,q9,#12
    155 	veor	q3,q3,q9
    156 	vext.8	q9,q0,q9,#12
    157 	veor	q10,q10,q1
    158 	veor	q3,q3,q9
    159 	vshl.u8	q1,q1,#1
    160 	veor	q3,q3,q10
    161 	vst1.32	{q3},[r2]!
    162 	beq	.Ldone		@ after 7 passes r2 = 16 + 7*32 = schedule + 240
    163 
        @ Second half-step: splat of the last word of q3, no rotation mask
        @ and no round constant is applied here.
    164 	vdup.32	q10,d7[1]
    165 	vext.8	q9,q0,q8,#12
    166 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    167 
    168 	veor	q8,q8,q9
    169 	vext.8	q9,q0,q9,#12
    170 	veor	q8,q8,q9
    171 	vext.8	q9,q0,q9,#12
    172 	veor	q8,q8,q9
    173 
    174 	veor	q8,q8,q10
    175 	b	.Loop256
    176 
    177 .Ldone:
    178 	str	r12,[r2]	@ round count at schedule + 240
    179 	mov	r3,#0		@ success
    180 
    181 .Lenc_key_abort:
    182 	mov	r0,r3			@ return value
    183 
    184 	bx	lr
    185 .size	aes_hw_set_encrypt_key,.-aes_hw_set_encrypt_key
    186 
    187 .globl	aes_hw_set_decrypt_key
    188 .hidden	aes_hw_set_decrypt_key
    189 .type	aes_hw_set_decrypt_key,%function
    190 .align	5
        @ aes_hw_set_decrypt_key: build a key schedule for aes_hw_decrypt.
        @ In:   r0, r1, r2 are passed unchanged to .Lenc_key
        @       (user key pointer, key length in bits, key schedule pointer)
        @ Out:  r0 = 0 on success, otherwise the non-zero code from .Lenc_key
        @ The encryption schedule is built first, then the round keys are
        @ reversed in place; every key except the first and last also goes
        @ through aesimc. Saves and restores r4 and lr on the stack.
    191 aes_hw_set_decrypt_key:
    192 	stmdb	sp!,{r4,lr}
    193 	bl	.Lenc_key
    194 
    195 	cmp	r0,#0
    196 	bne	.Ldec_key_abort
    197 
    198 	sub	r2,r2,#240		@ restore original r2
    199 	mov	r4,#-16		@ post-index step for walking r0 downwards
    200 	add	r0,r2,r12,lsl#4	@ end of key schedule
    201 
        @ Swap the first and last round keys without transforming them.
    202 	vld1.32	{q0},[r2]
    203 	vld1.32	{q1},[r0]
    204 	vst1.32	{q0},[r0],r4
    205 	vst1.32	{q1},[r2]!
    206 
        @ Walk inwards from both ends, transforming and swapping each pair.
    207 .Loop_imc:
    208 	vld1.32	{q0},[r2]
    209 	vld1.32	{q1},[r0]
    210 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    211 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    212 	vst1.32	{q0},[r0],r4
    213 	vst1.32	{q1},[r2]!
    214 	cmp	r0,r2
    215 	bhi	.Loop_imc	@ unsigned compare: continue while r0 is above r2
    216 
        @ The pointers have met on the middle round key; transform it in place.
    217 	vld1.32	{q0},[r2]
    218 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    219 	vst1.32	{q0},[r0]
    220 
    221 	eor	r0,r0,r0		@ return value
    222 .Ldec_key_abort:
    223 	ldmia	sp!,{r4,pc}
    224 .size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
    225 .globl	aes_hw_encrypt
    226 .hidden	aes_hw_encrypt
    227 .type	aes_hw_encrypt,%function
    228 .align	5
        @ aes_hw_encrypt: encrypt one 16-byte block.
        @ In:   r0 = pointer to the 16 input bytes
        @       r1 = pointer to the 16 output bytes
        @       r2 = key schedule; round count is read from offset 240
        @ Writes: r2, r3, q0-q2 and the flags. No stack use.
    229 aes_hw_encrypt:
    230 	ldr	r3,[r2,#240]	@ r3 = round count
    231 	vld1.32	{q0},[r2]!	@ q0 = first round key
    232 	vld1.8	{q2},[r0]	@ q2 = input block (the working state)
    233 	sub	r3,r3,#2	@ the last two rounds run after the loop
    234 	vld1.32	{q1},[r2]!	@ q1 = second round key
    235 
        @ Two rounds per pass; the next two round keys are loaded while the
        @ current ones are being applied.
    236 .Loop_enc:
    237 .byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
    238 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    239 	vld1.32	{q0},[r2]!
    240 	subs	r3,r3,#2
    241 .byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
    242 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    243 	vld1.32	{q1},[r2]!
    244 	bgt	.Loop_enc
    245 
        @ Final two rounds: the last aese has no aesmc after it, and the
        @ last round key is applied with a plain XOR.
    246 .byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
    247 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    248 	vld1.32	{q0},[r2]	@ q0 = last round key
    249 .byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
    250 	veor	q2,q2,q0
    251 
    252 	vst1.8	{q2},[r1]
    253 	bx	lr
    254 .size	aes_hw_encrypt,.-aes_hw_encrypt
    255 .globl	aes_hw_decrypt
    256 .hidden	aes_hw_decrypt
    257 .type	aes_hw_decrypt,%function
    258 .align	5
        @ aes_hw_decrypt: decrypt one 16-byte block.
        @ In:   r0 = pointer to the 16 input bytes
        @       r1 = pointer to the 16 output bytes
        @       r2 = key schedule; round count is read from offset 240
        @            (presumably one prepared by aes_hw_set_decrypt_key; the
        @            code here only walks it front to back)
        @ Writes: r2, r3, q0-q2 and the flags. No stack use.
    259 aes_hw_decrypt:
    260 	ldr	r3,[r2,#240]	@ r3 = round count
    261 	vld1.32	{q0},[r2]!	@ q0 = first round key in the schedule
    262 	vld1.8	{q2},[r0]	@ q2 = input block (the working state)
    263 	sub	r3,r3,#2	@ the last two rounds run after the loop
    264 	vld1.32	{q1},[r2]!	@ q1 = second round key
    265 
        @ Two rounds per pass, same shape as the loop in aes_hw_encrypt.
    266 .Loop_dec:
    267 .byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
    268 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    269 	vld1.32	{q0},[r2]!
    270 	subs	r3,r3,#2
    271 .byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
    272 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    273 	vld1.32	{q1},[r2]!
    274 	bgt	.Loop_dec
    275 
        @ Final two rounds: the last aesd has no aesimc after it, and the
        @ last round key is applied with a plain XOR.
    276 .byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
    277 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    278 	vld1.32	{q0},[r2]	@ q0 = last round key
    279 .byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
    280 	veor	q2,q2,q0
    281 
    282 	vst1.8	{q2},[r1]
    283 	bx	lr
    284 .size	aes_hw_decrypt,.-aes_hw_decrypt
    285 .globl	aes_hw_cbc_encrypt
    286 .hidden	aes_hw_cbc_encrypt
    287 .type	aes_hw_cbc_encrypt,%function
    288 .align	5
        @ aes_hw_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
        @ In:   r0 = input pointer
        @       r1 = output pointer
        @       r2 = length in bytes; rounded down to a multiple of 16, and
        @            nothing is processed or written when it is below 16
        @       r3 = key schedule; round count is read from offset 240
        @       [sp]   = pointer to the 16-byte IV (loaded into r4)
        @       [sp+4] = direction flag (loaded into r5): zero selects the
        @                decrypt path, non-zero the encrypt path
        @ Out:  the IV buffer is overwritten with the chaining value for a
        @       following call, except on the early abort path.
        @ Saves and restores r4-r8, lr and d8-d15.
        @ r8 is the post-index step for input loads: 16 normally, 0 once only
        @ one block is left, so the look-ahead load stays inside the input.
    289 aes_hw_cbc_encrypt:
    290 	mov	ip,sp		@ keep the entry sp to reach the stack arguments
    291 	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
    292 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
    293 	ldmia	ip,{r4,r5}		@ load remaining args
    294 	subs	r2,r2,#16
    295 	mov	r8,#16
    296 	blo	.Lcbc_abort	@ fewer than 16 bytes: nothing to do
    297 	moveq	r8,#0		@ exactly one block: do not advance r0
    298 
    299 	cmp	r5,#0			@ en- or decrypting?
    300 	ldr	r5,[r3,#240]	@ r5 = round count; flags stay live until beq .Lcbc_dec
    301 	and	r2,r2,#-16
    302 	vld1.8	{q6},[r4]	@ q6 = IV
    303 	vld1.8	{q0},[r0],r8	@ q0 = first input block
    304 
    305 	vld1.32	{q8,q9},[r3]		@ load key schedule...
    306 	sub	r5,r5,#6
    307 	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
    308 	sub	r5,r5,#2	@ r5 = rounds - 8, i.e. 2, 4 or 6
    309 	vld1.32	{q10,q11},[r7]!
    310 	vld1.32	{q12,q13},[r7]!
    311 	vld1.32	{q14,q15},[r7]!
    312 	vld1.32	{q7},[r7]	@ q7 = last round key
    313 
    314 	add	r7,r3,#32	@ r7 = address of round key 2
    315 	mov	r6,r5
    316 	beq	.Lcbc_dec
    317 
        @ ---- encrypt path ----
        @ q5 = first key XOR last key. Each look-ahead input block is XORed
        @ with q5 so that the pending last-key XOR of the previous block and
        @ the first-key XOR of the next one are folded into one aese.
    318 	cmp	r5,#2
    319 	veor	q0,q0,q6	@ first block XOR IV
    320 	veor	q5,q8,q7
    321 	beq	.Lcbc_enc128
    322 
    323 	vld1.32	{q2,q3},[r7]	@ q2,q3 = round keys 2 and 3
    324 	add	r7,r3,#16	@ r7 = address of round key 1
    325 	add	r6,r3,#16*4	@ r6, r12, r14, r3 = addresses of keys 4, 5, 6, 7
    326 	add	r12,r3,#16*5
    327 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    328 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    329 	add	r14,r3,#16*6
    330 	add	r3,r3,#16*7
    331 	b	.Lenter_cbc_enc
    332 
    333 .align	4
    334 .Loop_cbc_enc:
    335 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    336 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    337 	vst1.8	{q6},[r1]!	@ store the previous ciphertext block
    338 .Lenter_cbc_enc:
    339 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    340 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    341 .byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
    342 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    343 	vld1.32	{q8},[r6]
    344 	cmp	r5,#4		@ r5 = 4 means two fewer rounds; skip the extra pair
    345 .byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
    346 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    347 	vld1.32	{q9},[r12]
    348 	beq	.Lcbc_enc192
    349 
    350 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    351 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    352 	vld1.32	{q8},[r14]
    353 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    354 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    355 	vld1.32	{q9},[r3]
    356 	nop
    357 
    358 .Lcbc_enc192:
    359 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    360 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    361 	subs	r2,r2,#16	@ flags stay live until bhs .Loop_cbc_enc
    362 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    363 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    364 	moveq	r8,#0		@ next block is the last: stop advancing r0
    365 .byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
    366 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    367 .byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
    368 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    369 	vld1.8	{q8},[r0],r8	@ look-ahead load of the next input block
    370 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    371 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    372 	veor	q8,q8,q5	@ next input XOR first key XOR last key
    373 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    374 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    375 	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
    376 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    377 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    378 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    379 	veor	q6,q0,q7	@ q6 = ciphertext block, also the next chaining value
    380 	bhs	.Loop_cbc_enc
    381 
    382 	vst1.8	{q6},[r1]!
    383 	b	.Lcbc_done
    384 
        @ Encrypt path for r5 = 2: all round keys stay in registers
        @ (q8, q9, q2, q3, q10-q15, q7).
    385 .align	5
    386 .Lcbc_enc128:
    387 	vld1.32	{q2,q3},[r7]	@ q2,q3 = round keys 2 and 3
    388 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    389 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    390 	b	.Lenter_cbc_enc128
    391 .Loop_cbc_enc128:
    392 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    393 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    394 	vst1.8	{q6},[r1]!	@ store the previous ciphertext block
    395 .Lenter_cbc_enc128:
    396 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    397 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    398 	subs	r2,r2,#16	@ flags stay live until bhs .Loop_cbc_enc128
    399 .byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
    400 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    401 	moveq	r8,#0		@ next block is the last: stop advancing r0
    402 .byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
    403 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    404 .byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
    405 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    406 .byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
    407 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    408 	vld1.8	{q8},[r0],r8	@ look-ahead load of the next input block
    409 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    410 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    411 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    412 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    413 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    414 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    415 	veor	q8,q8,q5	@ next input XOR first key XOR last key
    416 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    417 	veor	q6,q0,q7	@ q6 = ciphertext block, also the next chaining value
    418 	bhs	.Loop_cbc_enc128
    419 
    420 	vst1.8	{q6},[r1]!
    421 	b	.Lcbc_done
        @ ---- decrypt path ----
        @ Three blocks are processed per pass in q0, q1 and q10. Copies of the
        @ input blocks are kept in q2, q3 and q11 because each output needs
        @ the preceding input block; q6 carries the chaining value.
    422 .align	5
    423 .Lcbc_dec:
    424 	vld1.8	{q10},[r0]!
    425 	subs	r2,r2,#32		@ bias
    426 	add	r6,r5,#2	@ r6 = round-pair counter for the loops below
    427 	vorr	q3,q0,q0
    428 	vorr	q1,q0,q0
    429 	vorr	q11,q10,q10
    430 	blo	.Lcbc_dec_tail	@ fewer than three blocks in total
    431 
    432 	vorr	q1,q10,q10
    433 	vld1.8	{q10},[r0]!
    434 	vorr	q2,q0,q0
    435 	vorr	q3,q1,q1
    436 	vorr	q11,q10,q10
    437 
    438 .Loop3x_cbc_dec:
    439 .byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
    440 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    441 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    442 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    443 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    444 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    445 	vld1.32	{q8},[r7]!
    446 	subs	r6,r6,#2
    447 .byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
    448 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    449 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    450 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    451 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    452 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    453 	vld1.32	{q9},[r7]!
    454 	bgt	.Loop3x_cbc_dec
    455 
    456 .byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
    457 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    458 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    459 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    460 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    461 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    462 	veor	q4,q6,q7	@ q4 = chaining value XOR last round key
    463 	subs	r2,r2,#0x30	@ flags stay live until bhs .Loop3x_cbc_dec
    464 	veor	q5,q2,q7	@ q5 = saved first input block XOR last round key
    465 	movlo	r6,r2			@ r6 is zero at this point
    466 .byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
    467 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    468 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    469 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    470 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    471 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    472 	veor	q9,q3,q7	@ q9 = saved second input block XOR last round key
    473 	add	r0,r0,r6		@ r0 is adjusted in such way that
    474 					@ at exit from the loop q1-q10
    475 					@ are loaded with last "words"
    476 	vorr	q6,q11,q11	@ new chaining value = third input block
    477 	mov	r7,r3		@ rewind the round-key pointer
    478 .byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
    479 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    480 .byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
    481 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    482 .byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
    483 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    484 	vld1.8	{q2},[r0]!
    485 .byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
    486 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    487 .byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
    488 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    489 .byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
    490 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    491 	vld1.8	{q3},[r0]!
    492 .byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
    493 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    494 .byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
    495 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    496 .byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
    497 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    498 	vld1.8	{q11},[r0]!
    499 .byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
    500 .byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
    501 .byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
    502 	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
    503 	add	r6,r5,#2	@ reset the round-pair counter
    504 	veor	q4,q4,q0	@ the three veor below produce the output blocks
    505 	veor	q5,q5,q1
    506 	veor	q10,q10,q9
    507 	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
    508 	vst1.8	{q4},[r1]!
    509 	vorr	q0,q2,q2
    510 	vst1.8	{q5},[r1]!
    511 	vorr	q1,q3,q3
    512 	vst1.8	{q10},[r1]!
    513 	vorr	q10,q11,q11
    514 	bhs	.Loop3x_cbc_dec
    515 
    516 	cmn	r2,#0x30	@ r2 = -0x30 means no blocks are left
    517 	beq	.Lcbc_done
    518 	nop
    519 
        @ Tail: one or two blocks remain, held in q1 and q10.
    520 .Lcbc_dec_tail:
    521 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    522 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    523 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    524 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    525 	vld1.32	{q8},[r7]!
    526 	subs	r6,r6,#2
    527 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    528 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    529 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    530 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    531 	vld1.32	{q9},[r7]!
    532 	bgt	.Lcbc_dec_tail
    533 
    534 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    535 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    536 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    537 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    538 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    539 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    540 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    541 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    542 .byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
    543 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    544 .byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
    545 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    546 	cmn	r2,#0x20	@ r2 = -0x20 means exactly one block is left
    547 .byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
    548 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    549 .byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
    550 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    551 	veor	q5,q6,q7	@ q5 = chaining value XOR last round key
    552 .byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
    553 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    554 .byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
    555 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    556 	veor	q9,q3,q7	@ q9 = saved previous input block XOR last round key
    557 .byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
    558 .byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
    559 	beq	.Lcbc_dec_one	@ flags come from the cmn above
    560 	veor	q5,q5,q1
    561 	veor	q9,q9,q10
    562 	vorr	q6,q11,q11	@ chaining value for the caller = last input block
    563 	vst1.8	{q5},[r1]!
    564 	vst1.8	{q9},[r1]!
    565 	b	.Lcbc_done
    566 
    567 .Lcbc_dec_one:
    568 	veor	q5,q5,q10	@ only the q10 lane carries a real block here
    569 	vorr	q6,q11,q11
    570 	vst1.8	{q5},[r1]!
    571 
    572 .Lcbc_done:
    573 	vst1.8	{q6},[r4]	@ write the chaining value back to the IV buffer
    574 .Lcbc_abort:
    575 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    576 	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
    577 .size	aes_hw_cbc_encrypt,.-aes_hw_cbc_encrypt
    578 .globl	aes_hw_ctr32_encrypt_blocks
    579 .hidden	aes_hw_ctr32_encrypt_blocks
    580 .type	aes_hw_ctr32_encrypt_blocks,%function
    581 .align	5
        @ aes_hw_ctr32_encrypt_blocks: CTR-mode processing of whole 16-byte blocks.
        @ In:   r0 = input pointer
        @       r1 = output pointer
        @       r2 = number of 16-byte blocks
        @       r3 = key schedule; round count is read from offset 240
        @       [sp] = pointer to the 16-byte counter block (loaded into r4)
        @ Only the last 32-bit word of the counter block is incremented; the
        @ counter block in memory is never written back.
        @ Saves and restores r4-r10, lr and d8-d15.
        @ NOTE(review): r2 = 0 is not special-cased; that path reaches
        @ .Lctr32_tail and stores two blocks, so callers presumably never pass 0.
        @ NOTE(review): the first rev is skipped under __ARMEB__ while the later
        @ rev instructions are unconditional; confirm big-endian behaviour.
    582 aes_hw_ctr32_encrypt_blocks:
    583 	mov	ip,sp		@ keep the entry sp to reach the stack argument
    584 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
    585 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
    586 	ldr	r4, [ip]		@ load remaining arg
    587 	ldr	r5,[r3,#240]	@ r5 = round count
    588 
    589 	ldr	r8, [r4, #12]	@ r8 = last word of the counter block, as stored
    590 	vld1.32	{q0},[r4]	@ q0 = whole counter block
    591 
    592 	vld1.32	{q8,q9},[r3]		@ load key schedule...
    593 	sub	r5,r5,#4
    594 	mov	r12,#16
    595 	cmp	r2,#2		@ flags stay live until bls .Lctr32_tail
    596 	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
    597 	sub	r5,r5,#2	@ r5 = rounds - 6, the round-pair loop count
    598 	vld1.32	{q12,q13},[r7]!
    599 	vld1.32	{q14,q15},[r7]!
    600 	vld1.32	{q7},[r7]	@ q7 = last round key
    601 	add	r7,r3,#32	@ r7 = address of round key 2
    602 	mov	r6,r5
    603 	movlo	r12,#0		@ single block: tail must not advance r0
    604 #ifndef __ARMEB__
    605 	rev	r8, r8
    606 #endif
    607 	vorr	q1,q0,q0
    608 	add	r10, r8, #1
    609 	vorr	q10,q0,q0
    610 	add	r8, r8, #2
    611 	vorr	q6,q0,q0	@ q6 = counter block template, reused every pass
    612 	rev	r10, r10
    613 	vmov.32	d3[1],r10	@ q1 = counter + 1
    614 	bls	.Lctr32_tail	@ one or two blocks: skip the 3x loop
    615 	rev	r12, r8
    616 	sub	r2,r2,#3		@ bias
    617 	vmov.32	d21[1],r12	@ q10 = counter + 2
    618 	b	.Loop3x_ctr32
    619 
        @ Main loop: three counter blocks (q0, q1, q10) are encrypted in parallel.
    620 .align	4
    621 .Loop3x_ctr32:
    622 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    623 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    624 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    625 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    626 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
    627 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    628 	vld1.32	{q8},[r7]!
    629 	subs	r6,r6,#2
    630 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    631 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    632 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    633 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    634 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
    635 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    636 	vld1.32	{q9},[r7]!
    637 	bgt	.Loop3x_ctr32
    638 
        @ The three states move to q4, q5 and q9 so that q0, q1 and q10 can be
        @ refilled from the template q6 with the next three counter values.
    639 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    640 .byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
    641 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    642 .byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
    643 	vld1.8	{q2},[r0]!	@ q2, q3, q11 = the three input blocks
    644 	vorr	q0,q6,q6
    645 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
    646 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    647 	vld1.8	{q3},[r0]!
    648 	vorr	q1,q6,q6
    649 .byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
    650 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    651 .byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
    652 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    653 	vld1.8	{q11},[r0]!
    654 	mov	r7,r3		@ rewind the round-key pointer
    655 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
    656 .byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
    657 	vorr	q10,q6,q6
    658 	add	r9,r8,#1	@ r9, r10, r8 = next three counter values
    659 .byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
    660 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    661 .byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
    662 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    663 	veor	q2,q2,q7	@ fold the last round key into the input block
    664 	add	r10,r8,#2
    665 .byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
    666 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    667 	veor	q3,q3,q7
    668 	add	r8,r8,#3
    669 .byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
    670 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    671 .byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
    672 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    673 	veor	q11,q11,q7
    674 	rev	r9,r9
    675 .byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
    676 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    677 	vmov.32	d1[1], r9	@ insert the counters into the last word lane
    678 	rev	r10,r10
    679 .byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
    680 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    681 .byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
    682 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    683 	vmov.32	d3[1], r10
    684 	rev	r12,r8
    685 .byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
    686 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    687 	vmov.32	d21[1], r12
    688 	subs	r2,r2,#3	@ flags stay live until bhs .Loop3x_ctr32
    689 .byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
    690 .byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
    691 .byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
    692 
    693 	veor	q2,q2,q4	@ output = input XOR encrypted counter
    694 	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
    695 	vst1.8	{q2},[r1]!
    696 	veor	q3,q3,q5
    697 	mov	r6,r5		@ reset the round-pair counter
    698 	vst1.8	{q3},[r1]!
    699 	veor	q11,q11,q9
    700 	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
    701 	vst1.8	{q11},[r1]!
    702 	bhs	.Loop3x_ctr32
    703 
    704 	adds	r2,r2,#3	@ undo the bias: r2 = blocks still to do (0, 1 or 2)
    705 	beq	.Lctr32_done
    706 	cmp	r2,#1
    707 	mov	r12,#16
    708 	moveq	r12,#0		@ single block: tail must not advance r0
    709 
        @ Tail: one or two blocks, using the counters already held in q0 and q1.
    710 .Lctr32_tail:
    711 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    712 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    713 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    714 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    715 	vld1.32	{q8},[r7]!
    716 	subs	r6,r6,#2
    717 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    718 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    719 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    720 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    721 	vld1.32	{q9},[r7]!
    722 	bgt	.Lctr32_tail
    723 
    724 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    725 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    726 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    727 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    728 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    729 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    730 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    731 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    732 	vld1.8	{q2},[r0],r12	@ step is 0 when only one block remains,
    733 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    734 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    735 .byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
    736 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    737 	vld1.8	{q3},[r0]	@ so this load then re-reads the same block
    738 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    739 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    740 .byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
    741 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    742 	veor	q2,q2,q7	@ fold the last round key into the input block
    743 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    744 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    745 .byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
    746 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    747 	veor	q3,q3,q7
    748 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    749 .byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
    750 
    751 	cmp	r2,#1
    752 	veor	q2,q2,q0
    753 	veor	q3,q3,q1
    754 	vst1.8	{q2},[r1]!
    755 	beq	.Lctr32_done	@ one block: skip the second store
    756 	vst1.8	{q3},[r1]
    757 
    758 .Lctr32_done:
    759 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    760 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
    761 .size	aes_hw_ctr32_encrypt_blocks,.-aes_hw_ctr32_encrypt_blocks
    762 #endif
    763 #endif
    764