Home | History | Annotate | Download | only in fipsmodule
      1 #include <openssl/arm_arch.h>
      2 
      3 #if __ARM_MAX_ARCH__>=7
      4 .text
      5 
      6 
      7 .code	32
      8 #undef	__thumb2__
      9 .align	5
        @ Constant table used by the key-expansion code below, which reaches it
        @ with "adr r3,Lrcon" and loads it with vld1.32.
        @   row 0 (0x01 splat)       -> q1: XORed into the aese result (q10) of
        @                               every round and doubled with vshl.u8 #1
        @   row 1 (0x0c0f0e0d splat) -> q2: byte indices for the vtbl.8 lookups
        @   row 2 (0x1b splat)       -> reloaded into q1 once the eight looped
        @                               rounds of the 128-bit schedule are done
     10 Lrcon:
     11 .long	0x01,0x01,0x01,0x01
     12 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
     13 .long	0x1b,0x1b,0x1b,0x1b
     14 
     15 .globl	_aes_hw_set_encrypt_key
     16 .private_extern	_aes_hw_set_encrypt_key
     17 #ifdef __thumb2__
     18 .thumb_func	_aes_hw_set_encrypt_key
     19 #endif
     20 .align	5
        @ _aes_hw_set_encrypt_key / Lenc_key: expand a user key into an
        @ encryption key schedule with the AES instructions (emitted as .byte).
        @
        @ In:   r0 = pointer to the user key bytes
        @       r1 = key length in bits (128, 192 or 256 are accepted)
        @       r2 = pointer to the key schedule to fill
        @ Out:  r0 =  0 on success
        @            -1 if r0 or r2 is zero
        @            -2 if r1 is below 128, above 256 or has any of bits 0-5 set
        @ On success the round keys are stored from [r2] upwards and the round
        @ count (10, 12 or 14) is stored at byte offset 240 of the schedule.
        @ The success path leaves r2 pointing at that offset-240 word and r12
        @ holding the round count; _aes_hw_set_decrypt_key enters at Lenc_key
        @ and depends on both.
        @ Scratch: r0 (advanced past the key), r1, r3, r12, q0-q3, q8-q10.
        @ NOTE(review): presumably the C prototype is
        @ (const uint8_t *key, int bits, AES_KEY *out) - confirm in the header.
     21 _aes_hw_set_encrypt_key:
     22 Lenc_key:
     23 	mov	r3,#-1
     24 	cmp	r0,#0
     25 	beq	Lenc_key_abort
     26 	cmp	r2,#0
     27 	beq	Lenc_key_abort
     28 	mov	r3,#-2
     29 	cmp	r1,#128
     30 	blt	Lenc_key_abort
     31 	cmp	r1,#256
     32 	bgt	Lenc_key_abort
     33 	tst	r1,#0x3f
     34 	bne	Lenc_key_abort
     35 
        @ The flags of the cmp against 192 stay live until the three branches
        @ below: nothing in between (NEON ops, plain mov) writes them.
     36 	adr	r3,Lrcon
     37 	cmp	r1,#192
     38 
     39 	veor	q0,q0,q0
     40 	vld1.8	{q3},[r0]!
     41 	mov	r1,#8		@ reuse r1
     42 	vld1.32	{q1,q2},[r3]!
     43 
     44 	blt	Loop128
     45 	beq	L192
     46 	b	L256
     47 
        @ 128-bit schedule. q3 = current round key, q0 = zero, q1 = round
        @ constant, q2 = vtbl mask. Each pass stores q3 and derives the next key:
        @   - vtbl.8 splats the rotated last word of q3 into q10
        @   - aese against the all-zero q0 applies the AES byte substitution
        @   - the vext/veor chain XORs q3 with itself shifted up by one, two and
        @     three 32-bit words (zero-filled from q0)
        @ r1 counts the eight passes that use the doubling constant from row 0.
     48 .align	4
     49 Loop128:
     50 	vtbl.8	d20,{q3},d4
     51 	vtbl.8	d21,{q3},d5
     52 	vext.8	q9,q0,q3,#12
     53 	vst1.32	{q3},[r2]!
     54 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     55 	subs	r1,r1,#1
     56 
     57 	veor	q3,q3,q9
     58 	vext.8	q9,q0,q9,#12
     59 	veor	q3,q3,q9
     60 	vext.8	q9,q0,q9,#12
     61 	veor	q10,q10,q1
     62 	veor	q3,q3,q9
     63 	vshl.u8	q1,q1,#1
     64 	veor	q3,q3,q10
     65 	bne	Loop128
     66 
        @ r3 was advanced past rows 0-1 by the earlier load, so this fetches the
        @ 0x1b row; it serves the next round and, doubled, the one after.
     67 	vld1.32	{q1},[r3]
     68 
     69 	vtbl.8	d20,{q3},d4
     70 	vtbl.8	d21,{q3},d5
     71 	vext.8	q9,q0,q3,#12
     72 	vst1.32	{q3},[r2]!
     73 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     74 
     75 	veor	q3,q3,q9
     76 	vext.8	q9,q0,q9,#12
     77 	veor	q3,q3,q9
     78 	vext.8	q9,q0,q9,#12
     79 	veor	q10,q10,q1
     80 	veor	q3,q3,q9
     81 	vshl.u8	q1,q1,#1
     82 	veor	q3,q3,q10
     83 
     84 	vtbl.8	d20,{q3},d4
     85 	vtbl.8	d21,{q3},d5
     86 	vext.8	q9,q0,q3,#12
     87 	vst1.32	{q3},[r2]!
     88 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
     89 
     90 	veor	q3,q3,q9
     91 	vext.8	q9,q0,q9,#12
     92 	veor	q3,q3,q9
     93 	vext.8	q9,q0,q9,#12
     94 	veor	q10,q10,q1
     95 	veor	q3,q3,q9
     96 	veor	q3,q3,q10
        @ Ten keys (160 bytes) have been stored with writeback; the last store
        @ has none, so +0x50 brings r2 to schedule offset 240.
     97 	vst1.32	{q3},[r2]
     98 	add	r2,r2,#0x50
     99 
    100 	mov	r12,#10
    101 	b	Ldone
    102 
        @ 192-bit schedule. q3 = first 16 key bytes, d16 = the remaining 8.
        @ Subtracting 8 from every mask byte makes vtbl pick the rotated word
        @ out of d16 (the low half of q8) instead of the top of a full q register.
    103 .align	4
    104 L192:
    105 	vld1.8	{d16},[r0]!
    106 	vmov.i8	q10,#8			@ borrow q10
    107 	vst1.32	{q3},[r2]!
    108 	vsub.i8	q2,q2,q10	@ adjust the mask
    109 
        @ Each pass stores 8 + 16 bytes; eight passes (r1 = 8) plus the initial
        @ 16-byte store advance r2 by 208 bytes.
    110 Loop192:
    111 	vtbl.8	d20,{q8},d4
    112 	vtbl.8	d21,{q8},d5
    113 	vext.8	q9,q0,q3,#12
    114 	vst1.32	{d16},[r2]!
    115 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    116 	subs	r1,r1,#1
    117 
    118 	veor	q3,q3,q9
    119 	vext.8	q9,q0,q9,#12
    120 	veor	q3,q3,q9
    121 	vext.8	q9,q0,q9,#12
    122 	veor	q3,q3,q9
    123 
    124 	vdup.32	q9,d7[1]
    125 	veor	q9,q9,q8
    126 	veor	q10,q10,q1
    127 	vext.8	q8,q0,q8,#12
    128 	vshl.u8	q1,q1,#1
    129 	veor	q8,q8,q9
    130 	veor	q3,q3,q10
    131 	veor	q8,q8,q10
    132 	vst1.32	{q3},[r2]!
    133 	bne	Loop192
    134 
    135 	mov	r12,#12
    136 	add	r2,r2,#0x20
    137 	b	Ldone
    138 
        @ 256-bit schedule. q3 = first 16 key bytes, q8 = second 16. r1 = 7
        @ passes; each stores q8 and the freshly derived q3, so the exit taken
        @ on the seventh pass leaves r2 at offset 16 + 7*32 = 240.
    139 .align	4
    140 L256:
    141 	vld1.8	{q8},[r0]
    142 	mov	r1,#7
    143 	mov	r12,#14
    144 	vst1.32	{q3},[r2]!
    145 
    146 Loop256:
    147 	vtbl.8	d20,{q8},d4
    148 	vtbl.8	d21,{q8},d5
    149 	vext.8	q9,q0,q3,#12
    150 	vst1.32	{q8},[r2]!
    151 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    152 	subs	r1,r1,#1
    153 
    154 	veor	q3,q3,q9
    155 	vext.8	q9,q0,q9,#12
    156 	veor	q3,q3,q9
    157 	vext.8	q9,q0,q9,#12
    158 	veor	q10,q10,q1
    159 	veor	q3,q3,q9
    160 	vshl.u8	q1,q1,#1
    161 	veor	q3,q3,q10
    162 	vst1.32	{q3},[r2]!
    163 	beq	Ldone
    164 
        @ Second half of the pass: the splat of the last word of q3 is
        @ substituted without rotation and without a round constant, then
        @ folded into q8 through the same shifted-XOR chain.
    165 	vdup.32	q10,d7[1]
    166 	vext.8	q9,q0,q8,#12
    167 .byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
    168 
    169 	veor	q8,q8,q9
    170 	vext.8	q9,q0,q9,#12
    171 	veor	q8,q8,q9
    172 	vext.8	q9,q0,q9,#12
    173 	veor	q8,q8,q9
    174 
    175 	veor	q8,q8,q10
    176 	b	Loop256
    177 
        @ Common success exit: r2 is at schedule offset 240 on all three paths.
    178 Ldone:
    179 	str	r12,[r2]
    180 	mov	r3,#0
    181 
    182 Lenc_key_abort:
    183 	mov	r0,r3			@ return value
    184 
    185 	bx	lr
    186 
    187 
    188 .globl	_aes_hw_set_decrypt_key
    189 .private_extern	_aes_hw_set_decrypt_key
    190 #ifdef __thumb2__
    191 .thumb_func	_aes_hw_set_decrypt_key
    192 #endif
    193 .align	5
        @ _aes_hw_set_decrypt_key: build a decryption key schedule.
        @
        @ Takes the same r0/r1/r2 arguments as _aes_hw_set_encrypt_key and first
        @ runs that expansion through Lenc_key. If it fails, its non-zero status
        @ is returned unchanged. Otherwise the schedule is rewritten in place:
        @ the round keys are put in reverse order, and every key except the two
        @ outermost ones is passed through aesimc.
        @ Out: r0 = 0 on success, otherwise the Lenc_key error code.
        @ r4 and lr are saved on the stack; r4 holds the -16 step for the
        @ pointer that walks down from the end of the schedule.
    194 _aes_hw_set_decrypt_key:
    195 	stmdb	sp!,{r4,lr}
    196 	bl	Lenc_key
    197 
    198 	cmp	r0,#0
    199 	bne	Ldec_key_abort
    200 
        @ Lenc_key leaves r2 at schedule offset 240 and the round count in r12.
    201 	sub	r2,r2,#240		@ restore original r2
    202 	mov	r4,#-16
    203 	add	r0,r2,r12,lsl#4	@ end of key schedule
    204 
        @ Swap the first and last round keys as they are (no aesimc).
    205 	vld1.32	{q0},[r2]
    206 	vld1.32	{q1},[r0]
    207 	vst1.32	{q0},[r0],r4
    208 	vst1.32	{q1},[r2]!
    209 
        @ Walk inwards from both ends, swapping pairs and applying aesimc to
        @ each, until the two pointers meet.
    210 Loop_imc:
    211 	vld1.32	{q0},[r2]
    212 	vld1.32	{q1},[r0]
    213 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    214 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    215 	vst1.32	{q0},[r0],r4
    216 	vst1.32	{q1},[r2]!
    217 	cmp	r0,r2
    218 	bhi	Loop_imc
    219 
        @ The loop exits once r0 is no longer above r2; the key at r2 is
        @ transformed and stored through r0.
    220 	vld1.32	{q0},[r2]
    221 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    222 	vst1.32	{q0},[r0]
    223 
    224 	eor	r0,r0,r0		@ return value
    225 Ldec_key_abort:
    226 	ldmia	sp!,{r4,pc}
    227 
    228 .globl	_aes_hw_encrypt
    229 .private_extern	_aes_hw_encrypt
    230 #ifdef __thumb2__
    231 .thumb_func	_aes_hw_encrypt
    232 #endif
    233 .align	5
        @ _aes_hw_encrypt: encrypt a single 16-byte block.
        @ In:  r0 = pointer to the 16-byte input block
        @      r1 = pointer to the 16-byte output block
        @      r2 = pointer to the key schedule (round count read at offset 240)
        @ Scratch: r2 (advanced through the schedule), r3, q0-q2.
        @ r0 is not written, so no status is returned.
    234 _aes_hw_encrypt:
    235 	ldr	r3,[r2,#240]
    236 	vld1.32	{q0},[r2]!
    237 	vld1.8	{q2},[r0]
    238 	sub	r3,r3,#2
    239 	vld1.32	{q1},[r2]!
    240 
        @ Two rounds per pass, alternating between the preloaded keys q0 and q1;
        @ r3 = rounds - 2 counts down by two.
    241 Loop_enc:
    242 .byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
    243 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    244 	vld1.32	{q0},[r2]!
    245 	subs	r3,r3,#2
    246 .byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
    247 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    248 	vld1.32	{q1},[r2]!
    249 	bgt	Loop_enc
    250 
        @ Last two rounds: the final aese is not followed by aesmc, and the
        @ last round key is applied with a plain XOR.
    251 .byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
    252 .byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
    253 	vld1.32	{q0},[r2]
    254 .byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
    255 	veor	q2,q2,q0
    256 
    257 	vst1.8	{q2},[r1]
    258 	bx	lr
    259 
    260 .globl	_aes_hw_decrypt
    261 .private_extern	_aes_hw_decrypt
    262 #ifdef __thumb2__
    263 .thumb_func	_aes_hw_decrypt
    264 #endif
    265 .align	5
        @ _aes_hw_decrypt: decrypt a single 16-byte block.
        @ In:  r0 = pointer to the 16-byte input block
        @      r1 = pointer to the 16-byte output block
        @      r2 = pointer to the key schedule (round count read at offset 240)
        @ Scratch: r2 (advanced through the schedule), r3, q0-q2.
        @ Same structure as _aes_hw_encrypt with aesd/aesimc in place of
        @ aese/aesmc.
        @ NOTE(review): presumably expects a schedule prepared by
        @ _aes_hw_set_decrypt_key - confirm against the callers.
    266 _aes_hw_decrypt:
    267 	ldr	r3,[r2,#240]
    268 	vld1.32	{q0},[r2]!
    269 	vld1.8	{q2},[r0]
    270 	sub	r3,r3,#2
    271 	vld1.32	{q1},[r2]!
    272 
        @ Two rounds per pass, alternating between the preloaded keys q0 and q1;
        @ r3 = rounds - 2 counts down by two.
    273 Loop_dec:
    274 .byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
    275 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    276 	vld1.32	{q0},[r2]!
    277 	subs	r3,r3,#2
    278 .byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
    279 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    280 	vld1.32	{q1},[r2]!
    281 	bgt	Loop_dec
    282 
        @ Last two rounds: the final aesd is not followed by aesimc, and the
        @ last round key is applied with a plain XOR.
    283 .byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
    284 .byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
    285 	vld1.32	{q0},[r2]
    286 .byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
    287 	veor	q2,q2,q0
    288 
    289 	vst1.8	{q2},[r1]
    290 	bx	lr
    291 
    292 .globl	_aes_hw_cbc_encrypt
    293 .private_extern	_aes_hw_cbc_encrypt
    294 #ifdef __thumb2__
    295 .thumb_func	_aes_hw_cbc_encrypt
    296 #endif
    297 .align	5
        @ _aes_hw_cbc_encrypt: CBC-mode encryption or decryption of whole blocks.
        @ In:  r0       = input pointer
        @      r1       = output pointer
        @      r2       = length in bytes; below 16 nothing is done, otherwise it
        @                 is rounded down to a multiple of 16
        @      r3       = key schedule (round count read at offset 240)
        @      [sp]     = pointer to the 16-byte IV, updated on exit
        @      [sp,#4]  = direction flag: zero decrypts, non-zero encrypts
        @ r4-r8, lr and d8-d15 are saved and restored; no status is returned.
        @ Register roles after setup:
        @   q8,q9    first two round keys (reloaded while looping)
        @   q10-q15  the six round keys before the last one
        @   q7       last round key
        @   q6       IV / most recent ciphertext block
        @   r5       round count - 8, r8 = input post-increment (16 or 0)
    298 _aes_hw_cbc_encrypt:
    299 	mov	ip,sp
    300 	stmdb	sp!,{r4,r5,r6,r7,r8,lr}
    301 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
    302 	ldmia	ip,{r4,r5}		@ load remaining args
    303 	subs	r2,r2,#16
    304 	mov	r8,#16
    305 	blo	Lcbc_abort
        @ Exactly one block: step 0 keeps r0 on that block after the first load.
    306 	moveq	r8,#0
    307 
        @ The flags of this cmp are consumed by "beq Lcbc_dec" further down;
        @ none of the instructions in between writes the flags.
    308 	cmp	r5,#0			@ en- or decrypting?
    309 	ldr	r5,[r3,#240]
    310 	and	r2,r2,#-16
    311 	vld1.8	{q6},[r4]
    312 	vld1.8	{q0},[r0],r8
    313 
    314 	vld1.32	{q8,q9},[r3]		@ load key schedule...
    315 	sub	r5,r5,#6
    316 	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
    317 	sub	r5,r5,#2
    318 	vld1.32	{q10,q11},[r7]!
    319 	vld1.32	{q12,q13},[r7]!
    320 	vld1.32	{q14,q15},[r7]!
    321 	vld1.32	{q7},[r7]
    322 
    323 	add	r7,r3,#32
    324 	mov	r6,r5
    325 	beq	Lcbc_dec
    326 
        @ ---- encryption ----
        @ r5 == 2 means a round count of 10, handled by Lcbc_enc128.
        @ q5 = rndkey[0] ^ last round key. Each following plaintext block is
        @ XORed with q5 so it can be fed as the "key" of the first aese while
        @ q0 still holds the previous block's state before its final key XOR;
        @ the two copies of the last round key cancel out.
    327 	cmp	r5,#2
    328 	veor	q0,q0,q6
    329 	veor	q5,q8,q7
    330 	beq	Lcbc_enc128
    331 
        @ 12- and 14-round path: q2,q3 = rndkey[2..3]; r7, r6, r12, r14 and r3
        @ point at rndkey[1], [4], [5], [6] and [7] for reloads inside the loop.
    332 	vld1.32	{q2,q3},[r7]
    333 	add	r7,r3,#16
    334 	add	r6,r3,#16*4
    335 	add	r12,r3,#16*5
    336 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    337 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    338 	add	r14,r3,#16*6
    339 	add	r3,r3,#16*7
    340 	b	Lenter_cbc_enc
    341 
    342 .align	4
    343 Loop_cbc_enc:
    344 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    345 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
        @ Store the ciphertext block finished by the previous pass.
    346 	vst1.8	{q6},[r1]!
    347 Lenter_cbc_enc:
    348 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    349 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    350 .byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
    351 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    352 	vld1.32	{q8},[r6]
        @ r5 == 4 means 12 rounds: skip the two extra rounds of the 14-round case.
    353 	cmp	r5,#4
    354 .byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
    355 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    356 	vld1.32	{q9},[r12]
    357 	beq	Lcbc_enc192
    358 
    359 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    360 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    361 	vld1.32	{q8},[r14]
    362 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    363 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    364 	vld1.32	{q9},[r3]
    365 	nop
    366 
    367 Lcbc_enc192:
    368 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    369 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
        @ The flags of this subs feed both the moveq and the closing bhs.
    370 	subs	r2,r2,#16
    371 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    372 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    373 	moveq	r8,#0
    374 .byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
    375 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    376 .byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
    377 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
        @ Next plaintext block goes into q8, taking the place of rndkey[0].
    378 	vld1.8	{q8},[r0],r8
    379 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    380 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    381 	veor	q8,q8,q5
    382 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    383 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    384 	vld1.32	{q9},[r7]		@ re-pre-load rndkey[1]
    385 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    386 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    387 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
        @ q6 = finished ciphertext block; q0 keeps the state without the final
        @ key XOR for chaining into the next pass.
    388 	veor	q6,q0,q7
    389 	bhs	Loop_cbc_enc
    390 
    391 	vst1.8	{q6},[r1]!
    392 	b	Lcbc_done
    393 
        @ 10-round encryption: every round key stays in registers
        @ (q8/q9, q2/q3, q10-q15, q7), so nothing is reloaded inside the loop.
    394 .align	5
    395 Lcbc_enc128:
    396 	vld1.32	{q2,q3},[r7]
    397 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    398 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    399 	b	Lenter_cbc_enc128
    400 Loop_cbc_enc128:
    401 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    402 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    403 	vst1.8	{q6},[r1]!
    404 Lenter_cbc_enc128:
    405 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    406 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    407 	subs	r2,r2,#16
    408 .byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
    409 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    410 	moveq	r8,#0
    411 .byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
    412 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    413 .byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
    414 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    415 .byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
    416 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    417 	vld1.8	{q8},[r0],r8
    418 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    419 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    420 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    421 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    422 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    423 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    424 	veor	q8,q8,q5
    425 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    426 	veor	q6,q0,q7
    427 	bhs	Loop_cbc_enc128
    428 
    429 	vst1.8	{q6},[r1]!
    430 	b	Lcbc_done
        @ ---- decryption ----
        @ Three blocks are processed per pass in q0, q1 and q10. q2, q3 and q11
        @ keep copies of the ciphertext that is XORed into the following
        @ block's output; q6 is the IV or the last ciphertext of the previous
        @ group. After the bias r2 goes below zero when fewer than three
        @ blocks remain.
    431 .align	5
    432 Lcbc_dec:
    433 	vld1.8	{q10},[r0]!
    434 	subs	r2,r2,#32		@ bias
    435 	add	r6,r5,#2
    436 	vorr	q3,q0,q0
    437 	vorr	q1,q0,q0
    438 	vorr	q11,q10,q10
    439 	blo	Lcbc_dec_tail
    440 
    441 	vorr	q1,q10,q10
    442 	vld1.8	{q10},[r0]!
    443 	vorr	q2,q0,q0
    444 	vorr	q3,q1,q1
    445 	vorr	q11,q10,q10
    446 
        @ Leading rounds, two per pass, reloading q8/q9 from the schedule via r7.
    447 Loop3x_cbc_dec:
    448 .byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
    449 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    450 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    451 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    452 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    453 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    454 	vld1.32	{q8},[r7]!
    455 	subs	r6,r6,#2
    456 .byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
    457 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    458 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    459 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    460 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    461 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    462 	vld1.32	{q9},[r7]!
    463 	bgt	Loop3x_cbc_dec
    464 
    465 .byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
    466 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    467 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    468 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    469 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    470 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
        @ q4, q5 and q9 = chaining value ^ last round key for the three blocks.
        @ The flags of this subs drive movlo and the bhs that closes the loop.
    471 	veor	q4,q6,q7
    472 	subs	r2,r2,#0x30
    473 	veor	q5,q2,q7
    474 	movlo	r6,r2			@ r6 is zero at this point
    475 .byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
    476 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    477 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    478 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    479 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    480 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    481 	veor	q9,q3,q7
    482 	add	r0,r0,r6		@ r0 is adjusted in such way that
    483 					@ at exit from the loop q1-q10
    484 					@ are loaded with last "words"
    485 	vorr	q6,q11,q11
    486 	mov	r7,r3
    487 .byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
    488 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    489 .byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
    490 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    491 .byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
    492 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    493 	vld1.8	{q2},[r0]!
    494 .byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
    495 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    496 .byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
    497 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    498 .byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
    499 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    500 	vld1.8	{q3},[r0]!
    501 .byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
    502 .byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
    503 .byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
    504 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    505 .byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
    506 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    507 	vld1.8	{q11},[r0]!
    508 .byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
    509 .byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
    510 .byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
    511 	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
    512 	add	r6,r5,#2
    513 	veor	q4,q4,q0
    514 	veor	q5,q5,q1
    515 	veor	q10,q10,q9
    516 	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
    517 	vst1.8	{q4},[r1]!
    518 	vorr	q0,q2,q2
    519 	vst1.8	{q5},[r1]!
    520 	vorr	q1,q3,q3
    521 	vst1.8	{q10},[r1]!
    522 	vorr	q10,q11,q11
    523 	bhs	Loop3x_cbc_dec
    524 
        @ r2 == -0x30 here means no blocks are left over.
    525 	cmn	r2,#0x30
    526 	beq	Lcbc_done
    527 	nop
    528 
        @ Tail for one or two remaining blocks, decrypted in q1 and q10.
    529 Lcbc_dec_tail:
    530 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    531 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    532 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    533 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    534 	vld1.32	{q8},[r7]!
    535 	subs	r6,r6,#2
    536 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    537 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    538 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    539 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    540 	vld1.32	{q9},[r7]!
    541 	bgt	Lcbc_dec_tail
    542 
    543 .byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
    544 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    545 .byte	0x60,0x43,0xf0,0xf3	@ aesd q10,q8
    546 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    547 .byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
    548 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    549 .byte	0x62,0x43,0xf0,0xf3	@ aesd q10,q9
    550 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    551 .byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
    552 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    553 .byte	0x68,0x43,0xf0,0xf3	@ aesd q10,q12
    554 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
        @ r2 == -0x20 means exactly one block is left; the flag is consumed by
        @ "beq Lcbc_dec_one" below.
    555 	cmn	r2,#0x20
    556 .byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
    557 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    558 .byte	0x6a,0x43,0xf0,0xf3	@ aesd q10,q13
    559 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    560 	veor	q5,q6,q7
    561 .byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
    562 .byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
    563 .byte	0x6c,0x43,0xf0,0xf3	@ aesd q10,q14
    564 .byte	0xe4,0x43,0xf0,0xf3	@ aesimc q10,q10
    565 	veor	q9,q3,q7
    566 .byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15
    567 .byte	0x6e,0x43,0xf0,0xf3	@ aesd q10,q15
    568 	beq	Lcbc_dec_one
    569 	veor	q5,q5,q1
    570 	veor	q9,q9,q10
    571 	vorr	q6,q11,q11
    572 	vst1.8	{q5},[r1]!
    573 	vst1.8	{q9},[r1]!
    574 	b	Lcbc_done
    575 
    576 Lcbc_dec_one:
    577 	veor	q5,q5,q10
    578 	vorr	q6,q11,q11
    579 	vst1.8	{q5},[r1]!
    580 
        @ Write the chaining value back through the IV pointer.
    581 Lcbc_done:
    582 	vst1.8	{q6},[r4]
    583 Lcbc_abort:
    584 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    585 	ldmia	sp!,{r4,r5,r6,r7,r8,pc}
    586 
    587 .globl	_aes_hw_ctr32_encrypt_blocks
    588 .private_extern	_aes_hw_ctr32_encrypt_blocks
    589 #ifdef __thumb2__
    590 .thumb_func	_aes_hw_ctr32_encrypt_blocks
    591 #endif
    592 .align	5
        @ _aes_hw_ctr32_encrypt_blocks: CTR mode with a 32-bit counter held in
        @ the last word of the counter block.
        @ In:  r0    = input pointer
        @      r1    = output pointer
        @      r2    = number of 16-byte blocks
        @      r3    = key schedule (round count read at offset 240)
        @      [sp]  = pointer to the 16-byte counter block
        @ r4-r10, lr and d8-d15 are saved and restored; no status is returned.
        @ There is no store through r4, so the counter block in memory is left
        @ unchanged.
        @ Register roles after setup:
        @   q8,q9    first two round keys (reloaded while looping)
        @   q12-q15  the four round keys before the last one, q7 = last key
        @   q6       copy of the original counter block, used as a template
        @   q0,q1,q10  counter blocks n, n+1, n+2 being encrypted
        @   r8       running counter value, byte-reversed back with rev before
        @            it is inserted into the top lane of a block
        @ NOTE(review): r2 == 0 is not special-cased and falls into the tail
        @ path - presumably callers never pass zero blocks; confirm.
    593 _aes_hw_ctr32_encrypt_blocks:
    594 	mov	ip,sp
    595 	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,lr}
    596 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}            @ ABI specification says so
    597 	ldr	r4, [ip]		@ load remaining arg
    598 	ldr	r5,[r3,#240]
    599 
    600 	ldr	r8, [r4, #12]
    601 	vld1.32	{q0},[r4]
    602 
    603 	vld1.32	{q8,q9},[r3]		@ load key schedule...
    604 	sub	r5,r5,#4
    605 	mov	r12,#16
        @ The flags of this cmp are consumed by movlo and by "bls Lctr32_tail"
        @ below; nothing in between writes the flags.
    606 	cmp	r2,#2
    607 	add	r7,r3,r5,lsl#4	@ pointer to last 5 round keys
    608 	sub	r5,r5,#2
    609 	vld1.32	{q12,q13},[r7]!
    610 	vld1.32	{q14,q15},[r7]!
    611 	vld1.32	{q7},[r7]
    612 	add	r7,r3,#32
    613 	mov	r6,r5
        @ Fewer than two blocks: input step 0, so the second load in the tail
        @ re-reads the same block.
    614 	movlo	r12,#0
    615 #ifndef __ARMEB__
    616 	rev	r8, r8
    617 #endif
    618 	vorr	q1,q0,q0
    619 	add	r10, r8, #1
    620 	vorr	q10,q0,q0
    621 	add	r8, r8, #2
    622 	vorr	q6,q0,q0
    623 	rev	r10, r10
    624 	vmov.32	d3[1],r10
    625 	bls	Lctr32_tail
    626 	rev	r12, r8
    627 	sub	r2,r2,#3		@ bias
    628 	vmov.32	d21[1],r12
    629 	b	Loop3x_ctr32
    630 
        @ Three counter blocks per pass. Leading rounds run two at a time with
        @ q8/q9 reloaded from the schedule through r7.
    631 .align	4
    632 Loop3x_ctr32:
    633 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    634 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    635 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    636 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    637 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
    638 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    639 	vld1.32	{q8},[r7]!
    640 	subs	r6,r6,#2
    641 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    642 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    643 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    644 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    645 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
    646 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    647 	vld1.32	{q9},[r7]!
    648 	bgt	Loop3x_ctr32
    649 
        @ From here the three states move to q4, q5 and q9, so q0, q1 and q10
        @ can be refilled from the q6 template with the next counter values
        @ while the remaining rounds run. q2, q3 and q11 receive the input
        @ blocks, pre-XORed with the last round key q7.
    650 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    651 .byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
    652 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    653 .byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
    654 	vld1.8	{q2},[r0]!
    655 	vorr	q0,q6,q6
    656 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
    657 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
    658 	vld1.8	{q3},[r0]!
    659 	vorr	q1,q6,q6
    660 .byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
    661 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    662 .byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
    663 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    664 	vld1.8	{q11},[r0]!
    665 	mov	r7,r3
    666 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
    667 .byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
    668 	vorr	q10,q6,q6
    669 	add	r9,r8,#1
    670 .byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
    671 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    672 .byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
    673 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    674 	veor	q2,q2,q7
    675 	add	r10,r8,#2
    676 .byte	0x28,0x23,0xf0,0xf3	@ aese q9,q12
    677 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    678 	veor	q3,q3,q7
    679 	add	r8,r8,#3
    680 .byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
    681 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    682 .byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
    683 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    684 	veor	q11,q11,q7
    685 	rev	r9,r9
    686 .byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
    687 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    688 	vmov.32	d1[1], r9
    689 	rev	r10,r10
    690 .byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
    691 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
    692 .byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
    693 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
    694 	vmov.32	d3[1], r10
    695 	rev	r12,r8
    696 .byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
    697 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
    698 	vmov.32	d21[1], r12
        @ The flags of this subs are consumed by the bhs closing the loop.
    699 	subs	r2,r2,#3
    700 .byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
    701 .byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
    702 .byte	0x2e,0x23,0xf0,0xf3	@ aese q9,q15
    703 
    704 	veor	q2,q2,q4
    705 	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
    706 	vst1.8	{q2},[r1]!
    707 	veor	q3,q3,q5
    708 	mov	r6,r5
    709 	vst1.8	{q3},[r1]!
    710 	veor	q11,q11,q9
    711 	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
    712 	vst1.8	{q11},[r1]!
    713 	bhs	Loop3x_ctr32
    714 
        @ Undo the bias: r2 = blocks left over (0, 1 or 2). r12 becomes the
        @ input step for the tail, 0 when only one block is left.
    715 	adds	r2,r2,#3
    716 	beq	Lctr32_done
    717 	cmp	r2,#1
    718 	mov	r12,#16
    719 	moveq	r12,#0
    720 
        @ Tail: both q0 and q1 are always encrypted; the second result is only
        @ stored when more than one block remains.
    721 Lctr32_tail:
    722 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    723 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    724 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    725 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    726 	vld1.32	{q8},[r7]!
    727 	subs	r6,r6,#2
    728 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    729 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    730 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    731 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    732 	vld1.32	{q9},[r7]!
    733 	bgt	Lctr32_tail
    734 
    735 .byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
    736 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    737 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
    738 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    739 .byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
    740 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    741 .byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
    742 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    743 	vld1.8	{q2},[r0],r12
    744 .byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
    745 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    746 .byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
    747 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    748 	vld1.8	{q3},[r0]
    749 .byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
    750 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    751 .byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
    752 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    753 	veor	q2,q2,q7
    754 .byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
    755 .byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
    756 .byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
    757 .byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
    758 	veor	q3,q3,q7
    759 .byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
    760 .byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15
    761 
    762 	cmp	r2,#1
    763 	veor	q2,q2,q0
    764 	veor	q3,q3,q1
    765 	vst1.8	{q2},[r1]!
    766 	beq	Lctr32_done
    767 	vst1.8	{q3},[r1]
    768 
    769 Lctr32_done:
    770 	vldmia	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}
    771 	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,pc}
    772 
    773 #endif
    774