#include "arm_arch.h"

#if __ARM_ARCH__>=7
@ ---------------------------------------------------------------------
@ AES for 32-bit ARM using NEON plus the ARMv8 AES instructions.
@
@ Syntax:  GNU as, ARM mode (.code 32), run through the C preprocessor.
@ The AES instructions (aese, aesd, aesmc, aesimc) are emitted as raw
@ .byte sequences; the intended mnemonic is given in the comment on
@ each such line.
@
@ Key schedule layout as used by the code below: round keys are stored
@ as consecutive 16-byte blocks and the number of rounds is a 32-bit
@ word at byte offset 240 from the start of the schedule.
@ ---------------------------------------------------------------------
.text
.fpu	neon
.code	32
.align	5
@ Constants for key expansion, loaded relative to pc with adr:
@   row 0: initial round constant 0x01 in every 32-bit lane
@   row 1: vtbl index mask that rotates the last word and splats it
@   row 2: round constant 0x1b, reloaded for the tail of the 128-bit path
rcon:
.long	0x01,0x01,0x01,0x01
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	@ rotate-n-splat
.long	0x1b,0x1b,0x1b,0x1b

@ ---------------------------------------------------------------------
@ aes_v8_set_encrypt_key
@ In:    r0 = pointer to the user key bytes
@        r1 = key length in bits; below 192 takes the 128-bit path,
@             exactly 192 the 192-bit path, anything else the 256-bit
@             path (no further validation is performed here)
@        r2 = pointer to the output key schedule
@ Out:   r0 = 0
@        r12 = number of rounds (10, 12 or 14), also stored at
@              offset 240 of the schedule
@        r2 = start of schedule + 240 on return
@ Clobb: r1, r3, q0-q3, q8-q10, flags
@ NOTE(review): presumably the C prototype is
@   int aes_v8_set_encrypt_key(const unsigned char *key, int bits,
@                              AES_KEY *out) -- confirm against caller.
@
@ .Lenc_key is a local alias so that aes_v8_set_decrypt_key can call
@ this routine with bl and then rely on the r2 and r12 values above.
@ ---------------------------------------------------------------------
.globl	aes_v8_set_encrypt_key
.type	aes_v8_set_encrypt_key,%function
.align	5
aes_v8_set_encrypt_key:
.Lenc_key:
	adr	r3,rcon
	cmp	r1,#192			@ flags stay live until the branches below

	veor	q0,q0,q0		@ q0 = 0, used as zero round key and shift-in
	vld1.8	{q3},[r0]!		@ q3 = first 16 key bytes
	mov	r1,#8			@ reuse r1
	vld1.32	{q1,q2},[r3]!		@ q1 = round constant, q2 = rotate-n-splat mask

	blt	.Loop128
	beq	.L192
	b	.L256

@ 128-bit key: eight iterations with the doubling round constant, then
@ two unrolled steps that use the 0x1b row of rcon.
.align	4
.Loop128:
	vtbl.8	d20,{q3},d4		@ q10 = rotated last word, splatted
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12		@ q9 = q3 shifted up by one word
	vst1.32	{q3},[r2]!		@ emit current round key
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1		@ fold in the round constant
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1		@ next round constant
	veor	q3,q3,q10
	bne	.Loop128

	vld1.32	{q1},[r3]		@ q1 = 0x1b row of rcon

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10

	vtbl.8	d20,{q3},d4
	vtbl.8	d21,{q3},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q3},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	veor	q3,q3,q10
	vst1.32	{q3},[r2]		@ last round key, at schedule + 160
	add	r2,r2,#0x50		@ r2 = schedule + 240

	mov	r12,#10
	b	.Ldone

@ 192-bit key: each of the eight iterations emits 24 bytes of schedule.
.align	4
.L192:
	vld1.8	{d16},[r0]!		@ remaining 8 key bytes
	vmov.i8	q10,#8			@ borrow q10
	vst1.32	{q3},[r2]!
	vsub.i8	q2,q2,q10		@ adjust the mask

.Loop192:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{d16},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9

	vdup.32	q9,d7[1]
	veor	q9,q9,q8
	veor	q10,q10,q1
	vext.8	q8,q0,q8,#12
	vshl.u8	q1,q1,#1
	veor	q8,q8,q9
	veor	q3,q3,q10
	veor	q8,q8,q10
	vst1.32	{q3},[r2]!
	bne	.Loop192

	mov	r12,#12
	add	r2,r2,#0x20		@ r2 = schedule + 240
	b	.Ldone

@ 256-bit key: seven iterations; the loop exits from its middle once the
@ final round key has been stored, leaving r2 at schedule + 240.
.align	4
.L256:
	vld1.8	{q8},[r0]		@ second 16 key bytes
	mov	r1,#7
	mov	r12,#14
	vst1.32	{q3},[r2]!

.Loop256:
	vtbl.8	d20,{q8},d4
	vtbl.8	d21,{q8},d5
	vext.8	q9,q0,q3,#12
	vst1.32	{q8},[r2]!
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0
	subs	r1,r1,#1

	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q3,q3,q9
	vext.8	q9,q0,q9,#12
	veor	q10,q10,q1
	veor	q3,q3,q9
	vshl.u8	q1,q1,#1
	veor	q3,q3,q10
	vst1.32	{q3},[r2]!
	beq	.Ldone			@ flags still from subs r1 above

	vdup.32	q10,d7[1]		@ splat last word, no rotate, no round constant
	vext.8	q9,q0,q8,#12
	.byte	0x00,0x43,0xf0,0xf3	@ aese q10,q0

	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9
	vext.8	q9,q0,q9,#12
	veor	q8,q8,q9

	veor	q8,q8,q10
	b	.Loop256

.Ldone:
	str	r12,[r2]		@ store round count at schedule + 240

	eor	r0,r0,r0		@ return value

	bx	lr
.size	aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key

@ ---------------------------------------------------------------------
@ aes_v8_set_decrypt_key
@ Same register inputs as aes_v8_set_encrypt_key (r0, r1, r2).
@ Builds the encryption schedule via .Lenc_key, then reverses the order
@ of the round keys in place and applies aesimc to every round key
@ except the first and the last.
@ Out:   r0 = 0
@ Clobb: r1-r3, r12, q0-q3, q8-q10, flags; r4 and lr are saved/restored
@ ---------------------------------------------------------------------
.globl	aes_v8_set_decrypt_key
.type	aes_v8_set_decrypt_key,%function
.align	5
aes_v8_set_decrypt_key:
	stmdb	sp!,{r4,lr}
	bl	.Lenc_key

	sub	r2,r2,#240		@ restore original r2
	mov	r4,#-16			@ post-index step for the descending pointer
	add	r0,r2,r12,lsl#4	@ end of key schedule

	@ swap first and last round keys unchanged
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!

	@ swap the inner pairs, transforming both with aesimc
.Loop_imc:
	vld1.32	{q0},[r2]
	vld1.32	{q1},[r0]
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vst1.32	{q0},[r0],r4
	vst1.32	{q1},[r2]!
	cmp	r0,r2
	bhi	.Loop_imc

	@ the two pointers have met: transform the remaining round key
	vld1.32	{q0},[r2]
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vst1.32	{q0},[r0]

	eor	r0,r0,r0		@ return value
	ldmia	sp!,{r4,pc}
.size	aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key

@ ---------------------------------------------------------------------
@ aes_v8_encrypt -- encrypt one 16-byte block
@ In:    r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule (round count at offset 240)
@ Clobb: r2, r3, q0-q2, flags
@ Two rounds are processed per loop iteration.
@ ---------------------------------------------------------------------
.globl	aes_v8_encrypt
.type	aes_v8_encrypt,%function
.align	5
aes_v8_encrypt:
	ldr	r3,[r2,#240]		@ r3 = number of rounds
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_enc:
	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
	vld1.32	{q0},[r2]!
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	subs	r3,r3,#2
	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	vld1.32	{q1},[r2]!
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	bgt	.Loop_enc

	.byte	0x00,0x43,0xb0,0xf3	@ aese q2,q0
	vld1.32	{q0},[r2]		@ q0 = last round key
	.byte	0x84,0x43,0xb0,0xf3	@ aesmc q2,q2
	.byte	0x02,0x43,0xb0,0xf3	@ aese q2,q1
	veor	q2,q2,q0		@ final round key is applied with a plain xor

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_encrypt,.-aes_v8_encrypt

@ ---------------------------------------------------------------------
@ aes_v8_decrypt -- decrypt one 16-byte block
@ In:    r0 = pointer to the input block
@        r1 = pointer to the output block
@        r2 = pointer to the key schedule (round count at offset 240)
@ Clobb: r2, r3, q0-q2, flags
@ Same structure as aes_v8_encrypt with aesd and aesimc.
@ ---------------------------------------------------------------------
.globl	aes_v8_decrypt
.type	aes_v8_decrypt,%function
.align	5
aes_v8_decrypt:
	ldr	r3,[r2,#240]		@ r3 = number of rounds
	vld1.32	{q0},[r2]!
	vld1.8	{q2},[r0]		@ q2 = block being processed
	sub	r3,r3,#2
	vld1.32	{q1},[r2]!

.Loop_dec:
	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
	vld1.32	{q0},[r2]!
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	subs	r3,r3,#2
	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	vld1.32	{q1},[r2]!
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	bgt	.Loop_dec

	.byte	0x40,0x43,0xb0,0xf3	@ aesd q2,q0
	vld1.32	{q0},[r2]		@ q0 = last round key
	.byte	0xc4,0x43,0xb0,0xf3	@ aesimc q2,q2
	.byte	0x42,0x43,0xb0,0xf3	@ aesd q2,q1
	veor	q2,q2,q0		@ final round key is applied with a plain xor

	vst1.8	{q2},[r1]
	bx	lr
.size	aes_v8_decrypt,.-aes_v8_decrypt

@ ---------------------------------------------------------------------
@ aes_v8_cbc_encrypt -- CBC mode, encrypt or decrypt
@ In:    r0 = input pointer
@        r1 = output pointer
@        r2 = length in bytes; lengths below 16 return immediately
@             without touching the output or the iv, and any partial
@             trailing block is ignored (length is masked with -16)
@        r3 = key schedule (round count at offset 240)
@        [sp]    = pointer to the 16-byte iv, read on entry and
@                  written back at .Lcbc_done
@        [sp,#4] = 0 selects decryption, non-zero selects encryption
@ Saves and restores r4-r8, lr and d8-d15.
@
@ Register roles:
@   r8      = post-index step for input loads, 16 or 0; it is forced to
@             0 when the next load would otherwise step past the end
@   r5, r6  = round count minus 8, and its per-block working copy
@   r7      = walking pointer into the key schedule
@   q8, q9  = current pair of early round keys
@   q10-q15 = the six round keys preceding the last one
@   q7      = last round key
@   q6      = chaining value (iv)
@ ---------------------------------------------------------------------
.globl	aes_v8_cbc_encrypt
.type	aes_v8_cbc_encrypt,%function
.align	5
aes_v8_cbc_encrypt:
	mov	ip,sp
	stmdb	sp!,{r4-r8,lr}
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	ldmia	ip,{r4-r5}		@ load remaining args
	subs	r2,r2,#16
	mov	r8,#16
	blo	.Lcbc_abort
	moveq	r8,#0			@ exactly one block: do not advance r0

	cmp	r5,#0			@ en- or decrypting?
	ldr	r5,[r3,#240]
	and	r2,r2,#-16
	vld1.8	{q6},[r4]
	vld1.8	{q0},[r0],r8

	vld1.32	{q8-q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10-q11},[r7]!
	vld1.32	{q12-q13},[r7]!
	vld1.32	{q14-q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5
	beq	.Lcbc_dec		@ flags still from cmp r5,#0 above

	cmp	r5,#2			@ r5 equals 2 for a 10-round schedule
	veor	q0,q0,q6		@ first plaintext block xor iv
	veor	q5,q8,q7		@ q5 = rndkey[0] xor last round key
	beq	.Lcbc_enc128

.Loop_cbc_enc:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	vld1.32	{q8},[r7]!
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r6,r6,#2
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	vld1.32	{q9},[r7]!
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	bgt	.Loop_cbc_enc

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	add	r7,r3,#16
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8		@ next input block, reusing q8
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5		@ q8 = next input xor rndkey[0] xor last key
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15

	mov	r6,r5
	veor	q6,q0,q7		@ q6 = ciphertext block, also the next chaining value
	vst1.8	{q6},[r1]!
	bhs	.Loop_cbc_enc		@ flags still from subs r2 above

	b	.Lcbc_done

@ Encryption with a 10-round schedule: all round keys stay in registers
@ (q8, q9, q2, q3, q10-q15, q7), so the loop never reloads them.
.align	5
.Lcbc_enc128:
	vld1.32	{q2-q3},[r7]
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	b	.Lenter_cbc_enc128
.Loop_cbc_enc128:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vst1.8	{q6},[r1]!
.Lenter_cbc_enc128:
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r2,r2,#16
	.byte	0x04,0x03,0xb0,0xf3	@ aese q0,q2
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	moveq	r8,#0
	.byte	0x06,0x03,0xb0,0xf3	@ aese q0,q3
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q8},[r0],r8
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q8,q8,q5
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q6,q0,q7
	bhs	.Loop_cbc_enc128

	vst1.8	{q6},[r1]!
	b	.Lcbc_done

@ Decryption with a 10-round schedule, two blocks per iteration.
@ q6 and q2 carry the chaining values already xored with the last
@ round key q7; r8 and r12 are the post-index steps of the two loads.
.align	5
.Lcbc_dec128:
	vld1.32	{q4-q5},[r7]
	veor	q6,q6,q7
	veor	q2,q0,q7
	mov	r12,r8

.Loop2x_cbc_dec128:
	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	subs	r2,r2,#32
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	movlo	r8,#0
	.byte	0x48,0x03,0xb0,0xf3	@ aesd q0,q4
	.byte	0x48,0x23,0xb0,0xf3	@ aesd q1,q4
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	movls	r12,#0
	.byte	0x4a,0x03,0xb0,0xf3	@ aesd q0,q5
	.byte	0x4a,0x23,0xb0,0xf3	@ aesd q1,q5
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x64,0x03,0xb0,0xf3	@ aesd q0,q10
	.byte	0x64,0x23,0xb0,0xf3	@ aesd q1,q10
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x66,0x03,0xb0,0xf3	@ aesd q0,q11
	.byte	0x66,0x23,0xb0,0xf3	@ aesd q1,q11
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15

	veor	q6,q6,q0
	vld1.8	{q0},[r0],r8
	veor	q2,q2,q1
	vld1.8	{q1},[r0],r12
	vst1.8	{q6},[r1]!
	veor	q6,q3,q7
	vst1.8	{q2},[r1]!
	veor	q2,q0,q7
	vorr	q3,q1,q1
	bhs	.Loop2x_cbc_dec128	@ flags still from subs r2 above

	adds	r2,r2,#32
	veor	q6,q6,q7		@ undo the xor with q7: q6 is the plain iv again
	beq	.Lcbc_done
	veor	q2,q2,q7		@ likewise for q2 before the generic tail
	b	.Lcbc_dec_tail

@ Generic decryption, two blocks per iteration, with a one-block tail.
@ q2 and q3 hold copies of the ciphertext blocks so they can serve as
@ chaining values after q0 and q1 have been decrypted in place.
.align	5
.Lcbc_dec:
	subs	r2,r2,#16
	vorr	q2,q0,q0
	blo	.Lcbc_dec_tail		@ only one block in total

	moveq	r8,#0
	cmp	r5,#2
	vld1.8	{q1},[r0],r8
	vorr	q3,q1,q1
	beq	.Lcbc_dec128

.Loop2x_cbc_dec:
	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	vld1.32	{q8},[r7]!
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	subs	r6,r6,#2
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	vld1.32	{q9},[r7]!
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	bgt	.Loop2x_cbc_dec

	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0x60,0x23,0xb0,0xf3	@ aesd q1,q8
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	veor	q4,q6,q7		@ chaining value for block 0, xor last key
	veor	q5,q2,q7		@ chaining value for block 1, xor last key
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0x62,0x23,0xb0,0xf3	@ aesd q1,q9
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vorr	q6,q3,q3		@ second ciphertext block becomes the next iv
	subs	r2,r2,#32
	.byte	0x64,0x03,0xb0,0xf3	@ aesd q0,q10
	.byte	0x64,0x23,0xb0,0xf3	@ aesd q1,q10
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	movlo	r8,#0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	mov	r7,r3
	.byte	0x66,0x03,0xb0,0xf3	@ aesd q0,q11
	.byte	0x66,0x23,0xb0,0xf3	@ aesd q1,q11
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vld1.8	{q2},[r0],r8
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	movls	r8,#0
	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
	.byte	0x68,0x23,0xb0,0xf3	@ aesd q1,q12
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vld1.8	{q3},[r0],r8
	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
	.byte	0x6a,0x23,0xb0,0xf3	@ aesd q1,q13
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vld1.32	{q8},[r7]!	@ re-pre-load rndkey[0]
	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
	.byte	0x6c,0x23,0xb0,0xf3	@ aesd q1,q14
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0xc2,0x23,0xb0,0xf3	@ aesimc q1,q1
	vld1.32	{q9},[r7]!	@ re-pre-load rndkey[1]
	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15
	.byte	0x6e,0x23,0xb0,0xf3	@ aesd q1,q15

	mov	r6,r5
	veor	q4,q4,q0
	veor	q5,q5,q1
	vorr	q0,q2,q2
	vst1.8	{q4},[r1]!
	vorr	q1,q3,q3
	vst1.8	{q5},[r1]!
	bhs	.Loop2x_cbc_dec		@ flags still from subs r2 above

	adds	r2,r2,#32
	beq	.Lcbc_done

	@ one block left: q0 = ciphertext, q2 = its copy, q6 = chaining value
.Lcbc_dec_tail:
	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	vld1.32	{q8},[r7]!
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	subs	r6,r6,#2
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	vld1.32	{q9},[r7]!
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	bgt	.Lcbc_dec_tail

	.byte	0x60,0x03,0xb0,0xf3	@ aesd q0,q8
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x62,0x03,0xb0,0xf3	@ aesd q0,q9
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	veor	q4,q6,q7
	.byte	0x64,0x03,0xb0,0xf3	@ aesd q0,q10
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	vorr	q6,q2,q2		@ this ciphertext block is the iv handed back
	.byte	0x66,0x03,0xb0,0xf3	@ aesd q0,q11
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x68,0x03,0xb0,0xf3	@ aesd q0,q12
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x6a,0x03,0xb0,0xf3	@ aesd q0,q13
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x6c,0x03,0xb0,0xf3	@ aesd q0,q14
	.byte	0xc0,0x03,0xb0,0xf3	@ aesimc q0,q0
	.byte	0x6e,0x03,0xb0,0xf3	@ aesd q0,q15

	veor	q4,q4,q0
	vst1.8	{q4},[r1]!

.Lcbc_done:
	vst1.8	{q6},[r4]		@ write the updated iv back to the caller
.Lcbc_abort:
	vldmia	sp!,{d8-d15}
	ldmia	sp!,{r4-r8,pc}
.size	aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt

@ ---------------------------------------------------------------------
@ aes_v8_ctr32_encrypt_blocks -- CTR mode with a 32-bit counter
@ In:    r0 = input pointer
@        r1 = output pointer
@        r2 = number of 16-byte blocks
@        r3 = key schedule (round count at offset 240)
@        [sp] = pointer to the 16-byte counter block; only read here
@ Saves and restores r4-r10, lr and d8-d15.
@
@ Register roles:
@   r8      = counter word taken from bytes 12..15 of the counter block
@   r9, r10 = byte-reversed counter values inserted into q0 and q1
@   q6      = copy of the initial counter block
@   q7      = last round key; q10-q15 = the six round keys before it
@ NOTE(review): a block count of 0 also reaches .Lctr32_tail and so
@ processes one block; presumably callers never pass 0 -- confirm.
@ ---------------------------------------------------------------------
.globl	aes_v8_ctr32_encrypt_blocks
.type	aes_v8_ctr32_encrypt_blocks,%function
.align	5
aes_v8_ctr32_encrypt_blocks:
	mov	ip,sp
	stmdb	sp!,{r4-r10,lr}
	vstmdb	sp!,{d8-d15}		@ ABI specification says so
	ldr	r4, [ip]		@ load remaining arg
	ldr	r5,[r3,#240]

	ldr	r8, [r4, #12]
	vld1.32	{q0},[r4]

	vld1.32	{q8-q9},[r3]		@ load key schedule...
	sub	r5,r5,#6
	add	r7,r3,r5,lsl#4	@ pointer to last 7 round keys
	sub	r5,r5,#2
	vld1.32	{q10-q11},[r7]!
	vld1.32	{q12-q13},[r7]!
	vld1.32	{q14-q15},[r7]!
	vld1.32	{q7},[r7]

	add	r7,r3,#32
	mov	r6,r5

	subs	r2,r2,#2
	blo	.Lctr32_tail		@ fewer than two blocks

#ifndef __ARMEB__
	rev	r8, r8
#endif
	vorr	q1,q0,q0
	add	r8, r8, #1
	vorr	q6,q0,q0
	rev	r10, r8
	cmp	r5,#2			@ r5 equals 2 for a 10-round schedule
	vmov.32	d3[1],r10		@ q1 = counter block with counter + 1
	beq	.Lctr32_128

.Loop2x_ctr32:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	vld1.32	{q8},[r7]!
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	subs	r6,r6,#2
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	vld1.32	{q9},[r7]!
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	bgt	.Loop2x_ctr32

	@ The state moves to q4 and q5 here so that q0 and q1 can be
	@ rebuilt from q6 for the next pair of counter blocks.
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x80,0x83,0xb0,0xf3	@ aesmc q4,q0
	vorr	q0,q6,q6
	.byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
	vorr	q1,q6,q6
	.byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
	.byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
	vld1.8	{q2},[r0]!
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vld1.8	{q3},[r0]!
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	add	r8,r8,#1
	.byte	0x24,0x83,0xb0,0xf3	@ aese q4,q10
	.byte	0x24,0xa3,0xb0,0xf3	@ aese q5,q10
	rev	r9,r8
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	add	r8,r8,#1
	.byte	0x26,0x83,0xb0,0xf3	@ aese q4,q11
	.byte	0x26,0xa3,0xb0,0xf3	@ aese q5,q11
	veor	q2,q2,q7		@ fold the last round key into the input
	rev	r10,r8
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	veor	q3,q3,q7
	mov	r7,r3
	.byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
	.byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
	subs	r2,r2,#2
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	vld1.32	{q8-q9},[r7]!	@ re-pre-load rndkey[0-1]
	.byte	0x2a,0x83,0xb0,0xf3	@ aese q4,q13
	.byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	.byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
	.byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
	vmov.32	d1[1], r9
	.byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
	vmov.32	d3[1], r10
	.byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
	.byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
	.byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15

	mov	r6,r5
	veor	q2,q2,q4
	veor	q3,q3,q5
	vst1.8	{q2},[r1]!
	vst1.8	{q3},[r1]!
	bhs	.Loop2x_ctr32		@ flags still from subs r2 above

	adds	r2,r2,#2
	beq	.Lctr32_done
	b	.Lctr32_tail

@ 10-round schedule: all round keys stay in registers
@ (q8, q9, q4, q5, q10-q15, q7), two blocks per iteration.
.Lctr32_128:
	vld1.32	{q4-q5},[r7]

.Loop2x_ctr32_128:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q2},[r0]!
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	vld1.8	{q3},[r0]!
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x22,0x23,0xb0,0xf3	@ aese q1,q9
	add	r8,r8,#1
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	rev	r9,r8
	.byte	0x08,0x03,0xb0,0xf3	@ aese q0,q4
	.byte	0x08,0x23,0xb0,0xf3	@ aese q1,q4
	add	r8,r8,#1
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	rev	r10,r8
	.byte	0x0a,0x03,0xb0,0xf3	@ aese q0,q5
	.byte	0x0a,0x23,0xb0,0xf3	@ aese q1,q5
	subs	r2,r2,#2
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x24,0x23,0xb0,0xf3	@ aese q1,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x26,0x23,0xb0,0xf3	@ aese q1,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x28,0x23,0xb0,0xf3	@ aese q1,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x2a,0x23,0xb0,0xf3	@ aese q1,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x2c,0x23,0xb0,0xf3	@ aese q1,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x82,0x23,0xb0,0xf3	@ aesmc q1,q1
	veor	q2,q2,q7
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15
	veor	q3,q3,q7
	.byte	0x2e,0x23,0xb0,0xf3	@ aese q1,q15

	veor	q2,q2,q0
	vorr	q0,q6,q6
	veor	q3,q3,q1
	vorr	q1,q6,q6
	vst1.8	{q2},[r1]!
	vmov.32	d1[1], r9
	vst1.8	{q3},[r1]!
	vmov.32	d3[1], r10
	bhs	.Loop2x_ctr32_128	@ flags still from subs r2 above

	adds	r2,r2,#2
	beq	.Lctr32_done

	@ single remaining block, keystream generated from q0
.Lctr32_tail:
	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	vld1.32	{q8},[r7]!
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	subs	r6,r6,#2
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	vld1.32	{q9},[r7]!
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	bgt	.Lctr32_tail

	.byte	0x20,0x03,0xb0,0xf3	@ aese q0,q8
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x22,0x03,0xb0,0xf3	@ aese q0,q9
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	vld1.8	{q2},[r0]
	.byte	0x24,0x03,0xb0,0xf3	@ aese q0,q10
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x26,0x03,0xb0,0xf3	@ aese q0,q11
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x28,0x03,0xb0,0xf3	@ aese q0,q12
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2a,0x03,0xb0,0xf3	@ aese q0,q13
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	.byte	0x2c,0x03,0xb0,0xf3	@ aese q0,q14
	.byte	0x80,0x03,0xb0,0xf3	@ aesmc q0,q0
	veor	q2,q2,q7
	.byte	0x2e,0x03,0xb0,0xf3	@ aese q0,q15

	veor	q2,q2,q0
	vst1.8	{q2},[r1]

.Lctr32_done:
	vldmia	sp!,{d8-d15}
	ldmia	sp!,{r4-r10,pc}
.size	aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
#endif