      1 #if defined(__x86_64__)
      2 .text
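/*
 * AES-NI primitives for x86-64.  The "L$" local labels, leading-underscore
 * symbol names and .private_extern directives are the Mac OS X flavour of
 * what appears to be machine-generated perlasm output (OpenSSL-style
 * aesni-x86_64).  Throughout the file the AES-NI instructions are emitted
 * as raw ".byte" sequences so that assembling does not depend on assembler
 * support for them: 102,15,56,220,xx = aesenc, 102,15,56,221,xx = aesenclast,
 * 102,15,56,222,xx = aesdec, 102,15,56,223,xx = aesdeclast.
 * ".byte 0xf3,0xc3" is "rep ret", and ".byte 0x0f,0x1f,0x00" / "0x66,0x90"
 * are multi-byte NOPs used for padding.  Key schedules are addressed through
 * a pointer whose rounds field lives at byte offset 240.
 */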
      3 
      4 .globl	_aesni_encrypt
      5 .private_extern _aesni_encrypt
      6 
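/*
 * _aesni_encrypt: encrypt a single 16-byte block.
 * System V AMD64 arguments: %rdi = input block, %rsi = output block,
 * %rdx = expanded key, with the round-loop count read from 240(%rdx).
 * The usual C-side declaration (not defined in this file) is roughly:
 *   void aesni_encrypt(const uint8_t in[16], uint8_t out[16], const void *key);
 * The block is XORed with the first round key, run through one aesenc
 * (.byte 102,15,56,220,209) per loop iteration, and finished with
 * aesenclast (.byte 102,15,56,221,209).
 */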
      7 .p2align	4
      8 _aesni_encrypt:
      9 	movups	(%rdi),%xmm2
     10 	movl	240(%rdx),%eax
     11 	movups	(%rdx),%xmm0
     12 	movups	16(%rdx),%xmm1
     13 	leaq	32(%rdx),%rdx
     14 	xorps	%xmm0,%xmm2
     15 L$oop_enc1_1:
     16 .byte	102,15,56,220,209
     17 	decl	%eax
     18 	movups	(%rdx),%xmm1
     19 	leaq	16(%rdx),%rdx
     20 	jnz	L$oop_enc1_1
     21 .byte	102,15,56,221,209
     22 	movups	%xmm2,(%rsi)
     23 	.byte	0xf3,0xc3
     24 
     25 
     26 .globl	_aesni_decrypt
     27 .private_extern _aesni_decrypt
     28 
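/*
 * _aesni_decrypt: decryption counterpart of _aesni_encrypt with the same
 * register arguments (%rdi = in, %rsi = out, %rdx = expanded decryption key).
 * The structure is identical, using aesdec/aesdeclast
 * (.byte 102,15,56,222,209 / 102,15,56,223,209) instead of aesenc/aesenclast.
 */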
     29 .p2align	4
     30 _aesni_decrypt:
     31 	movups	(%rdi),%xmm2
     32 	movl	240(%rdx),%eax
     33 	movups	(%rdx),%xmm0
     34 	movups	16(%rdx),%xmm1
     35 	leaq	32(%rdx),%rdx
     36 	xorps	%xmm0,%xmm2
     37 L$oop_dec1_2:
     38 .byte	102,15,56,222,209
     39 	decl	%eax
     40 	movups	(%rdx),%xmm1
     41 	leaq	16(%rdx),%rdx
     42 	jnz	L$oop_dec1_2
     43 .byte	102,15,56,223,209
     44 	movups	%xmm2,(%rsi)
     45 	.byte	0xf3,0xc3
     46 
     47 
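/*
 * _aesni_encrypt2/3/4/6/8 and _aesni_decrypt2/3/4/6/8 below are local
 * helpers (no .globl) that run 2..8 independent blocks through the cipher
 * in parallel.  Blocks are held in %xmm2..%xmm9, %rcx points at the key
 * schedule and %eax holds its rounds field.  Each loop iteration applies
 * two round keys, and the aesenc/aesdec instructions for the different
 * blocks are interleaved so their latencies overlap.
 */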
     48 .p2align	4
     49 _aesni_encrypt2:
     50 	movups	(%rcx),%xmm0
     51 	shll	$4,%eax
     52 	movups	16(%rcx),%xmm1
     53 	xorps	%xmm0,%xmm2
     54 	xorps	%xmm0,%xmm3
     55 	movups	32(%rcx),%xmm0
     56 	leaq	32(%rcx,%rax,1),%rcx
     57 	negq	%rax
     58 	addq	$16,%rax
     59 
     60 L$enc_loop2:
     61 .byte	102,15,56,220,209
     62 .byte	102,15,56,220,217
     63 	movups	(%rcx,%rax,1),%xmm1
     64 	addq	$32,%rax
     65 .byte	102,15,56,220,208
     66 .byte	102,15,56,220,216
     67 	movups	-16(%rcx,%rax,1),%xmm0
     68 	jnz	L$enc_loop2
     69 
     70 .byte	102,15,56,220,209
     71 .byte	102,15,56,220,217
     72 .byte	102,15,56,221,208
     73 .byte	102,15,56,221,216
     74 	.byte	0xf3,0xc3
     75 
     76 
     77 .p2align	4
     78 _aesni_decrypt2:
     79 	movups	(%rcx),%xmm0
     80 	shll	$4,%eax
     81 	movups	16(%rcx),%xmm1
     82 	xorps	%xmm0,%xmm2
     83 	xorps	%xmm0,%xmm3
     84 	movups	32(%rcx),%xmm0
     85 	leaq	32(%rcx,%rax,1),%rcx
     86 	negq	%rax
     87 	addq	$16,%rax
     88 
     89 L$dec_loop2:
     90 .byte	102,15,56,222,209
     91 .byte	102,15,56,222,217
     92 	movups	(%rcx,%rax,1),%xmm1
     93 	addq	$32,%rax
     94 .byte	102,15,56,222,208
     95 .byte	102,15,56,222,216
     96 	movups	-16(%rcx,%rax,1),%xmm0
     97 	jnz	L$dec_loop2
     98 
     99 .byte	102,15,56,222,209
    100 .byte	102,15,56,222,217
    101 .byte	102,15,56,223,208
    102 .byte	102,15,56,223,216
    103 	.byte	0xf3,0xc3
    104 
    105 
    106 .p2align	4
    107 _aesni_encrypt3:
    108 	movups	(%rcx),%xmm0
    109 	shll	$4,%eax
    110 	movups	16(%rcx),%xmm1
    111 	xorps	%xmm0,%xmm2
    112 	xorps	%xmm0,%xmm3
    113 	xorps	%xmm0,%xmm4
    114 	movups	32(%rcx),%xmm0
    115 	leaq	32(%rcx,%rax,1),%rcx
    116 	negq	%rax
    117 	addq	$16,%rax
    118 
    119 L$enc_loop3:
    120 .byte	102,15,56,220,209
    121 .byte	102,15,56,220,217
    122 .byte	102,15,56,220,225
    123 	movups	(%rcx,%rax,1),%xmm1
    124 	addq	$32,%rax
    125 .byte	102,15,56,220,208
    126 .byte	102,15,56,220,216
    127 .byte	102,15,56,220,224
    128 	movups	-16(%rcx,%rax,1),%xmm0
    129 	jnz	L$enc_loop3
    130 
    131 .byte	102,15,56,220,209
    132 .byte	102,15,56,220,217
    133 .byte	102,15,56,220,225
    134 .byte	102,15,56,221,208
    135 .byte	102,15,56,221,216
    136 .byte	102,15,56,221,224
    137 	.byte	0xf3,0xc3
    138 
    139 
    140 .p2align	4
    141 _aesni_decrypt3:
    142 	movups	(%rcx),%xmm0
    143 	shll	$4,%eax
    144 	movups	16(%rcx),%xmm1
    145 	xorps	%xmm0,%xmm2
    146 	xorps	%xmm0,%xmm3
    147 	xorps	%xmm0,%xmm4
    148 	movups	32(%rcx),%xmm0
    149 	leaq	32(%rcx,%rax,1),%rcx
    150 	negq	%rax
    151 	addq	$16,%rax
    152 
    153 L$dec_loop3:
    154 .byte	102,15,56,222,209
    155 .byte	102,15,56,222,217
    156 .byte	102,15,56,222,225
    157 	movups	(%rcx,%rax,1),%xmm1
    158 	addq	$32,%rax
    159 .byte	102,15,56,222,208
    160 .byte	102,15,56,222,216
    161 .byte	102,15,56,222,224
    162 	movups	-16(%rcx,%rax,1),%xmm0
    163 	jnz	L$dec_loop3
    164 
    165 .byte	102,15,56,222,209
    166 .byte	102,15,56,222,217
    167 .byte	102,15,56,222,225
    168 .byte	102,15,56,223,208
    169 .byte	102,15,56,223,216
    170 .byte	102,15,56,223,224
    171 	.byte	0xf3,0xc3
    172 
    173 
    174 .p2align	4
    175 _aesni_encrypt4:
    176 	movups	(%rcx),%xmm0
    177 	shll	$4,%eax
    178 	movups	16(%rcx),%xmm1
    179 	xorps	%xmm0,%xmm2
    180 	xorps	%xmm0,%xmm3
    181 	xorps	%xmm0,%xmm4
    182 	xorps	%xmm0,%xmm5
    183 	movups	32(%rcx),%xmm0
    184 	leaq	32(%rcx,%rax,1),%rcx
    185 	negq	%rax
    186 .byte	0x0f,0x1f,0x00
    187 	addq	$16,%rax
    188 
    189 L$enc_loop4:
    190 .byte	102,15,56,220,209
    191 .byte	102,15,56,220,217
    192 .byte	102,15,56,220,225
    193 .byte	102,15,56,220,233
    194 	movups	(%rcx,%rax,1),%xmm1
    195 	addq	$32,%rax
    196 .byte	102,15,56,220,208
    197 .byte	102,15,56,220,216
    198 .byte	102,15,56,220,224
    199 .byte	102,15,56,220,232
    200 	movups	-16(%rcx,%rax,1),%xmm0
    201 	jnz	L$enc_loop4
    202 
    203 .byte	102,15,56,220,209
    204 .byte	102,15,56,220,217
    205 .byte	102,15,56,220,225
    206 .byte	102,15,56,220,233
    207 .byte	102,15,56,221,208
    208 .byte	102,15,56,221,216
    209 .byte	102,15,56,221,224
    210 .byte	102,15,56,221,232
    211 	.byte	0xf3,0xc3
    212 
    213 
    214 .p2align	4
    215 _aesni_decrypt4:
    216 	movups	(%rcx),%xmm0
    217 	shll	$4,%eax
    218 	movups	16(%rcx),%xmm1
    219 	xorps	%xmm0,%xmm2
    220 	xorps	%xmm0,%xmm3
    221 	xorps	%xmm0,%xmm4
    222 	xorps	%xmm0,%xmm5
    223 	movups	32(%rcx),%xmm0
    224 	leaq	32(%rcx,%rax,1),%rcx
    225 	negq	%rax
    226 .byte	0x0f,0x1f,0x00
    227 	addq	$16,%rax
    228 
    229 L$dec_loop4:
    230 .byte	102,15,56,222,209
    231 .byte	102,15,56,222,217
    232 .byte	102,15,56,222,225
    233 .byte	102,15,56,222,233
    234 	movups	(%rcx,%rax,1),%xmm1
    235 	addq	$32,%rax
    236 .byte	102,15,56,222,208
    237 .byte	102,15,56,222,216
    238 .byte	102,15,56,222,224
    239 .byte	102,15,56,222,232
    240 	movups	-16(%rcx,%rax,1),%xmm0
    241 	jnz	L$dec_loop4
    242 
    243 .byte	102,15,56,222,209
    244 .byte	102,15,56,222,217
    245 .byte	102,15,56,222,225
    246 .byte	102,15,56,222,233
    247 .byte	102,15,56,223,208
    248 .byte	102,15,56,223,216
    249 .byte	102,15,56,223,224
    250 .byte	102,15,56,223,232
    251 	.byte	0xf3,0xc3
    252 
    253 
    254 .p2align	4
    255 _aesni_encrypt6:
    256 	movups	(%rcx),%xmm0
    257 	shll	$4,%eax
    258 	movups	16(%rcx),%xmm1
    259 	xorps	%xmm0,%xmm2
    260 	pxor	%xmm0,%xmm3
    261 	pxor	%xmm0,%xmm4
    262 .byte	102,15,56,220,209
    263 	leaq	32(%rcx,%rax,1),%rcx
    264 	negq	%rax
    265 .byte	102,15,56,220,217
    266 	pxor	%xmm0,%xmm5
    267 	pxor	%xmm0,%xmm6
    268 .byte	102,15,56,220,225
    269 	pxor	%xmm0,%xmm7
    270 	addq	$16,%rax
    271 .byte	102,15,56,220,233
    272 .byte	102,15,56,220,241
    273 .byte	102,15,56,220,249
    274 	movups	-16(%rcx,%rax,1),%xmm0
    275 	jmp	L$enc_loop6_enter
    276 .p2align	4
    277 L$enc_loop6:
    278 .byte	102,15,56,220,209
    279 .byte	102,15,56,220,217
    280 .byte	102,15,56,220,225
    281 .byte	102,15,56,220,233
    282 .byte	102,15,56,220,241
    283 .byte	102,15,56,220,249
    284 L$enc_loop6_enter:
    285 	movups	(%rcx,%rax,1),%xmm1
    286 	addq	$32,%rax
    287 .byte	102,15,56,220,208
    288 .byte	102,15,56,220,216
    289 .byte	102,15,56,220,224
    290 .byte	102,15,56,220,232
    291 .byte	102,15,56,220,240
    292 .byte	102,15,56,220,248
    293 	movups	-16(%rcx,%rax,1),%xmm0
    294 	jnz	L$enc_loop6
    295 
    296 .byte	102,15,56,220,209
    297 .byte	102,15,56,220,217
    298 .byte	102,15,56,220,225
    299 .byte	102,15,56,220,233
    300 .byte	102,15,56,220,241
    301 .byte	102,15,56,220,249
    302 .byte	102,15,56,221,208
    303 .byte	102,15,56,221,216
    304 .byte	102,15,56,221,224
    305 .byte	102,15,56,221,232
    306 .byte	102,15,56,221,240
    307 .byte	102,15,56,221,248
    308 	.byte	0xf3,0xc3
    309 
    310 
    311 .p2align	4
    312 _aesni_decrypt6:
    313 	movups	(%rcx),%xmm0
    314 	shll	$4,%eax
    315 	movups	16(%rcx),%xmm1
    316 	xorps	%xmm0,%xmm2
    317 	pxor	%xmm0,%xmm3
    318 	pxor	%xmm0,%xmm4
    319 .byte	102,15,56,222,209
    320 	leaq	32(%rcx,%rax,1),%rcx
    321 	negq	%rax
    322 .byte	102,15,56,222,217
    323 	pxor	%xmm0,%xmm5
    324 	pxor	%xmm0,%xmm6
    325 .byte	102,15,56,222,225
    326 	pxor	%xmm0,%xmm7
    327 	addq	$16,%rax
    328 .byte	102,15,56,222,233
    329 .byte	102,15,56,222,241
    330 .byte	102,15,56,222,249
    331 	movups	-16(%rcx,%rax,1),%xmm0
    332 	jmp	L$dec_loop6_enter
    333 .p2align	4
    334 L$dec_loop6:
    335 .byte	102,15,56,222,209
    336 .byte	102,15,56,222,217
    337 .byte	102,15,56,222,225
    338 .byte	102,15,56,222,233
    339 .byte	102,15,56,222,241
    340 .byte	102,15,56,222,249
    341 L$dec_loop6_enter:
    342 	movups	(%rcx,%rax,1),%xmm1
    343 	addq	$32,%rax
    344 .byte	102,15,56,222,208
    345 .byte	102,15,56,222,216
    346 .byte	102,15,56,222,224
    347 .byte	102,15,56,222,232
    348 .byte	102,15,56,222,240
    349 .byte	102,15,56,222,248
    350 	movups	-16(%rcx,%rax,1),%xmm0
    351 	jnz	L$dec_loop6
    352 
    353 .byte	102,15,56,222,209
    354 .byte	102,15,56,222,217
    355 .byte	102,15,56,222,225
    356 .byte	102,15,56,222,233
    357 .byte	102,15,56,222,241
    358 .byte	102,15,56,222,249
    359 .byte	102,15,56,223,208
    360 .byte	102,15,56,223,216
    361 .byte	102,15,56,223,224
    362 .byte	102,15,56,223,232
    363 .byte	102,15,56,223,240
    364 .byte	102,15,56,223,248
    365 	.byte	0xf3,0xc3
    366 
    367 
    368 .p2align	4
    369 _aesni_encrypt8:
    370 	movups	(%rcx),%xmm0
    371 	shll	$4,%eax
    372 	movups	16(%rcx),%xmm1
    373 	xorps	%xmm0,%xmm2
    374 	xorps	%xmm0,%xmm3
    375 	pxor	%xmm0,%xmm4
    376 	pxor	%xmm0,%xmm5
    377 	pxor	%xmm0,%xmm6
    378 	leaq	32(%rcx,%rax,1),%rcx
    379 	negq	%rax
    380 .byte	102,15,56,220,209
    381 	addq	$16,%rax
    382 	pxor	%xmm0,%xmm7
    383 .byte	102,15,56,220,217
    384 	pxor	%xmm0,%xmm8
    385 	pxor	%xmm0,%xmm9
    386 .byte	102,15,56,220,225
    387 .byte	102,15,56,220,233
    388 .byte	102,15,56,220,241
    389 .byte	102,15,56,220,249
    390 .byte	102,68,15,56,220,193
    391 .byte	102,68,15,56,220,201
    392 	movups	-16(%rcx,%rax,1),%xmm0
    393 	jmp	L$enc_loop8_enter
    394 .p2align	4
    395 L$enc_loop8:
    396 .byte	102,15,56,220,209
    397 .byte	102,15,56,220,217
    398 .byte	102,15,56,220,225
    399 .byte	102,15,56,220,233
    400 .byte	102,15,56,220,241
    401 .byte	102,15,56,220,249
    402 .byte	102,68,15,56,220,193
    403 .byte	102,68,15,56,220,201
    404 L$enc_loop8_enter:
    405 	movups	(%rcx,%rax,1),%xmm1
    406 	addq	$32,%rax
    407 .byte	102,15,56,220,208
    408 .byte	102,15,56,220,216
    409 .byte	102,15,56,220,224
    410 .byte	102,15,56,220,232
    411 .byte	102,15,56,220,240
    412 .byte	102,15,56,220,248
    413 .byte	102,68,15,56,220,192
    414 .byte	102,68,15,56,220,200
    415 	movups	-16(%rcx,%rax,1),%xmm0
    416 	jnz	L$enc_loop8
    417 
    418 .byte	102,15,56,220,209
    419 .byte	102,15,56,220,217
    420 .byte	102,15,56,220,225
    421 .byte	102,15,56,220,233
    422 .byte	102,15,56,220,241
    423 .byte	102,15,56,220,249
    424 .byte	102,68,15,56,220,193
    425 .byte	102,68,15,56,220,201
    426 .byte	102,15,56,221,208
    427 .byte	102,15,56,221,216
    428 .byte	102,15,56,221,224
    429 .byte	102,15,56,221,232
    430 .byte	102,15,56,221,240
    431 .byte	102,15,56,221,248
    432 .byte	102,68,15,56,221,192
    433 .byte	102,68,15,56,221,200
    434 	.byte	0xf3,0xc3
    435 
    436 
    437 .p2align	4
    438 _aesni_decrypt8:
    439 	movups	(%rcx),%xmm0
    440 	shll	$4,%eax
    441 	movups	16(%rcx),%xmm1
    442 	xorps	%xmm0,%xmm2
    443 	xorps	%xmm0,%xmm3
    444 	pxor	%xmm0,%xmm4
    445 	pxor	%xmm0,%xmm5
    446 	pxor	%xmm0,%xmm6
    447 	leaq	32(%rcx,%rax,1),%rcx
    448 	negq	%rax
    449 .byte	102,15,56,222,209
    450 	addq	$16,%rax
    451 	pxor	%xmm0,%xmm7
    452 .byte	102,15,56,222,217
    453 	pxor	%xmm0,%xmm8
    454 	pxor	%xmm0,%xmm9
    455 .byte	102,15,56,222,225
    456 .byte	102,15,56,222,233
    457 .byte	102,15,56,222,241
    458 .byte	102,15,56,222,249
    459 .byte	102,68,15,56,222,193
    460 .byte	102,68,15,56,222,201
    461 	movups	-16(%rcx,%rax,1),%xmm0
    462 	jmp	L$dec_loop8_enter
    463 .p2align	4
    464 L$dec_loop8:
    465 .byte	102,15,56,222,209
    466 .byte	102,15,56,222,217
    467 .byte	102,15,56,222,225
    468 .byte	102,15,56,222,233
    469 .byte	102,15,56,222,241
    470 .byte	102,15,56,222,249
    471 .byte	102,68,15,56,222,193
    472 .byte	102,68,15,56,222,201
    473 L$dec_loop8_enter:
    474 	movups	(%rcx,%rax,1),%xmm1
    475 	addq	$32,%rax
    476 .byte	102,15,56,222,208
    477 .byte	102,15,56,222,216
    478 .byte	102,15,56,222,224
    479 .byte	102,15,56,222,232
    480 .byte	102,15,56,222,240
    481 .byte	102,15,56,222,248
    482 .byte	102,68,15,56,222,192
    483 .byte	102,68,15,56,222,200
    484 	movups	-16(%rcx,%rax,1),%xmm0
    485 	jnz	L$dec_loop8
    486 
    487 .byte	102,15,56,222,209
    488 .byte	102,15,56,222,217
    489 .byte	102,15,56,222,225
    490 .byte	102,15,56,222,233
    491 .byte	102,15,56,222,241
    492 .byte	102,15,56,222,249
    493 .byte	102,68,15,56,222,193
    494 .byte	102,68,15,56,222,201
    495 .byte	102,15,56,223,208
    496 .byte	102,15,56,223,216
    497 .byte	102,15,56,223,224
    498 .byte	102,15,56,223,232
    499 .byte	102,15,56,223,240
    500 .byte	102,15,56,223,248
    501 .byte	102,68,15,56,223,192
    502 .byte	102,68,15,56,223,200
    503 	.byte	0xf3,0xc3
    504 
    505 .globl	_aesni_ecb_encrypt
    506 .private_extern _aesni_ecb_encrypt
    507 
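/*
 * _aesni_ecb_encrypt: ECB encryption/decryption of a byte string.
 * Arguments: %rdi = in, %rsi = out, %rdx = length in bytes (rounded down to
 * a multiple of 16), %rcx = key schedule, %r8d = enc flag (nonzero for
 * encryption, zero for decryption).  A conventional prototype (not part of
 * this file) is roughly:
 *   void aesni_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t len,
 *                          const void *key, int enc);
 * Full 8-block batches go through _aesni_encrypt8/_aesni_decrypt8; the
 * remaining 1-7 blocks fall through a ladder of smaller helpers.
 */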
    508 .p2align	4
    509 _aesni_ecb_encrypt:
    510 	andq	$-16,%rdx
    511 	jz	L$ecb_ret
    512 
    513 	movl	240(%rcx),%eax
    514 	movups	(%rcx),%xmm0
    515 	movq	%rcx,%r11
    516 	movl	%eax,%r10d
    517 	testl	%r8d,%r8d
    518 	jz	L$ecb_decrypt
    519 
    520 	cmpq	$128,%rdx
    521 	jb	L$ecb_enc_tail
    522 
    523 	movdqu	(%rdi),%xmm2
    524 	movdqu	16(%rdi),%xmm3
    525 	movdqu	32(%rdi),%xmm4
    526 	movdqu	48(%rdi),%xmm5
    527 	movdqu	64(%rdi),%xmm6
    528 	movdqu	80(%rdi),%xmm7
    529 	movdqu	96(%rdi),%xmm8
    530 	movdqu	112(%rdi),%xmm9
    531 	leaq	128(%rdi),%rdi
    532 	subq	$128,%rdx
    533 	jmp	L$ecb_enc_loop8_enter
    534 .p2align	4
    535 L$ecb_enc_loop8:
    536 	movups	%xmm2,(%rsi)
    537 	movq	%r11,%rcx
    538 	movdqu	(%rdi),%xmm2
    539 	movl	%r10d,%eax
    540 	movups	%xmm3,16(%rsi)
    541 	movdqu	16(%rdi),%xmm3
    542 	movups	%xmm4,32(%rsi)
    543 	movdqu	32(%rdi),%xmm4
    544 	movups	%xmm5,48(%rsi)
    545 	movdqu	48(%rdi),%xmm5
    546 	movups	%xmm6,64(%rsi)
    547 	movdqu	64(%rdi),%xmm6
    548 	movups	%xmm7,80(%rsi)
    549 	movdqu	80(%rdi),%xmm7
    550 	movups	%xmm8,96(%rsi)
    551 	movdqu	96(%rdi),%xmm8
    552 	movups	%xmm9,112(%rsi)
    553 	leaq	128(%rsi),%rsi
    554 	movdqu	112(%rdi),%xmm9
    555 	leaq	128(%rdi),%rdi
    556 L$ecb_enc_loop8_enter:
    557 
    558 	call	_aesni_encrypt8
    559 
    560 	subq	$128,%rdx
    561 	jnc	L$ecb_enc_loop8
    562 
    563 	movups	%xmm2,(%rsi)
    564 	movq	%r11,%rcx
    565 	movups	%xmm3,16(%rsi)
    566 	movl	%r10d,%eax
    567 	movups	%xmm4,32(%rsi)
    568 	movups	%xmm5,48(%rsi)
    569 	movups	%xmm6,64(%rsi)
    570 	movups	%xmm7,80(%rsi)
    571 	movups	%xmm8,96(%rsi)
    572 	movups	%xmm9,112(%rsi)
    573 	leaq	128(%rsi),%rsi
    574 	addq	$128,%rdx
    575 	jz	L$ecb_ret
    576 
    577 L$ecb_enc_tail:
    578 	movups	(%rdi),%xmm2
    579 	cmpq	$32,%rdx
    580 	jb	L$ecb_enc_one
    581 	movups	16(%rdi),%xmm3
    582 	je	L$ecb_enc_two
    583 	movups	32(%rdi),%xmm4
    584 	cmpq	$64,%rdx
    585 	jb	L$ecb_enc_three
    586 	movups	48(%rdi),%xmm5
    587 	je	L$ecb_enc_four
    588 	movups	64(%rdi),%xmm6
    589 	cmpq	$96,%rdx
    590 	jb	L$ecb_enc_five
    591 	movups	80(%rdi),%xmm7
    592 	je	L$ecb_enc_six
    593 	movdqu	96(%rdi),%xmm8
    594 	call	_aesni_encrypt8
    595 	movups	%xmm2,(%rsi)
    596 	movups	%xmm3,16(%rsi)
    597 	movups	%xmm4,32(%rsi)
    598 	movups	%xmm5,48(%rsi)
    599 	movups	%xmm6,64(%rsi)
    600 	movups	%xmm7,80(%rsi)
    601 	movups	%xmm8,96(%rsi)
    602 	jmp	L$ecb_ret
    603 .p2align	4
    604 L$ecb_enc_one:
    605 	movups	(%rcx),%xmm0
    606 	movups	16(%rcx),%xmm1
    607 	leaq	32(%rcx),%rcx
    608 	xorps	%xmm0,%xmm2
    609 L$oop_enc1_3:
    610 .byte	102,15,56,220,209
    611 	decl	%eax
    612 	movups	(%rcx),%xmm1
    613 	leaq	16(%rcx),%rcx
    614 	jnz	L$oop_enc1_3
    615 .byte	102,15,56,221,209
    616 	movups	%xmm2,(%rsi)
    617 	jmp	L$ecb_ret
    618 .p2align	4
    619 L$ecb_enc_two:
    620 	call	_aesni_encrypt2
    621 	movups	%xmm2,(%rsi)
    622 	movups	%xmm3,16(%rsi)
    623 	jmp	L$ecb_ret
    624 .p2align	4
    625 L$ecb_enc_three:
    626 	call	_aesni_encrypt3
    627 	movups	%xmm2,(%rsi)
    628 	movups	%xmm3,16(%rsi)
    629 	movups	%xmm4,32(%rsi)
    630 	jmp	L$ecb_ret
    631 .p2align	4
    632 L$ecb_enc_four:
    633 	call	_aesni_encrypt4
    634 	movups	%xmm2,(%rsi)
    635 	movups	%xmm3,16(%rsi)
    636 	movups	%xmm4,32(%rsi)
    637 	movups	%xmm5,48(%rsi)
    638 	jmp	L$ecb_ret
    639 .p2align	4
    640 L$ecb_enc_five:
    641 	xorps	%xmm7,%xmm7
    642 	call	_aesni_encrypt6
    643 	movups	%xmm2,(%rsi)
    644 	movups	%xmm3,16(%rsi)
    645 	movups	%xmm4,32(%rsi)
    646 	movups	%xmm5,48(%rsi)
    647 	movups	%xmm6,64(%rsi)
    648 	jmp	L$ecb_ret
    649 .p2align	4
    650 L$ecb_enc_six:
    651 	call	_aesni_encrypt6
    652 	movups	%xmm2,(%rsi)
    653 	movups	%xmm3,16(%rsi)
    654 	movups	%xmm4,32(%rsi)
    655 	movups	%xmm5,48(%rsi)
    656 	movups	%xmm6,64(%rsi)
    657 	movups	%xmm7,80(%rsi)
    658 	jmp	L$ecb_ret
    659 
    660 .p2align	4
    661 L$ecb_decrypt:
    662 	cmpq	$128,%rdx
    663 	jb	L$ecb_dec_tail
    664 
    665 	movdqu	(%rdi),%xmm2
    666 	movdqu	16(%rdi),%xmm3
    667 	movdqu	32(%rdi),%xmm4
    668 	movdqu	48(%rdi),%xmm5
    669 	movdqu	64(%rdi),%xmm6
    670 	movdqu	80(%rdi),%xmm7
    671 	movdqu	96(%rdi),%xmm8
    672 	movdqu	112(%rdi),%xmm9
    673 	leaq	128(%rdi),%rdi
    674 	subq	$128,%rdx
    675 	jmp	L$ecb_dec_loop8_enter
    676 .p2align	4
    677 L$ecb_dec_loop8:
    678 	movups	%xmm2,(%rsi)
    679 	movq	%r11,%rcx
    680 	movdqu	(%rdi),%xmm2
    681 	movl	%r10d,%eax
    682 	movups	%xmm3,16(%rsi)
    683 	movdqu	16(%rdi),%xmm3
    684 	movups	%xmm4,32(%rsi)
    685 	movdqu	32(%rdi),%xmm4
    686 	movups	%xmm5,48(%rsi)
    687 	movdqu	48(%rdi),%xmm5
    688 	movups	%xmm6,64(%rsi)
    689 	movdqu	64(%rdi),%xmm6
    690 	movups	%xmm7,80(%rsi)
    691 	movdqu	80(%rdi),%xmm7
    692 	movups	%xmm8,96(%rsi)
    693 	movdqu	96(%rdi),%xmm8
    694 	movups	%xmm9,112(%rsi)
    695 	leaq	128(%rsi),%rsi
    696 	movdqu	112(%rdi),%xmm9
    697 	leaq	128(%rdi),%rdi
    698 L$ecb_dec_loop8_enter:
    699 
    700 	call	_aesni_decrypt8
    701 
    702 	movups	(%r11),%xmm0
    703 	subq	$128,%rdx
    704 	jnc	L$ecb_dec_loop8
    705 
    706 	movups	%xmm2,(%rsi)
    707 	movq	%r11,%rcx
    708 	movups	%xmm3,16(%rsi)
    709 	movl	%r10d,%eax
    710 	movups	%xmm4,32(%rsi)
    711 	movups	%xmm5,48(%rsi)
    712 	movups	%xmm6,64(%rsi)
    713 	movups	%xmm7,80(%rsi)
    714 	movups	%xmm8,96(%rsi)
    715 	movups	%xmm9,112(%rsi)
    716 	leaq	128(%rsi),%rsi
    717 	addq	$128,%rdx
    718 	jz	L$ecb_ret
    719 
    720 L$ecb_dec_tail:
    721 	movups	(%rdi),%xmm2
    722 	cmpq	$32,%rdx
    723 	jb	L$ecb_dec_one
    724 	movups	16(%rdi),%xmm3
    725 	je	L$ecb_dec_two
    726 	movups	32(%rdi),%xmm4
    727 	cmpq	$64,%rdx
    728 	jb	L$ecb_dec_three
    729 	movups	48(%rdi),%xmm5
    730 	je	L$ecb_dec_four
    731 	movups	64(%rdi),%xmm6
    732 	cmpq	$96,%rdx
    733 	jb	L$ecb_dec_five
    734 	movups	80(%rdi),%xmm7
    735 	je	L$ecb_dec_six
    736 	movups	96(%rdi),%xmm8
    737 	movups	(%rcx),%xmm0
    738 	call	_aesni_decrypt8
    739 	movups	%xmm2,(%rsi)
    740 	movups	%xmm3,16(%rsi)
    741 	movups	%xmm4,32(%rsi)
    742 	movups	%xmm5,48(%rsi)
    743 	movups	%xmm6,64(%rsi)
    744 	movups	%xmm7,80(%rsi)
    745 	movups	%xmm8,96(%rsi)
    746 	jmp	L$ecb_ret
    747 .p2align	4
    748 L$ecb_dec_one:
    749 	movups	(%rcx),%xmm0
    750 	movups	16(%rcx),%xmm1
    751 	leaq	32(%rcx),%rcx
    752 	xorps	%xmm0,%xmm2
    753 L$oop_dec1_4:
    754 .byte	102,15,56,222,209
    755 	decl	%eax
    756 	movups	(%rcx),%xmm1
    757 	leaq	16(%rcx),%rcx
    758 	jnz	L$oop_dec1_4
    759 .byte	102,15,56,223,209
    760 	movups	%xmm2,(%rsi)
    761 	jmp	L$ecb_ret
    762 .p2align	4
    763 L$ecb_dec_two:
    764 	call	_aesni_decrypt2
    765 	movups	%xmm2,(%rsi)
    766 	movups	%xmm3,16(%rsi)
    767 	jmp	L$ecb_ret
    768 .p2align	4
    769 L$ecb_dec_three:
    770 	call	_aesni_decrypt3
    771 	movups	%xmm2,(%rsi)
    772 	movups	%xmm3,16(%rsi)
    773 	movups	%xmm4,32(%rsi)
    774 	jmp	L$ecb_ret
    775 .p2align	4
    776 L$ecb_dec_four:
    777 	call	_aesni_decrypt4
    778 	movups	%xmm2,(%rsi)
    779 	movups	%xmm3,16(%rsi)
    780 	movups	%xmm4,32(%rsi)
    781 	movups	%xmm5,48(%rsi)
    782 	jmp	L$ecb_ret
    783 .p2align	4
    784 L$ecb_dec_five:
    785 	xorps	%xmm7,%xmm7
    786 	call	_aesni_decrypt6
    787 	movups	%xmm2,(%rsi)
    788 	movups	%xmm3,16(%rsi)
    789 	movups	%xmm4,32(%rsi)
    790 	movups	%xmm5,48(%rsi)
    791 	movups	%xmm6,64(%rsi)
    792 	jmp	L$ecb_ret
    793 .p2align	4
    794 L$ecb_dec_six:
    795 	call	_aesni_decrypt6
    796 	movups	%xmm2,(%rsi)
    797 	movups	%xmm3,16(%rsi)
    798 	movups	%xmm4,32(%rsi)
    799 	movups	%xmm5,48(%rsi)
    800 	movups	%xmm6,64(%rsi)
    801 	movups	%xmm7,80(%rsi)
    802 
    803 L$ecb_ret:
    804 	.byte	0xf3,0xc3
    805 
    806 .globl	_aesni_ccm64_encrypt_blocks
    807 .private_extern _aesni_ccm64_encrypt_blocks
    808 
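/*
 * _aesni_ccm64_encrypt_blocks: CCM (CTR + CBC-MAC) bulk encryption.
 * Arguments: %rdi = in, %rsi = out, %rdx = number of 16-byte blocks,
 * %rcx = key schedule, %r8 = counter block (ivec), %r9 = running CMAC
 * value, updated and written back on return.  A conventional prototype
 * (not in this file) is roughly:
 *   void aesni_ccm64_encrypt_blocks(const uint8_t *in, uint8_t *out,
 *                                   size_t blocks, const void *key,
 *                                   const uint8_t ivec[16], uint8_t cmac[16]);
 * Each iteration encrypts the counter and the MAC state as a pair of
 * blocks, XORs the keystream into the plaintext, and bumps the counter via
 * the L$increment64/L$bswap_mask constants.
 */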
    809 .p2align	4
    810 _aesni_ccm64_encrypt_blocks:
    811 	movl	240(%rcx),%eax
    812 	movdqu	(%r8),%xmm6
    813 	movdqa	L$increment64(%rip),%xmm9
    814 	movdqa	L$bswap_mask(%rip),%xmm7
    815 
    816 	shll	$4,%eax
    817 	movl	$16,%r10d
    818 	leaq	0(%rcx),%r11
    819 	movdqu	(%r9),%xmm3
    820 	movdqa	%xmm6,%xmm2
    821 	leaq	32(%rcx,%rax,1),%rcx
    822 .byte	102,15,56,0,247
    823 	subq	%rax,%r10
    824 	jmp	L$ccm64_enc_outer
    825 .p2align	4
    826 L$ccm64_enc_outer:
    827 	movups	(%r11),%xmm0
    828 	movq	%r10,%rax
    829 	movups	(%rdi),%xmm8
    830 
    831 	xorps	%xmm0,%xmm2
    832 	movups	16(%r11),%xmm1
    833 	xorps	%xmm8,%xmm0
    834 	xorps	%xmm0,%xmm3
    835 	movups	32(%r11),%xmm0
    836 
    837 L$ccm64_enc2_loop:
    838 .byte	102,15,56,220,209
    839 .byte	102,15,56,220,217
    840 	movups	(%rcx,%rax,1),%xmm1
    841 	addq	$32,%rax
    842 .byte	102,15,56,220,208
    843 .byte	102,15,56,220,216
    844 	movups	-16(%rcx,%rax,1),%xmm0
    845 	jnz	L$ccm64_enc2_loop
    846 .byte	102,15,56,220,209
    847 .byte	102,15,56,220,217
    848 	paddq	%xmm9,%xmm6
    849 	decq	%rdx
    850 .byte	102,15,56,221,208
    851 .byte	102,15,56,221,216
    852 
    853 	leaq	16(%rdi),%rdi
    854 	xorps	%xmm2,%xmm8
    855 	movdqa	%xmm6,%xmm2
    856 	movups	%xmm8,(%rsi)
    857 .byte	102,15,56,0,215
    858 	leaq	16(%rsi),%rsi
    859 	jnz	L$ccm64_enc_outer
    860 
    861 	movups	%xmm3,(%r9)
    862 	.byte	0xf3,0xc3
    863 
    864 .globl	_aesni_ccm64_decrypt_blocks
    865 .private_extern _aesni_ccm64_decrypt_blocks
    866 
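/*
 * _aesni_ccm64_decrypt_blocks: CCM counterpart for decryption, same
 * argument layout as _aesni_ccm64_encrypt_blocks.  The counter block is
 * encrypted first to recover each plaintext block, and the CMAC is then
 * updated with the recovered plaintext, so the MAC is computed over
 * decrypted data rather than ciphertext.
 */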
    867 .p2align	4
    868 _aesni_ccm64_decrypt_blocks:
    869 	movl	240(%rcx),%eax
    870 	movups	(%r8),%xmm6
    871 	movdqu	(%r9),%xmm3
    872 	movdqa	L$increment64(%rip),%xmm9
    873 	movdqa	L$bswap_mask(%rip),%xmm7
    874 
    875 	movaps	%xmm6,%xmm2
    876 	movl	%eax,%r10d
    877 	movq	%rcx,%r11
    878 .byte	102,15,56,0,247
    879 	movups	(%rcx),%xmm0
    880 	movups	16(%rcx),%xmm1
    881 	leaq	32(%rcx),%rcx
    882 	xorps	%xmm0,%xmm2
    883 L$oop_enc1_5:
    884 .byte	102,15,56,220,209
    885 	decl	%eax
    886 	movups	(%rcx),%xmm1
    887 	leaq	16(%rcx),%rcx
    888 	jnz	L$oop_enc1_5
    889 .byte	102,15,56,221,209
    890 	shll	$4,%r10d
    891 	movl	$16,%eax
    892 	movups	(%rdi),%xmm8
    893 	paddq	%xmm9,%xmm6
    894 	leaq	16(%rdi),%rdi
    895 	subq	%r10,%rax
    896 	leaq	32(%r11,%r10,1),%rcx
    897 	movq	%rax,%r10
    898 	jmp	L$ccm64_dec_outer
    899 .p2align	4
    900 L$ccm64_dec_outer:
    901 	xorps	%xmm2,%xmm8
    902 	movdqa	%xmm6,%xmm2
    903 	movups	%xmm8,(%rsi)
    904 	leaq	16(%rsi),%rsi
    905 .byte	102,15,56,0,215
    906 
    907 	subq	$1,%rdx
    908 	jz	L$ccm64_dec_break
    909 
    910 	movups	(%r11),%xmm0
    911 	movq	%r10,%rax
    912 	movups	16(%r11),%xmm1
    913 	xorps	%xmm0,%xmm8
    914 	xorps	%xmm0,%xmm2
    915 	xorps	%xmm8,%xmm3
    916 	movups	32(%r11),%xmm0
    917 	jmp	L$ccm64_dec2_loop
    918 .p2align	4
    919 L$ccm64_dec2_loop:
    920 .byte	102,15,56,220,209
    921 .byte	102,15,56,220,217
    922 	movups	(%rcx,%rax,1),%xmm1
    923 	addq	$32,%rax
    924 .byte	102,15,56,220,208
    925 .byte	102,15,56,220,216
    926 	movups	-16(%rcx,%rax,1),%xmm0
    927 	jnz	L$ccm64_dec2_loop
    928 	movups	(%rdi),%xmm8
    929 	paddq	%xmm9,%xmm6
    930 .byte	102,15,56,220,209
    931 .byte	102,15,56,220,217
    932 .byte	102,15,56,221,208
    933 .byte	102,15,56,221,216
    934 	leaq	16(%rdi),%rdi
    935 	jmp	L$ccm64_dec_outer
    936 
    937 .p2align	4
    938 L$ccm64_dec_break:
    939 
    940 	movl	240(%r11),%eax
    941 	movups	(%r11),%xmm0
    942 	movups	16(%r11),%xmm1
    943 	xorps	%xmm0,%xmm8
    944 	leaq	32(%r11),%r11
    945 	xorps	%xmm8,%xmm3
    946 L$oop_enc1_6:
    947 .byte	102,15,56,220,217
    948 	decl	%eax
    949 	movups	(%r11),%xmm1
    950 	leaq	16(%r11),%r11
    951 	jnz	L$oop_enc1_6
    952 .byte	102,15,56,221,217
    953 	movups	%xmm3,(%r9)
    954 	.byte	0xf3,0xc3
    955 
    956 .globl	_aesni_ctr32_encrypt_blocks
    957 .private_extern _aesni_ctr32_encrypt_blocks
    958 
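/*
 * _aesni_ctr32_encrypt_blocks: CTR mode with a 32-bit big-endian counter in
 * the last four bytes of the IV (the upper 96 bits stay fixed).
 * Arguments: %rdi = in, %rsi = out, %rdx = number of 16-byte blocks,
 * %rcx = key schedule, %r8 = 16-byte counter block.  A conventional
 * prototype (not part of this file) is roughly:
 *   void aesni_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
 *                                   size_t blocks, const void *key,
 *                                   const uint8_t ivec[16]);
 * Up to eight counter blocks are staged on the stack and encrypted in
 * parallel.  A separate six-block path is selected from the
 * _OPENSSL_ia32cap_P capability word; there the ".byte 0x0f,0x38,0xf1,..."
 * sequences encode movbe stores of the byte-swapped counter words.
 */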
    959 .p2align	4
    960 _aesni_ctr32_encrypt_blocks:
    961 	leaq	(%rsp),%rax
    962 	pushq	%rbp
    963 	subq	$128,%rsp
    964 	andq	$-16,%rsp
    965 	leaq	-8(%rax),%rbp
    966 
    967 	cmpq	$1,%rdx
    968 	je	L$ctr32_one_shortcut
    969 
    970 	movdqu	(%r8),%xmm2
    971 	movdqu	(%rcx),%xmm0
    972 	movl	12(%r8),%r8d
    973 	pxor	%xmm0,%xmm2
    974 	movl	12(%rcx),%r11d
    975 	movdqa	%xmm2,0(%rsp)
    976 	bswapl	%r8d
    977 	movdqa	%xmm2,%xmm3
    978 	movdqa	%xmm2,%xmm4
    979 	movdqa	%xmm2,%xmm5
    980 	movdqa	%xmm2,64(%rsp)
    981 	movdqa	%xmm2,80(%rsp)
    982 	movdqa	%xmm2,96(%rsp)
    983 	movq	%rdx,%r10
    984 	movdqa	%xmm2,112(%rsp)
    985 
    986 	leaq	1(%r8),%rax
    987 	leaq	2(%r8),%rdx
    988 	bswapl	%eax
    989 	bswapl	%edx
    990 	xorl	%r11d,%eax
    991 	xorl	%r11d,%edx
    992 .byte	102,15,58,34,216,3
    993 	leaq	3(%r8),%rax
    994 	movdqa	%xmm3,16(%rsp)
    995 .byte	102,15,58,34,226,3
    996 	bswapl	%eax
    997 	movq	%r10,%rdx
    998 	leaq	4(%r8),%r10
    999 	movdqa	%xmm4,32(%rsp)
   1000 	xorl	%r11d,%eax
   1001 	bswapl	%r10d
   1002 .byte	102,15,58,34,232,3
   1003 	xorl	%r11d,%r10d
   1004 	movdqa	%xmm5,48(%rsp)
   1005 	leaq	5(%r8),%r9
   1006 	movl	%r10d,64+12(%rsp)
   1007 	bswapl	%r9d
   1008 	leaq	6(%r8),%r10
   1009 	movl	240(%rcx),%eax
   1010 	xorl	%r11d,%r9d
   1011 	bswapl	%r10d
   1012 	movl	%r9d,80+12(%rsp)
   1013 	xorl	%r11d,%r10d
   1014 	leaq	7(%r8),%r9
   1015 	movl	%r10d,96+12(%rsp)
   1016 	bswapl	%r9d
   1017 	movl	_OPENSSL_ia32cap_P+4(%rip),%r10d
   1018 	xorl	%r11d,%r9d
   1019 	andl	$71303168,%r10d
   1020 	movl	%r9d,112+12(%rsp)
   1021 
   1022 	movups	16(%rcx),%xmm1
   1023 
   1024 	movdqa	64(%rsp),%xmm6
   1025 	movdqa	80(%rsp),%xmm7
   1026 
   1027 	cmpq	$8,%rdx
   1028 	jb	L$ctr32_tail
   1029 
   1030 	subq	$6,%rdx
   1031 	cmpl	$4194304,%r10d
   1032 	je	L$ctr32_6x
   1033 
   1034 	leaq	128(%rcx),%rcx
   1035 	subq	$2,%rdx
   1036 	jmp	L$ctr32_loop8
   1037 
   1038 .p2align	4
   1039 L$ctr32_6x:
   1040 	shll	$4,%eax
   1041 	movl	$48,%r10d
   1042 	bswapl	%r11d
   1043 	leaq	32(%rcx,%rax,1),%rcx
   1044 	subq	%rax,%r10
   1045 	jmp	L$ctr32_loop6
   1046 
   1047 .p2align	4
   1048 L$ctr32_loop6:
   1049 	addl	$6,%r8d
   1050 	movups	-48(%rcx,%r10,1),%xmm0
   1051 .byte	102,15,56,220,209
   1052 	movl	%r8d,%eax
   1053 	xorl	%r11d,%eax
   1054 .byte	102,15,56,220,217
   1055 .byte	0x0f,0x38,0xf1,0x44,0x24,12
   1056 	leal	1(%r8),%eax
   1057 .byte	102,15,56,220,225
   1058 	xorl	%r11d,%eax
   1059 .byte	0x0f,0x38,0xf1,0x44,0x24,28
   1060 .byte	102,15,56,220,233
   1061 	leal	2(%r8),%eax
   1062 	xorl	%r11d,%eax
   1063 .byte	102,15,56,220,241
   1064 .byte	0x0f,0x38,0xf1,0x44,0x24,44
   1065 	leal	3(%r8),%eax
   1066 .byte	102,15,56,220,249
   1067 	movups	-32(%rcx,%r10,1),%xmm1
   1068 	xorl	%r11d,%eax
   1069 
   1070 .byte	102,15,56,220,208
   1071 .byte	0x0f,0x38,0xf1,0x44,0x24,60
   1072 	leal	4(%r8),%eax
   1073 .byte	102,15,56,220,216
   1074 	xorl	%r11d,%eax
   1075 .byte	0x0f,0x38,0xf1,0x44,0x24,76
   1076 .byte	102,15,56,220,224
   1077 	leal	5(%r8),%eax
   1078 	xorl	%r11d,%eax
   1079 .byte	102,15,56,220,232
   1080 .byte	0x0f,0x38,0xf1,0x44,0x24,92
   1081 	movq	%r10,%rax
   1082 .byte	102,15,56,220,240
   1083 .byte	102,15,56,220,248
   1084 	movups	-16(%rcx,%r10,1),%xmm0
   1085 
   1086 	call	L$enc_loop6
   1087 
   1088 	movdqu	(%rdi),%xmm8
   1089 	movdqu	16(%rdi),%xmm9
   1090 	movdqu	32(%rdi),%xmm10
   1091 	movdqu	48(%rdi),%xmm11
   1092 	movdqu	64(%rdi),%xmm12
   1093 	movdqu	80(%rdi),%xmm13
   1094 	leaq	96(%rdi),%rdi
   1095 	movups	-64(%rcx,%r10,1),%xmm1
   1096 	pxor	%xmm2,%xmm8
   1097 	movaps	0(%rsp),%xmm2
   1098 	pxor	%xmm3,%xmm9
   1099 	movaps	16(%rsp),%xmm3
   1100 	pxor	%xmm4,%xmm10
   1101 	movaps	32(%rsp),%xmm4
   1102 	pxor	%xmm5,%xmm11
   1103 	movaps	48(%rsp),%xmm5
   1104 	pxor	%xmm6,%xmm12
   1105 	movaps	64(%rsp),%xmm6
   1106 	pxor	%xmm7,%xmm13
   1107 	movaps	80(%rsp),%xmm7
   1108 	movdqu	%xmm8,(%rsi)
   1109 	movdqu	%xmm9,16(%rsi)
   1110 	movdqu	%xmm10,32(%rsi)
   1111 	movdqu	%xmm11,48(%rsi)
   1112 	movdqu	%xmm12,64(%rsi)
   1113 	movdqu	%xmm13,80(%rsi)
   1114 	leaq	96(%rsi),%rsi
   1115 
   1116 	subq	$6,%rdx
   1117 	jnc	L$ctr32_loop6
   1118 
   1119 	addq	$6,%rdx
   1120 	jz	L$ctr32_done
   1121 
   1122 	leal	-48(%r10),%eax
   1123 	leaq	-80(%rcx,%r10,1),%rcx
   1124 	negl	%eax
   1125 	shrl	$4,%eax
   1126 	jmp	L$ctr32_tail
   1127 
   1128 .p2align	5
   1129 L$ctr32_loop8:
   1130 	addl	$8,%r8d
   1131 	movdqa	96(%rsp),%xmm8
   1132 .byte	102,15,56,220,209
   1133 	movl	%r8d,%r9d
   1134 	movdqa	112(%rsp),%xmm9
   1135 .byte	102,15,56,220,217
   1136 	bswapl	%r9d
   1137 	movups	32-128(%rcx),%xmm0
   1138 .byte	102,15,56,220,225
   1139 	xorl	%r11d,%r9d
   1140 	nop
   1141 .byte	102,15,56,220,233
   1142 	movl	%r9d,0+12(%rsp)
   1143 	leaq	1(%r8),%r9
   1144 .byte	102,15,56,220,241
   1145 .byte	102,15,56,220,249
   1146 .byte	102,68,15,56,220,193
   1147 .byte	102,68,15,56,220,201
   1148 	movups	48-128(%rcx),%xmm1
   1149 	bswapl	%r9d
   1150 .byte	102,15,56,220,208
   1151 .byte	102,15,56,220,216
   1152 	xorl	%r11d,%r9d
   1153 .byte	0x66,0x90
   1154 .byte	102,15,56,220,224
   1155 .byte	102,15,56,220,232
   1156 	movl	%r9d,16+12(%rsp)
   1157 	leaq	2(%r8),%r9
   1158 .byte	102,15,56,220,240
   1159 .byte	102,15,56,220,248
   1160 .byte	102,68,15,56,220,192
   1161 .byte	102,68,15,56,220,200
   1162 	movups	64-128(%rcx),%xmm0
   1163 	bswapl	%r9d
   1164 .byte	102,15,56,220,209
   1165 .byte	102,15,56,220,217
   1166 	xorl	%r11d,%r9d
   1167 .byte	0x66,0x90
   1168 .byte	102,15,56,220,225
   1169 .byte	102,15,56,220,233
   1170 	movl	%r9d,32+12(%rsp)
   1171 	leaq	3(%r8),%r9
   1172 .byte	102,15,56,220,241
   1173 .byte	102,15,56,220,249
   1174 .byte	102,68,15,56,220,193
   1175 .byte	102,68,15,56,220,201
   1176 	movups	80-128(%rcx),%xmm1
   1177 	bswapl	%r9d
   1178 .byte	102,15,56,220,208
   1179 .byte	102,15,56,220,216
   1180 	xorl	%r11d,%r9d
   1181 .byte	0x66,0x90
   1182 .byte	102,15,56,220,224
   1183 .byte	102,15,56,220,232
   1184 	movl	%r9d,48+12(%rsp)
   1185 	leaq	4(%r8),%r9
   1186 .byte	102,15,56,220,240
   1187 .byte	102,15,56,220,248
   1188 .byte	102,68,15,56,220,192
   1189 .byte	102,68,15,56,220,200
   1190 	movups	96-128(%rcx),%xmm0
   1191 	bswapl	%r9d
   1192 .byte	102,15,56,220,209
   1193 .byte	102,15,56,220,217
   1194 	xorl	%r11d,%r9d
   1195 .byte	0x66,0x90
   1196 .byte	102,15,56,220,225
   1197 .byte	102,15,56,220,233
   1198 	movl	%r9d,64+12(%rsp)
   1199 	leaq	5(%r8),%r9
   1200 .byte	102,15,56,220,241
   1201 .byte	102,15,56,220,249
   1202 .byte	102,68,15,56,220,193
   1203 .byte	102,68,15,56,220,201
   1204 	movups	112-128(%rcx),%xmm1
   1205 	bswapl	%r9d
   1206 .byte	102,15,56,220,208
   1207 .byte	102,15,56,220,216
   1208 	xorl	%r11d,%r9d
   1209 .byte	0x66,0x90
   1210 .byte	102,15,56,220,224
   1211 .byte	102,15,56,220,232
   1212 	movl	%r9d,80+12(%rsp)
   1213 	leaq	6(%r8),%r9
   1214 .byte	102,15,56,220,240
   1215 .byte	102,15,56,220,248
   1216 .byte	102,68,15,56,220,192
   1217 .byte	102,68,15,56,220,200
   1218 	movups	128-128(%rcx),%xmm0
   1219 	bswapl	%r9d
   1220 .byte	102,15,56,220,209
   1221 .byte	102,15,56,220,217
   1222 	xorl	%r11d,%r9d
   1223 .byte	0x66,0x90
   1224 .byte	102,15,56,220,225
   1225 .byte	102,15,56,220,233
   1226 	movl	%r9d,96+12(%rsp)
   1227 	leaq	7(%r8),%r9
   1228 .byte	102,15,56,220,241
   1229 .byte	102,15,56,220,249
   1230 .byte	102,68,15,56,220,193
   1231 .byte	102,68,15,56,220,201
   1232 	movups	144-128(%rcx),%xmm1
   1233 	bswapl	%r9d
   1234 .byte	102,15,56,220,208
   1235 .byte	102,15,56,220,216
   1236 .byte	102,15,56,220,224
   1237 	xorl	%r11d,%r9d
   1238 	movdqu	0(%rdi),%xmm10
   1239 .byte	102,15,56,220,232
   1240 	movl	%r9d,112+12(%rsp)
   1241 	cmpl	$11,%eax
   1242 .byte	102,15,56,220,240
   1243 .byte	102,15,56,220,248
   1244 .byte	102,68,15,56,220,192
   1245 .byte	102,68,15,56,220,200
   1246 	movups	160-128(%rcx),%xmm0
   1247 
   1248 	jb	L$ctr32_enc_done
   1249 
   1250 .byte	102,15,56,220,209
   1251 .byte	102,15,56,220,217
   1252 .byte	102,15,56,220,225
   1253 .byte	102,15,56,220,233
   1254 .byte	102,15,56,220,241
   1255 .byte	102,15,56,220,249
   1256 .byte	102,68,15,56,220,193
   1257 .byte	102,68,15,56,220,201
   1258 	movups	176-128(%rcx),%xmm1
   1259 
   1260 .byte	102,15,56,220,208
   1261 .byte	102,15,56,220,216
   1262 .byte	102,15,56,220,224
   1263 .byte	102,15,56,220,232
   1264 .byte	102,15,56,220,240
   1265 .byte	102,15,56,220,248
   1266 .byte	102,68,15,56,220,192
   1267 .byte	102,68,15,56,220,200
   1268 	movups	192-128(%rcx),%xmm0
   1269 	je	L$ctr32_enc_done
   1270 
   1271 .byte	102,15,56,220,209
   1272 .byte	102,15,56,220,217
   1273 .byte	102,15,56,220,225
   1274 .byte	102,15,56,220,233
   1275 .byte	102,15,56,220,241
   1276 .byte	102,15,56,220,249
   1277 .byte	102,68,15,56,220,193
   1278 .byte	102,68,15,56,220,201
   1279 	movups	208-128(%rcx),%xmm1
   1280 
   1281 .byte	102,15,56,220,208
   1282 .byte	102,15,56,220,216
   1283 .byte	102,15,56,220,224
   1284 .byte	102,15,56,220,232
   1285 .byte	102,15,56,220,240
   1286 .byte	102,15,56,220,248
   1287 .byte	102,68,15,56,220,192
   1288 .byte	102,68,15,56,220,200
   1289 	movups	224-128(%rcx),%xmm0
   1290 	jmp	L$ctr32_enc_done
   1291 
   1292 .p2align	4
   1293 L$ctr32_enc_done:
   1294 	movdqu	16(%rdi),%xmm11
   1295 	pxor	%xmm0,%xmm10
   1296 	movdqu	32(%rdi),%xmm12
   1297 	pxor	%xmm0,%xmm11
   1298 	movdqu	48(%rdi),%xmm13
   1299 	pxor	%xmm0,%xmm12
   1300 	movdqu	64(%rdi),%xmm14
   1301 	pxor	%xmm0,%xmm13
   1302 	movdqu	80(%rdi),%xmm15
   1303 	pxor	%xmm0,%xmm14
   1304 	pxor	%xmm0,%xmm15
   1305 .byte	102,15,56,220,209
   1306 .byte	102,15,56,220,217
   1307 .byte	102,15,56,220,225
   1308 .byte	102,15,56,220,233
   1309 .byte	102,15,56,220,241
   1310 .byte	102,15,56,220,249
   1311 .byte	102,68,15,56,220,193
   1312 .byte	102,68,15,56,220,201
   1313 	movdqu	96(%rdi),%xmm1
   1314 	leaq	128(%rdi),%rdi
   1315 
   1316 .byte	102,65,15,56,221,210
   1317 	pxor	%xmm0,%xmm1
   1318 	movdqu	112-128(%rdi),%xmm10
   1319 .byte	102,65,15,56,221,219
   1320 	pxor	%xmm0,%xmm10
   1321 	movdqa	0(%rsp),%xmm11
   1322 .byte	102,65,15,56,221,228
   1323 .byte	102,65,15,56,221,237
   1324 	movdqa	16(%rsp),%xmm12
   1325 	movdqa	32(%rsp),%xmm13
   1326 .byte	102,65,15,56,221,246
   1327 .byte	102,65,15,56,221,255
   1328 	movdqa	48(%rsp),%xmm14
   1329 	movdqa	64(%rsp),%xmm15
   1330 .byte	102,68,15,56,221,193
   1331 	movdqa	80(%rsp),%xmm0
   1332 	movups	16-128(%rcx),%xmm1
   1333 .byte	102,69,15,56,221,202
   1334 
   1335 	movups	%xmm2,(%rsi)
   1336 	movdqa	%xmm11,%xmm2
   1337 	movups	%xmm3,16(%rsi)
   1338 	movdqa	%xmm12,%xmm3
   1339 	movups	%xmm4,32(%rsi)
   1340 	movdqa	%xmm13,%xmm4
   1341 	movups	%xmm5,48(%rsi)
   1342 	movdqa	%xmm14,%xmm5
   1343 	movups	%xmm6,64(%rsi)
   1344 	movdqa	%xmm15,%xmm6
   1345 	movups	%xmm7,80(%rsi)
   1346 	movdqa	%xmm0,%xmm7
   1347 	movups	%xmm8,96(%rsi)
   1348 	movups	%xmm9,112(%rsi)
   1349 	leaq	128(%rsi),%rsi
   1350 
   1351 	subq	$8,%rdx
   1352 	jnc	L$ctr32_loop8
   1353 
   1354 	addq	$8,%rdx
   1355 	jz	L$ctr32_done
   1356 	leaq	-128(%rcx),%rcx
   1357 
   1358 L$ctr32_tail:
   1359 	leaq	16(%rcx),%rcx
   1360 	cmpq	$4,%rdx
   1361 	jb	L$ctr32_loop3
   1362 	je	L$ctr32_loop4
   1363 
   1364 	shll	$4,%eax
   1365 	movdqa	96(%rsp),%xmm8
   1366 	pxor	%xmm9,%xmm9
   1367 
   1368 	movups	16(%rcx),%xmm0
   1369 .byte	102,15,56,220,209
   1370 .byte	102,15,56,220,217
   1371 	leaq	32-16(%rcx,%rax,1),%rcx
   1372 	negq	%rax
   1373 .byte	102,15,56,220,225
   1374 	addq	$16,%rax
   1375 	movups	(%rdi),%xmm10
   1376 .byte	102,15,56,220,233
   1377 .byte	102,15,56,220,241
   1378 	movups	16(%rdi),%xmm11
   1379 	movups	32(%rdi),%xmm12
   1380 .byte	102,15,56,220,249
   1381 .byte	102,68,15,56,220,193
   1382 
   1383 	call	L$enc_loop8_enter
   1384 
   1385 	movdqu	48(%rdi),%xmm13
   1386 	pxor	%xmm10,%xmm2
   1387 	movdqu	64(%rdi),%xmm10
   1388 	pxor	%xmm11,%xmm3
   1389 	movdqu	%xmm2,(%rsi)
   1390 	pxor	%xmm12,%xmm4
   1391 	movdqu	%xmm3,16(%rsi)
   1392 	pxor	%xmm13,%xmm5
   1393 	movdqu	%xmm4,32(%rsi)
   1394 	pxor	%xmm10,%xmm6
   1395 	movdqu	%xmm5,48(%rsi)
   1396 	movdqu	%xmm6,64(%rsi)
   1397 	cmpq	$6,%rdx
   1398 	jb	L$ctr32_done
   1399 
   1400 	movups	80(%rdi),%xmm11
   1401 	xorps	%xmm11,%xmm7
   1402 	movups	%xmm7,80(%rsi)
   1403 	je	L$ctr32_done
   1404 
   1405 	movups	96(%rdi),%xmm12
   1406 	xorps	%xmm12,%xmm8
   1407 	movups	%xmm8,96(%rsi)
   1408 	jmp	L$ctr32_done
   1409 
   1410 .p2align	5
   1411 L$ctr32_loop4:
   1412 .byte	102,15,56,220,209
   1413 	leaq	16(%rcx),%rcx
   1414 	decl	%eax
   1415 .byte	102,15,56,220,217
   1416 .byte	102,15,56,220,225
   1417 .byte	102,15,56,220,233
   1418 	movups	(%rcx),%xmm1
   1419 	jnz	L$ctr32_loop4
   1420 .byte	102,15,56,221,209
   1421 .byte	102,15,56,221,217
   1422 	movups	(%rdi),%xmm10
   1423 	movups	16(%rdi),%xmm11
   1424 .byte	102,15,56,221,225
   1425 .byte	102,15,56,221,233
   1426 	movups	32(%rdi),%xmm12
   1427 	movups	48(%rdi),%xmm13
   1428 
   1429 	xorps	%xmm10,%xmm2
   1430 	movups	%xmm2,(%rsi)
   1431 	xorps	%xmm11,%xmm3
   1432 	movups	%xmm3,16(%rsi)
   1433 	pxor	%xmm12,%xmm4
   1434 	movdqu	%xmm4,32(%rsi)
   1435 	pxor	%xmm13,%xmm5
   1436 	movdqu	%xmm5,48(%rsi)
   1437 	jmp	L$ctr32_done
   1438 
   1439 .p2align	5
   1440 L$ctr32_loop3:
   1441 .byte	102,15,56,220,209
   1442 	leaq	16(%rcx),%rcx
   1443 	decl	%eax
   1444 .byte	102,15,56,220,217
   1445 .byte	102,15,56,220,225
   1446 	movups	(%rcx),%xmm1
   1447 	jnz	L$ctr32_loop3
   1448 .byte	102,15,56,221,209
   1449 .byte	102,15,56,221,217
   1450 .byte	102,15,56,221,225
   1451 
   1452 	movups	(%rdi),%xmm10
   1453 	xorps	%xmm10,%xmm2
   1454 	movups	%xmm2,(%rsi)
   1455 	cmpq	$2,%rdx
   1456 	jb	L$ctr32_done
   1457 
   1458 	movups	16(%rdi),%xmm11
   1459 	xorps	%xmm11,%xmm3
   1460 	movups	%xmm3,16(%rsi)
   1461 	je	L$ctr32_done
   1462 
   1463 	movups	32(%rdi),%xmm12
   1464 	xorps	%xmm12,%xmm4
   1465 	movups	%xmm4,32(%rsi)
   1466 	jmp	L$ctr32_done
   1467 
   1468 .p2align	4
   1469 L$ctr32_one_shortcut:
   1470 	movups	(%r8),%xmm2
   1471 	movups	(%rdi),%xmm10
   1472 	movl	240(%rcx),%eax
   1473 	movups	(%rcx),%xmm0
   1474 	movups	16(%rcx),%xmm1
   1475 	leaq	32(%rcx),%rcx
   1476 	xorps	%xmm0,%xmm2
   1477 L$oop_enc1_7:
   1478 .byte	102,15,56,220,209
   1479 	decl	%eax
   1480 	movups	(%rcx),%xmm1
   1481 	leaq	16(%rcx),%rcx
   1482 	jnz	L$oop_enc1_7
   1483 .byte	102,15,56,221,209
   1484 	xorps	%xmm10,%xmm2
   1485 	movups	%xmm2,(%rsi)
   1486 	jmp	L$ctr32_done
   1487 
   1488 .p2align	4
   1489 L$ctr32_done:
   1490 	leaq	(%rbp),%rsp
   1491 	popq	%rbp
   1492 L$ctr32_epilogue:
   1493 	.byte	0xf3,0xc3
   1494 
   1495 .globl	_aesni_xts_encrypt
   1496 .private_extern _aesni_xts_encrypt
   1497 
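/*
 * _aesni_xts_encrypt: AES-XTS encryption.
 * Arguments: %rdi = in, %rsi = out, %rdx = length in bytes, %rcx = data
 * ("key1") schedule, %r8 = tweak ("key2") schedule, %r9 = 16-byte tweak/IV.
 * A conventional prototype (not part of this file) is roughly:
 *   void aesni_xts_encrypt(const uint8_t *in, uint8_t *out, size_t len,
 *                          const void *key1, const void *key2,
 *                          const uint8_t iv[16]);
 * The IV is first encrypted with key2 to form the initial tweak; successive
 * tweaks are derived by the pshufd/psrad/pand/paddq sequences, which
 * multiply the tweak by x in GF(2^128) using the L$xts_magic constant.
 * Six blocks are processed per grand loop, and a trailing partial block is
 * handled with ciphertext stealing (L$xts_enc_steal).
 */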
   1498 .p2align	4
   1499 _aesni_xts_encrypt:
   1500 	leaq	(%rsp),%rax
   1501 	pushq	%rbp
   1502 	subq	$112,%rsp
   1503 	andq	$-16,%rsp
   1504 	leaq	-8(%rax),%rbp
   1505 	movups	(%r9),%xmm2
   1506 	movl	240(%r8),%eax
   1507 	movl	240(%rcx),%r10d
   1508 	movups	(%r8),%xmm0
   1509 	movups	16(%r8),%xmm1
   1510 	leaq	32(%r8),%r8
   1511 	xorps	%xmm0,%xmm2
   1512 L$oop_enc1_8:
   1513 .byte	102,15,56,220,209
   1514 	decl	%eax
   1515 	movups	(%r8),%xmm1
   1516 	leaq	16(%r8),%r8
   1517 	jnz	L$oop_enc1_8
   1518 .byte	102,15,56,221,209
   1519 	movups	(%rcx),%xmm0
   1520 	movq	%rcx,%r11
   1521 	movl	%r10d,%eax
   1522 	shll	$4,%r10d
   1523 	movq	%rdx,%r9
   1524 	andq	$-16,%rdx
   1525 
   1526 	movups	16(%rcx,%r10,1),%xmm1
   1527 
   1528 	movdqa	L$xts_magic(%rip),%xmm8
   1529 	movdqa	%xmm2,%xmm15
   1530 	pshufd	$95,%xmm2,%xmm9
   1531 	pxor	%xmm0,%xmm1
   1532 	movdqa	%xmm9,%xmm14
   1533 	paddd	%xmm9,%xmm9
   1534 	movdqa	%xmm15,%xmm10
   1535 	psrad	$31,%xmm14
   1536 	paddq	%xmm15,%xmm15
   1537 	pand	%xmm8,%xmm14
   1538 	pxor	%xmm0,%xmm10
   1539 	pxor	%xmm14,%xmm15
   1540 	movdqa	%xmm9,%xmm14
   1541 	paddd	%xmm9,%xmm9
   1542 	movdqa	%xmm15,%xmm11
   1543 	psrad	$31,%xmm14
   1544 	paddq	%xmm15,%xmm15
   1545 	pand	%xmm8,%xmm14
   1546 	pxor	%xmm0,%xmm11
   1547 	pxor	%xmm14,%xmm15
   1548 	movdqa	%xmm9,%xmm14
   1549 	paddd	%xmm9,%xmm9
   1550 	movdqa	%xmm15,%xmm12
   1551 	psrad	$31,%xmm14
   1552 	paddq	%xmm15,%xmm15
   1553 	pand	%xmm8,%xmm14
   1554 	pxor	%xmm0,%xmm12
   1555 	pxor	%xmm14,%xmm15
   1556 	movdqa	%xmm9,%xmm14
   1557 	paddd	%xmm9,%xmm9
   1558 	movdqa	%xmm15,%xmm13
   1559 	psrad	$31,%xmm14
   1560 	paddq	%xmm15,%xmm15
   1561 	pand	%xmm8,%xmm14
   1562 	pxor	%xmm0,%xmm13
   1563 	pxor	%xmm14,%xmm15
   1564 	movdqa	%xmm15,%xmm14
   1565 	psrad	$31,%xmm9
   1566 	paddq	%xmm15,%xmm15
   1567 	pand	%xmm8,%xmm9
   1568 	pxor	%xmm0,%xmm14
   1569 	pxor	%xmm9,%xmm15
   1570 	movaps	%xmm1,96(%rsp)
   1571 
   1572 	subq	$96,%rdx
   1573 	jc	L$xts_enc_short
   1574 
   1575 	movl	$16+96,%eax
   1576 	leaq	32(%r11,%r10,1),%rcx
   1577 	subq	%r10,%rax
   1578 	movups	16(%r11),%xmm1
   1579 	movq	%rax,%r10
   1580 	leaq	L$xts_magic(%rip),%r8
   1581 	jmp	L$xts_enc_grandloop
   1582 
   1583 .p2align	5
   1584 L$xts_enc_grandloop:
   1585 	movdqu	0(%rdi),%xmm2
   1586 	movdqa	%xmm0,%xmm8
   1587 	movdqu	16(%rdi),%xmm3
   1588 	pxor	%xmm10,%xmm2
   1589 	movdqu	32(%rdi),%xmm4
   1590 	pxor	%xmm11,%xmm3
   1591 .byte	102,15,56,220,209
   1592 	movdqu	48(%rdi),%xmm5
   1593 	pxor	%xmm12,%xmm4
   1594 .byte	102,15,56,220,217
   1595 	movdqu	64(%rdi),%xmm6
   1596 	pxor	%xmm13,%xmm5
   1597 .byte	102,15,56,220,225
   1598 	movdqu	80(%rdi),%xmm7
   1599 	pxor	%xmm15,%xmm8
   1600 	movdqa	96(%rsp),%xmm9
   1601 	pxor	%xmm14,%xmm6
   1602 .byte	102,15,56,220,233
   1603 	movups	32(%r11),%xmm0
   1604 	leaq	96(%rdi),%rdi
   1605 	pxor	%xmm8,%xmm7
   1606 
   1607 	pxor	%xmm9,%xmm10
   1608 .byte	102,15,56,220,241
   1609 	pxor	%xmm9,%xmm11
   1610 	movdqa	%xmm10,0(%rsp)
   1611 .byte	102,15,56,220,249
   1612 	movups	48(%r11),%xmm1
   1613 	pxor	%xmm9,%xmm12
   1614 
   1615 .byte	102,15,56,220,208
   1616 	pxor	%xmm9,%xmm13
   1617 	movdqa	%xmm11,16(%rsp)
   1618 .byte	102,15,56,220,216
   1619 	pxor	%xmm9,%xmm14
   1620 	movdqa	%xmm12,32(%rsp)
   1621 .byte	102,15,56,220,224
   1622 .byte	102,15,56,220,232
   1623 	pxor	%xmm9,%xmm8
   1624 	movdqa	%xmm14,64(%rsp)
   1625 .byte	102,15,56,220,240
   1626 .byte	102,15,56,220,248
   1627 	movups	64(%r11),%xmm0
   1628 	movdqa	%xmm8,80(%rsp)
   1629 	pshufd	$95,%xmm15,%xmm9
   1630 	jmp	L$xts_enc_loop6
   1631 .p2align	5
   1632 L$xts_enc_loop6:
   1633 .byte	102,15,56,220,209
   1634 .byte	102,15,56,220,217
   1635 .byte	102,15,56,220,225
   1636 .byte	102,15,56,220,233
   1637 .byte	102,15,56,220,241
   1638 .byte	102,15,56,220,249
   1639 	movups	-64(%rcx,%rax,1),%xmm1
   1640 	addq	$32,%rax
   1641 
   1642 .byte	102,15,56,220,208
   1643 .byte	102,15,56,220,216
   1644 .byte	102,15,56,220,224
   1645 .byte	102,15,56,220,232
   1646 .byte	102,15,56,220,240
   1647 .byte	102,15,56,220,248
   1648 	movups	-80(%rcx,%rax,1),%xmm0
   1649 	jnz	L$xts_enc_loop6
   1650 
   1651 	movdqa	(%r8),%xmm8
   1652 	movdqa	%xmm9,%xmm14
   1653 	paddd	%xmm9,%xmm9
   1654 .byte	102,15,56,220,209
   1655 	paddq	%xmm15,%xmm15
   1656 	psrad	$31,%xmm14
   1657 .byte	102,15,56,220,217
   1658 	pand	%xmm8,%xmm14
   1659 	movups	(%r11),%xmm10
   1660 .byte	102,15,56,220,225
   1661 .byte	102,15,56,220,233
   1662 .byte	102,15,56,220,241
   1663 	pxor	%xmm14,%xmm15
   1664 	movaps	%xmm10,%xmm11
   1665 .byte	102,15,56,220,249
   1666 	movups	-64(%rcx),%xmm1
   1667 
   1668 	movdqa	%xmm9,%xmm14
   1669 .byte	102,15,56,220,208
   1670 	paddd	%xmm9,%xmm9
   1671 	pxor	%xmm15,%xmm10
   1672 .byte	102,15,56,220,216
   1673 	psrad	$31,%xmm14
   1674 	paddq	%xmm15,%xmm15
   1675 .byte	102,15,56,220,224
   1676 .byte	102,15,56,220,232
   1677 	pand	%xmm8,%xmm14
   1678 	movaps	%xmm11,%xmm12
   1679 .byte	102,15,56,220,240
   1680 	pxor	%xmm14,%xmm15
   1681 	movdqa	%xmm9,%xmm14
   1682 .byte	102,15,56,220,248
   1683 	movups	-48(%rcx),%xmm0
   1684 
   1685 	paddd	%xmm9,%xmm9
   1686 .byte	102,15,56,220,209
   1687 	pxor	%xmm15,%xmm11
   1688 	psrad	$31,%xmm14
   1689 .byte	102,15,56,220,217
   1690 	paddq	%xmm15,%xmm15
   1691 	pand	%xmm8,%xmm14
   1692 .byte	102,15,56,220,225
   1693 .byte	102,15,56,220,233
   1694 	movdqa	%xmm13,48(%rsp)
   1695 	pxor	%xmm14,%xmm15
   1696 .byte	102,15,56,220,241
   1697 	movaps	%xmm12,%xmm13
   1698 	movdqa	%xmm9,%xmm14
   1699 .byte	102,15,56,220,249
   1700 	movups	-32(%rcx),%xmm1
   1701 
   1702 	paddd	%xmm9,%xmm9
   1703 .byte	102,15,56,220,208
   1704 	pxor	%xmm15,%xmm12
   1705 	psrad	$31,%xmm14
   1706 .byte	102,15,56,220,216
   1707 	paddq	%xmm15,%xmm15
   1708 	pand	%xmm8,%xmm14
   1709 .byte	102,15,56,220,224
   1710 .byte	102,15,56,220,232
   1711 .byte	102,15,56,220,240
   1712 	pxor	%xmm14,%xmm15
   1713 	movaps	%xmm13,%xmm14
   1714 .byte	102,15,56,220,248
   1715 
   1716 	movdqa	%xmm9,%xmm0
   1717 	paddd	%xmm9,%xmm9
   1718 .byte	102,15,56,220,209
   1719 	pxor	%xmm15,%xmm13
   1720 	psrad	$31,%xmm0
   1721 .byte	102,15,56,220,217
   1722 	paddq	%xmm15,%xmm15
   1723 	pand	%xmm8,%xmm0
   1724 .byte	102,15,56,220,225
   1725 .byte	102,15,56,220,233
   1726 	pxor	%xmm0,%xmm15
   1727 	movups	(%r11),%xmm0
   1728 .byte	102,15,56,220,241
   1729 .byte	102,15,56,220,249
   1730 	movups	16(%r11),%xmm1
   1731 
   1732 	pxor	%xmm15,%xmm14
   1733 .byte	102,15,56,221,84,36,0
   1734 	psrad	$31,%xmm9
   1735 	paddq	%xmm15,%xmm15
   1736 .byte	102,15,56,221,92,36,16
   1737 .byte	102,15,56,221,100,36,32
   1738 	pand	%xmm8,%xmm9
   1739 	movq	%r10,%rax
   1740 .byte	102,15,56,221,108,36,48
   1741 .byte	102,15,56,221,116,36,64
   1742 .byte	102,15,56,221,124,36,80
   1743 	pxor	%xmm9,%xmm15
   1744 
   1745 	leaq	96(%rsi),%rsi
   1746 	movups	%xmm2,-96(%rsi)
   1747 	movups	%xmm3,-80(%rsi)
   1748 	movups	%xmm4,-64(%rsi)
   1749 	movups	%xmm5,-48(%rsi)
   1750 	movups	%xmm6,-32(%rsi)
   1751 	movups	%xmm7,-16(%rsi)
   1752 	subq	$96,%rdx
   1753 	jnc	L$xts_enc_grandloop
   1754 
   1755 	movl	$16+96,%eax
   1756 	subl	%r10d,%eax
   1757 	movq	%r11,%rcx
   1758 	shrl	$4,%eax
   1759 
   1760 L$xts_enc_short:
   1761 	movl	%eax,%r10d
   1762 	pxor	%xmm0,%xmm10
   1763 	addq	$96,%rdx
   1764 	jz	L$xts_enc_done
   1765 
   1766 	pxor	%xmm0,%xmm11
   1767 	cmpq	$32,%rdx
   1768 	jb	L$xts_enc_one
   1769 	pxor	%xmm0,%xmm12
   1770 	je	L$xts_enc_two
   1771 
   1772 	pxor	%xmm0,%xmm13
   1773 	cmpq	$64,%rdx
   1774 	jb	L$xts_enc_three
   1775 	pxor	%xmm0,%xmm14
   1776 	je	L$xts_enc_four
   1777 
   1778 	movdqu	(%rdi),%xmm2
   1779 	movdqu	16(%rdi),%xmm3
   1780 	movdqu	32(%rdi),%xmm4
   1781 	pxor	%xmm10,%xmm2
   1782 	movdqu	48(%rdi),%xmm5
   1783 	pxor	%xmm11,%xmm3
   1784 	movdqu	64(%rdi),%xmm6
   1785 	leaq	80(%rdi),%rdi
   1786 	pxor	%xmm12,%xmm4
   1787 	pxor	%xmm13,%xmm5
   1788 	pxor	%xmm14,%xmm6
   1789 
   1790 	call	_aesni_encrypt6
   1791 
   1792 	xorps	%xmm10,%xmm2
   1793 	movdqa	%xmm15,%xmm10
   1794 	xorps	%xmm11,%xmm3
   1795 	xorps	%xmm12,%xmm4
   1796 	movdqu	%xmm2,(%rsi)
   1797 	xorps	%xmm13,%xmm5
   1798 	movdqu	%xmm3,16(%rsi)
   1799 	xorps	%xmm14,%xmm6
   1800 	movdqu	%xmm4,32(%rsi)
   1801 	movdqu	%xmm5,48(%rsi)
   1802 	movdqu	%xmm6,64(%rsi)
   1803 	leaq	80(%rsi),%rsi
   1804 	jmp	L$xts_enc_done
   1805 
   1806 .p2align	4
   1807 L$xts_enc_one:
   1808 	movups	(%rdi),%xmm2
   1809 	leaq	16(%rdi),%rdi
   1810 	xorps	%xmm10,%xmm2
   1811 	movups	(%rcx),%xmm0
   1812 	movups	16(%rcx),%xmm1
   1813 	leaq	32(%rcx),%rcx
   1814 	xorps	%xmm0,%xmm2
   1815 L$oop_enc1_9:
   1816 .byte	102,15,56,220,209
   1817 	decl	%eax
   1818 	movups	(%rcx),%xmm1
   1819 	leaq	16(%rcx),%rcx
   1820 	jnz	L$oop_enc1_9
   1821 .byte	102,15,56,221,209
   1822 	xorps	%xmm10,%xmm2
   1823 	movdqa	%xmm11,%xmm10
   1824 	movups	%xmm2,(%rsi)
   1825 	leaq	16(%rsi),%rsi
   1826 	jmp	L$xts_enc_done
   1827 
   1828 .p2align	4
   1829 L$xts_enc_two:
   1830 	movups	(%rdi),%xmm2
   1831 	movups	16(%rdi),%xmm3
   1832 	leaq	32(%rdi),%rdi
   1833 	xorps	%xmm10,%xmm2
   1834 	xorps	%xmm11,%xmm3
   1835 
   1836 	call	_aesni_encrypt2
   1837 
   1838 	xorps	%xmm10,%xmm2
   1839 	movdqa	%xmm12,%xmm10
   1840 	xorps	%xmm11,%xmm3
   1841 	movups	%xmm2,(%rsi)
   1842 	movups	%xmm3,16(%rsi)
   1843 	leaq	32(%rsi),%rsi
   1844 	jmp	L$xts_enc_done
   1845 
   1846 .p2align	4
   1847 L$xts_enc_three:
   1848 	movups	(%rdi),%xmm2
   1849 	movups	16(%rdi),%xmm3
   1850 	movups	32(%rdi),%xmm4
   1851 	leaq	48(%rdi),%rdi
   1852 	xorps	%xmm10,%xmm2
   1853 	xorps	%xmm11,%xmm3
   1854 	xorps	%xmm12,%xmm4
   1855 
   1856 	call	_aesni_encrypt3
   1857 
   1858 	xorps	%xmm10,%xmm2
   1859 	movdqa	%xmm13,%xmm10
   1860 	xorps	%xmm11,%xmm3
   1861 	xorps	%xmm12,%xmm4
   1862 	movups	%xmm2,(%rsi)
   1863 	movups	%xmm3,16(%rsi)
   1864 	movups	%xmm4,32(%rsi)
   1865 	leaq	48(%rsi),%rsi
   1866 	jmp	L$xts_enc_done
   1867 
   1868 .p2align	4
   1869 L$xts_enc_four:
   1870 	movups	(%rdi),%xmm2
   1871 	movups	16(%rdi),%xmm3
   1872 	movups	32(%rdi),%xmm4
   1873 	xorps	%xmm10,%xmm2
   1874 	movups	48(%rdi),%xmm5
   1875 	leaq	64(%rdi),%rdi
   1876 	xorps	%xmm11,%xmm3
   1877 	xorps	%xmm12,%xmm4
   1878 	xorps	%xmm13,%xmm5
   1879 
   1880 	call	_aesni_encrypt4
   1881 
   1882 	pxor	%xmm10,%xmm2
   1883 	movdqa	%xmm14,%xmm10
   1884 	pxor	%xmm11,%xmm3
   1885 	pxor	%xmm12,%xmm4
   1886 	movdqu	%xmm2,(%rsi)
   1887 	pxor	%xmm13,%xmm5
   1888 	movdqu	%xmm3,16(%rsi)
   1889 	movdqu	%xmm4,32(%rsi)
   1890 	movdqu	%xmm5,48(%rsi)
   1891 	leaq	64(%rsi),%rsi
   1892 	jmp	L$xts_enc_done
   1893 
   1894 .p2align	4
   1895 L$xts_enc_done:
   1896 	andq	$15,%r9
   1897 	jz	L$xts_enc_ret
   1898 	movq	%r9,%rdx
   1899 
   1900 L$xts_enc_steal:
   1901 	movzbl	(%rdi),%eax
   1902 	movzbl	-16(%rsi),%ecx
   1903 	leaq	1(%rdi),%rdi
   1904 	movb	%al,-16(%rsi)
   1905 	movb	%cl,0(%rsi)
   1906 	leaq	1(%rsi),%rsi
   1907 	subq	$1,%rdx
   1908 	jnz	L$xts_enc_steal
   1909 
   1910 	subq	%r9,%rsi
   1911 	movq	%r11,%rcx
   1912 	movl	%r10d,%eax
   1913 
   1914 	movups	-16(%rsi),%xmm2
   1915 	xorps	%xmm10,%xmm2
   1916 	movups	(%rcx),%xmm0
   1917 	movups	16(%rcx),%xmm1
   1918 	leaq	32(%rcx),%rcx
   1919 	xorps	%xmm0,%xmm2
   1920 L$oop_enc1_10:
   1921 .byte	102,15,56,220,209
   1922 	decl	%eax
   1923 	movups	(%rcx),%xmm1
   1924 	leaq	16(%rcx),%rcx
   1925 	jnz	L$oop_enc1_10
   1926 .byte	102,15,56,221,209
   1927 	xorps	%xmm10,%xmm2
   1928 	movups	%xmm2,-16(%rsi)
   1929 
   1930 L$xts_enc_ret:
   1931 	leaq	(%rbp),%rsp
   1932 	popq	%rbp
   1933 L$xts_enc_epilogue:
   1934 	.byte	0xf3,0xc3
   1935 
   1936 .globl	_aesni_xts_decrypt
   1937 .private_extern _aesni_xts_decrypt
   1938 
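/*
 * _aesni_xts_decrypt: XTS decryption counterpart of _aesni_xts_encrypt with
 * the same argument layout.  The tweak is still produced by *encrypting*
 * the IV with key2 (hence the L$oop_enc1_11 label), while the data blocks
 * go through aesdec/aesdeclast; the length is adjusted up front so a
 * trailing partial block can later be handled by ciphertext stealing over
 * the last two blocks.
 */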
   1939 .p2align	4
   1940 _aesni_xts_decrypt:
   1941 	leaq	(%rsp),%rax
   1942 	pushq	%rbp
   1943 	subq	$112,%rsp
   1944 	andq	$-16,%rsp
   1945 	leaq	-8(%rax),%rbp
   1946 	movups	(%r9),%xmm2
   1947 	movl	240(%r8),%eax
   1948 	movl	240(%rcx),%r10d
   1949 	movups	(%r8),%xmm0
   1950 	movups	16(%r8),%xmm1
   1951 	leaq	32(%r8),%r8
   1952 	xorps	%xmm0,%xmm2
   1953 L$oop_enc1_11:
   1954 .byte	102,15,56,220,209
   1955 	decl	%eax
   1956 	movups	(%r8),%xmm1
   1957 	leaq	16(%r8),%r8
   1958 	jnz	L$oop_enc1_11
   1959 .byte	102,15,56,221,209
   1960 	xorl	%eax,%eax
   1961 	testq	$15,%rdx
   1962 	setnz	%al
   1963 	shlq	$4,%rax
   1964 	subq	%rax,%rdx
   1965 
   1966 	movups	(%rcx),%xmm0
   1967 	movq	%rcx,%r11
   1968 	movl	%r10d,%eax
   1969 	shll	$4,%r10d
   1970 	movq	%rdx,%r9
   1971 	andq	$-16,%rdx
   1972 
   1973 	movups	16(%rcx,%r10,1),%xmm1
   1974 
   1975 	movdqa	L$xts_magic(%rip),%xmm8
   1976 	movdqa	%xmm2,%xmm15
   1977 	pshufd	$95,%xmm2,%xmm9
   1978 	pxor	%xmm0,%xmm1
   1979 	movdqa	%xmm9,%xmm14
   1980 	paddd	%xmm9,%xmm9
   1981 	movdqa	%xmm15,%xmm10
   1982 	psrad	$31,%xmm14
   1983 	paddq	%xmm15,%xmm15
   1984 	pand	%xmm8,%xmm14
   1985 	pxor	%xmm0,%xmm10
   1986 	pxor	%xmm14,%xmm15
   1987 	movdqa	%xmm9,%xmm14
   1988 	paddd	%xmm9,%xmm9
   1989 	movdqa	%xmm15,%xmm11
   1990 	psrad	$31,%xmm14
   1991 	paddq	%xmm15,%xmm15
   1992 	pand	%xmm8,%xmm14
   1993 	pxor	%xmm0,%xmm11
   1994 	pxor	%xmm14,%xmm15
   1995 	movdqa	%xmm9,%xmm14
   1996 	paddd	%xmm9,%xmm9
   1997 	movdqa	%xmm15,%xmm12
   1998 	psrad	$31,%xmm14
   1999 	paddq	%xmm15,%xmm15
   2000 	pand	%xmm8,%xmm14
   2001 	pxor	%xmm0,%xmm12
   2002 	pxor	%xmm14,%xmm15
   2003 	movdqa	%xmm9,%xmm14
   2004 	paddd	%xmm9,%xmm9
   2005 	movdqa	%xmm15,%xmm13
   2006 	psrad	$31,%xmm14
   2007 	paddq	%xmm15,%xmm15
   2008 	pand	%xmm8,%xmm14
   2009 	pxor	%xmm0,%xmm13
   2010 	pxor	%xmm14,%xmm15
   2011 	movdqa	%xmm15,%xmm14
   2012 	psrad	$31,%xmm9
   2013 	paddq	%xmm15,%xmm15
   2014 	pand	%xmm8,%xmm9
   2015 	pxor	%xmm0,%xmm14
   2016 	pxor	%xmm9,%xmm15
   2017 	movaps	%xmm1,96(%rsp)
   2018 
   2019 	subq	$96,%rdx
   2020 	jc	L$xts_dec_short
   2021 
   2022 	movl	$16+96,%eax
   2023 	leaq	32(%r11,%r10,1),%rcx
   2024 	subq	%r10,%rax
   2025 	movups	16(%r11),%xmm1
   2026 	movq	%rax,%r10
   2027 	leaq	L$xts_magic(%rip),%r8
   2028 	jmp	L$xts_dec_grandloop
   2029 
   2030 .p2align	5
   2031 L$xts_dec_grandloop:
   2032 	movdqu	0(%rdi),%xmm2
   2033 	movdqa	%xmm0,%xmm8
   2034 	movdqu	16(%rdi),%xmm3
   2035 	pxor	%xmm10,%xmm2
   2036 	movdqu	32(%rdi),%xmm4
   2037 	pxor	%xmm11,%xmm3
   2038 .byte	102,15,56,222,209
   2039 	movdqu	48(%rdi),%xmm5
   2040 	pxor	%xmm12,%xmm4
   2041 .byte	102,15,56,222,217
   2042 	movdqu	64(%rdi),%xmm6
   2043 	pxor	%xmm13,%xmm5
   2044 .byte	102,15,56,222,225
   2045 	movdqu	80(%rdi),%xmm7
   2046 	pxor	%xmm15,%xmm8
   2047 	movdqa	96(%rsp),%xmm9
   2048 	pxor	%xmm14,%xmm6
   2049 .byte	102,15,56,222,233
   2050 	movups	32(%r11),%xmm0
   2051 	leaq	96(%rdi),%rdi
   2052 	pxor	%xmm8,%xmm7
   2053 
   2054 	pxor	%xmm9,%xmm10
   2055 .byte	102,15,56,222,241
   2056 	pxor	%xmm9,%xmm11
   2057 	movdqa	%xmm10,0(%rsp)
   2058 .byte	102,15,56,222,249
   2059 	movups	48(%r11),%xmm1
   2060 	pxor	%xmm9,%xmm12
   2061 
   2062 .byte	102,15,56,222,208
   2063 	pxor	%xmm9,%xmm13
   2064 	movdqa	%xmm11,16(%rsp)
   2065 .byte	102,15,56,222,216
   2066 	pxor	%xmm9,%xmm14
   2067 	movdqa	%xmm12,32(%rsp)
   2068 .byte	102,15,56,222,224
   2069 .byte	102,15,56,222,232
   2070 	pxor	%xmm9,%xmm8
   2071 	movdqa	%xmm14,64(%rsp)
   2072 .byte	102,15,56,222,240
   2073 .byte	102,15,56,222,248
   2074 	movups	64(%r11),%xmm0
   2075 	movdqa	%xmm8,80(%rsp)
   2076 	pshufd	$95,%xmm15,%xmm9
   2077 	jmp	L$xts_dec_loop6
   2078 .p2align	5
   2079 L$xts_dec_loop6:
   2080 .byte	102,15,56,222,209
   2081 .byte	102,15,56,222,217
   2082 .byte	102,15,56,222,225
   2083 .byte	102,15,56,222,233
   2084 .byte	102,15,56,222,241
   2085 .byte	102,15,56,222,249
   2086 	movups	-64(%rcx,%rax,1),%xmm1
   2087 	addq	$32,%rax
   2088 
   2089 .byte	102,15,56,222,208
   2090 .byte	102,15,56,222,216
   2091 .byte	102,15,56,222,224
   2092 .byte	102,15,56,222,232
   2093 .byte	102,15,56,222,240
   2094 .byte	102,15,56,222,248
   2095 	movups	-80(%rcx,%rax,1),%xmm0
   2096 	jnz	L$xts_dec_loop6
   2097 
   2098 	movdqa	(%r8),%xmm8
   2099 	movdqa	%xmm9,%xmm14
   2100 	paddd	%xmm9,%xmm9
   2101 .byte	102,15,56,222,209
   2102 	paddq	%xmm15,%xmm15
   2103 	psrad	$31,%xmm14
   2104 .byte	102,15,56,222,217
   2105 	pand	%xmm8,%xmm14
   2106 	movups	(%r11),%xmm10
   2107 .byte	102,15,56,222,225
   2108 .byte	102,15,56,222,233
   2109 .byte	102,15,56,222,241
   2110 	pxor	%xmm14,%xmm15
   2111 	movaps	%xmm10,%xmm11
   2112 .byte	102,15,56,222,249
   2113 	movups	-64(%rcx),%xmm1
   2114 
   2115 	movdqa	%xmm9,%xmm14
   2116 .byte	102,15,56,222,208
   2117 	paddd	%xmm9,%xmm9
   2118 	pxor	%xmm15,%xmm10
   2119 .byte	102,15,56,222,216
   2120 	psrad	$31,%xmm14
   2121 	paddq	%xmm15,%xmm15
   2122 .byte	102,15,56,222,224
   2123 .byte	102,15,56,222,232
   2124 	pand	%xmm8,%xmm14
   2125 	movaps	%xmm11,%xmm12
   2126 .byte	102,15,56,222,240
   2127 	pxor	%xmm14,%xmm15
   2128 	movdqa	%xmm9,%xmm14
   2129 .byte	102,15,56,222,248
   2130 	movups	-48(%rcx),%xmm0
   2131 
   2132 	paddd	%xmm9,%xmm9
   2133 .byte	102,15,56,222,209
   2134 	pxor	%xmm15,%xmm11
   2135 	psrad	$31,%xmm14
   2136 .byte	102,15,56,222,217
   2137 	paddq	%xmm15,%xmm15
   2138 	pand	%xmm8,%xmm14
   2139 .byte	102,15,56,222,225
   2140 .byte	102,15,56,222,233
   2141 	movdqa	%xmm13,48(%rsp)
   2142 	pxor	%xmm14,%xmm15
   2143 .byte	102,15,56,222,241
   2144 	movaps	%xmm12,%xmm13
   2145 	movdqa	%xmm9,%xmm14
   2146 .byte	102,15,56,222,249
   2147 	movups	-32(%rcx),%xmm1
   2148 
   2149 	paddd	%xmm9,%xmm9
   2150 .byte	102,15,56,222,208
   2151 	pxor	%xmm15,%xmm12
   2152 	psrad	$31,%xmm14
   2153 .byte	102,15,56,222,216
   2154 	paddq	%xmm15,%xmm15
   2155 	pand	%xmm8,%xmm14
   2156 .byte	102,15,56,222,224
   2157 .byte	102,15,56,222,232
   2158 .byte	102,15,56,222,240
   2159 	pxor	%xmm14,%xmm15
   2160 	movaps	%xmm13,%xmm14
   2161 .byte	102,15,56,222,248
   2162 
   2163 	movdqa	%xmm9,%xmm0
   2164 	paddd	%xmm9,%xmm9
   2165 .byte	102,15,56,222,209
   2166 	pxor	%xmm15,%xmm13
   2167 	psrad	$31,%xmm0
   2168 .byte	102,15,56,222,217
   2169 	paddq	%xmm15,%xmm15
   2170 	pand	%xmm8,%xmm0
   2171 .byte	102,15,56,222,225
   2172 .byte	102,15,56,222,233
   2173 	pxor	%xmm0,%xmm15
   2174 	movups	(%r11),%xmm0
   2175 .byte	102,15,56,222,241
   2176 .byte	102,15,56,222,249
   2177 	movups	16(%r11),%xmm1
   2178 
   2179 	pxor	%xmm15,%xmm14
   2180 .byte	102,15,56,223,84,36,0
   2181 	psrad	$31,%xmm9
   2182 	paddq	%xmm15,%xmm15
   2183 .byte	102,15,56,223,92,36,16
   2184 .byte	102,15,56,223,100,36,32
   2185 	pand	%xmm8,%xmm9
   2186 	movq	%r10,%rax
   2187 .byte	102,15,56,223,108,36,48
   2188 .byte	102,15,56,223,116,36,64
   2189 .byte	102,15,56,223,124,36,80
   2190 	pxor	%xmm9,%xmm15
   2191 
   2192 	leaq	96(%rsi),%rsi
   2193 	movups	%xmm2,-96(%rsi)
   2194 	movups	%xmm3,-80(%rsi)
   2195 	movups	%xmm4,-64(%rsi)
   2196 	movups	%xmm5,-48(%rsi)
   2197 	movups	%xmm6,-32(%rsi)
   2198 	movups	%xmm7,-16(%rsi)
   2199 	subq	$96,%rdx
   2200 	jnc	L$xts_dec_grandloop
   2201 
   2202 	movl	$16+96,%eax
   2203 	subl	%r10d,%eax
   2204 	movq	%r11,%rcx
   2205 	shrl	$4,%eax
   2206 
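/*
 * Short path: fewer than six full blocks remain.  %rdx gets back the 96
 * subtracted above, and the code dispatches on the residual length to the
 * one/two/three/four-block handlers below, falling through here to a
 * five-block pass via _aesni_decrypt6.
 */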
   2207 L$xts_dec_short:
   2208 	movl	%eax,%r10d
   2209 	pxor	%xmm0,%xmm10
   2210 	pxor	%xmm0,%xmm11
   2211 	addq	$96,%rdx
   2212 	jz	L$xts_dec_done
   2213 
   2214 	pxor	%xmm0,%xmm12
   2215 	cmpq	$32,%rdx
   2216 	jb	L$xts_dec_one
   2217 	pxor	%xmm0,%xmm13
   2218 	je	L$xts_dec_two
   2219 
   2220 	pxor	%xmm0,%xmm14
   2221 	cmpq	$64,%rdx
   2222 	jb	L$xts_dec_three
   2223 	je	L$xts_dec_four
   2224 
   2225 	movdqu	(%rdi),%xmm2
   2226 	movdqu	16(%rdi),%xmm3
   2227 	movdqu	32(%rdi),%xmm4
   2228 	pxor	%xmm10,%xmm2
   2229 	movdqu	48(%rdi),%xmm5
   2230 	pxor	%xmm11,%xmm3
   2231 	movdqu	64(%rdi),%xmm6
   2232 	leaq	80(%rdi),%rdi
   2233 	pxor	%xmm12,%xmm4
   2234 	pxor	%xmm13,%xmm5
   2235 	pxor	%xmm14,%xmm6
   2236 
   2237 	call	_aesni_decrypt6
   2238 
   2239 	xorps	%xmm10,%xmm2
   2240 	xorps	%xmm11,%xmm3
   2241 	xorps	%xmm12,%xmm4
   2242 	movdqu	%xmm2,(%rsi)
   2243 	xorps	%xmm13,%xmm5
   2244 	movdqu	%xmm3,16(%rsi)
   2245 	xorps	%xmm14,%xmm6
   2246 	movdqu	%xmm4,32(%rsi)
   2247 	pxor	%xmm14,%xmm14
   2248 	movdqu	%xmm5,48(%rsi)
   2249 	pcmpgtd	%xmm15,%xmm14
   2250 	movdqu	%xmm6,64(%rsi)
   2251 	leaq	80(%rsi),%rsi
   2252 	pshufd	$19,%xmm14,%xmm11
   2253 	andq	$15,%r9
   2254 	jz	L$xts_dec_ret
   2255 
   2256 	movdqa	%xmm15,%xmm10
   2257 	paddq	%xmm15,%xmm15
   2258 	pand	%xmm8,%xmm11
   2259 	pxor	%xmm15,%xmm11
   2260 	jmp	L$xts_dec_done2
   2261 
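/*
 * Single remaining block: whiten with the tweak in %xmm10, run the plain
 * one-block AES-NI loop (.byte 102,15,56,222,209 = aesdec %xmm1,%xmm2,
 * .byte 102,15,56,223,209 = aesdeclast %xmm1,%xmm2), then unwhiten.
 */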
   2262 .p2align	4
   2263 L$xts_dec_one:
   2264 	movups	(%rdi),%xmm2
   2265 	leaq	16(%rdi),%rdi
   2266 	xorps	%xmm10,%xmm2
   2267 	movups	(%rcx),%xmm0
   2268 	movups	16(%rcx),%xmm1
   2269 	leaq	32(%rcx),%rcx
   2270 	xorps	%xmm0,%xmm2
   2271 L$oop_dec1_12:
   2272 .byte	102,15,56,222,209
   2273 	decl	%eax
   2274 	movups	(%rcx),%xmm1
   2275 	leaq	16(%rcx),%rcx
   2276 	jnz	L$oop_dec1_12
   2277 .byte	102,15,56,223,209
   2278 	xorps	%xmm10,%xmm2
   2279 	movdqa	%xmm11,%xmm10
   2280 	movups	%xmm2,(%rsi)
   2281 	movdqa	%xmm12,%xmm11
   2282 	leaq	16(%rsi),%rsi
   2283 	jmp	L$xts_dec_done
   2284 
   2285 .p2align	4
   2286 L$xts_dec_two:
   2287 	movups	(%rdi),%xmm2
   2288 	movups	16(%rdi),%xmm3
   2289 	leaq	32(%rdi),%rdi
   2290 	xorps	%xmm10,%xmm2
   2291 	xorps	%xmm11,%xmm3
   2292 
   2293 	call	_aesni_decrypt2
   2294 
   2295 	xorps	%xmm10,%xmm2
   2296 	movdqa	%xmm12,%xmm10
   2297 	xorps	%xmm11,%xmm3
   2298 	movdqa	%xmm13,%xmm11
   2299 	movups	%xmm2,(%rsi)
   2300 	movups	%xmm3,16(%rsi)
   2301 	leaq	32(%rsi),%rsi
   2302 	jmp	L$xts_dec_done
   2303 
   2304 .p2align	4
   2305 L$xts_dec_three:
   2306 	movups	(%rdi),%xmm2
   2307 	movups	16(%rdi),%xmm3
   2308 	movups	32(%rdi),%xmm4
   2309 	leaq	48(%rdi),%rdi
   2310 	xorps	%xmm10,%xmm2
   2311 	xorps	%xmm11,%xmm3
   2312 	xorps	%xmm12,%xmm4
   2313 
   2314 	call	_aesni_decrypt3
   2315 
   2316 	xorps	%xmm10,%xmm2
   2317 	movdqa	%xmm13,%xmm10
   2318 	xorps	%xmm11,%xmm3
   2319 	movdqa	%xmm14,%xmm11
   2320 	xorps	%xmm12,%xmm4
   2321 	movups	%xmm2,(%rsi)
   2322 	movups	%xmm3,16(%rsi)
   2323 	movups	%xmm4,32(%rsi)
   2324 	leaq	48(%rsi),%rsi
   2325 	jmp	L$xts_dec_done
   2326 
   2327 .p2align	4
   2328 L$xts_dec_four:
   2329 	movups	(%rdi),%xmm2
   2330 	movups	16(%rdi),%xmm3
   2331 	movups	32(%rdi),%xmm4
   2332 	xorps	%xmm10,%xmm2
   2333 	movups	48(%rdi),%xmm5
   2334 	leaq	64(%rdi),%rdi
   2335 	xorps	%xmm11,%xmm3
   2336 	xorps	%xmm12,%xmm4
   2337 	xorps	%xmm13,%xmm5
   2338 
   2339 	call	_aesni_decrypt4
   2340 
   2341 	pxor	%xmm10,%xmm2
   2342 	movdqa	%xmm14,%xmm10
   2343 	pxor	%xmm11,%xmm3
   2344 	movdqa	%xmm15,%xmm11
   2345 	pxor	%xmm12,%xmm4
   2346 	movdqu	%xmm2,(%rsi)
   2347 	pxor	%xmm13,%xmm5
   2348 	movdqu	%xmm3,16(%rsi)
   2349 	movdqu	%xmm4,32(%rsi)
   2350 	movdqu	%xmm5,48(%rsi)
   2351 	leaq	64(%rsi),%rsi
   2352 	jmp	L$xts_dec_done
   2353 
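/*
 * %r9 still holds the original length modulo 16.  If it is zero the job is
 * done; otherwise fall into the ciphertext-stealing path, where the block
 * at (%rdi) is first decrypted with the next tweak (%xmm11).
 */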
   2354 .p2align	4
   2355 L$xts_dec_done:
   2356 	andq	$15,%r9
   2357 	jz	L$xts_dec_ret
   2358 L$xts_dec_done2:
   2359 	movq	%r9,%rdx
   2360 	movq	%r11,%rcx
   2361 	movl	%r10d,%eax
   2362 
   2363 	movups	(%rdi),%xmm2
   2364 	xorps	%xmm11,%xmm2
   2365 	movups	(%rcx),%xmm0
   2366 	movups	16(%rcx),%xmm1
   2367 	leaq	32(%rcx),%rcx
   2368 	xorps	%xmm0,%xmm2
   2369 L$oop_dec1_13:
   2370 .byte	102,15,56,222,209
   2371 	decl	%eax
   2372 	movups	(%rcx),%xmm1
   2373 	leaq	16(%rcx),%rcx
   2374 	jnz	L$oop_dec1_13
   2375 .byte	102,15,56,223,209
   2376 	xorps	%xmm11,%xmm2
   2377 	movups	%xmm2,(%rsi)
   2378 
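/*
 * Ciphertext stealing: swap the tail bytes between the partial ciphertext
 * block at 16(%rdi) and the block just written at (%rsi), placing the
 * displaced plaintext bytes at 16(%rsi), then re-decrypt the reassembled
 * block (at %rsi minus %r9 after the loop) with the previous tweak %xmm10.
 */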
   2379 L$xts_dec_steal:
   2380 	movzbl	16(%rdi),%eax
   2381 	movzbl	(%rsi),%ecx
   2382 	leaq	1(%rdi),%rdi
   2383 	movb	%al,(%rsi)
   2384 	movb	%cl,16(%rsi)
   2385 	leaq	1(%rsi),%rsi
   2386 	subq	$1,%rdx
   2387 	jnz	L$xts_dec_steal
   2388 
   2389 	subq	%r9,%rsi
   2390 	movq	%r11,%rcx
   2391 	movl	%r10d,%eax
   2392 
   2393 	movups	(%rsi),%xmm2
   2394 	xorps	%xmm10,%xmm2
   2395 	movups	(%rcx),%xmm0
   2396 	movups	16(%rcx),%xmm1
   2397 	leaq	32(%rcx),%rcx
   2398 	xorps	%xmm0,%xmm2
   2399 L$oop_dec1_14:
   2400 .byte	102,15,56,222,209
   2401 	decl	%eax
   2402 	movups	(%rcx),%xmm1
   2403 	leaq	16(%rcx),%rcx
   2404 	jnz	L$oop_dec1_14
   2405 .byte	102,15,56,223,209
   2406 	xorps	%xmm10,%xmm2
   2407 	movups	%xmm2,(%rsi)
   2408 
   2409 L$xts_dec_ret:
   2410 	leaq	(%rbp),%rsp
   2411 	popq	%rbp
   2412 L$xts_dec_epilogue:
   2413 	.byte	0xf3,0xc3
   2414 
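/*
 * Per the usual OpenSSL interface this entry point corresponds to
 *   void aesni_cbc_encrypt(const unsigned char *in, unsigned char *out,
 *                          size_t length, const AES_KEY *key,
 *                          unsigned char *ivec, int enc);
 * with arguments in %rdi, %rsi, %rdx, %rcx, %r8 and %r9d (SysV AMD64).
 * The .byte 0xf3,0xc3 used for the returns here encodes "rep ret", the
 * two-byte return form preferred for older AMD branch predictors.
 */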
   2415 .globl	_aesni_cbc_encrypt
   2416 .private_extern _aesni_cbc_encrypt
   2417 
   2418 .p2align	4
   2419 _aesni_cbc_encrypt:
   2420 	testq	%rdx,%rdx
   2421 	jz	L$cbc_ret
   2422 
   2423 	movl	240(%rcx),%r10d
   2424 	movq	%rcx,%r11
   2425 	testl	%r9d,%r9d
   2426 	jz	L$cbc_decrypt
   2427 
   2428 	movups	(%r8),%xmm2
   2429 	movl	%r10d,%eax
   2430 	cmpq	$16,%rdx
   2431 	jb	L$cbc_enc_tail
   2432 	subq	$16,%rdx
   2433 	jmp	L$cbc_enc_loop
   2434 .p2align	4
   2435 L$cbc_enc_loop:
   2436 	movups	(%rdi),%xmm3
   2437 	leaq	16(%rdi),%rdi
   2438 
   2439 	movups	(%rcx),%xmm0
   2440 	movups	16(%rcx),%xmm1
   2441 	xorps	%xmm0,%xmm3
   2442 	leaq	32(%rcx),%rcx
   2443 	xorps	%xmm3,%xmm2
   2444 L$oop_enc1_15:
   2445 .byte	102,15,56,220,209
   2446 	decl	%eax
   2447 	movups	(%rcx),%xmm1
   2448 	leaq	16(%rcx),%rcx
   2449 	jnz	L$oop_enc1_15
   2450 .byte	102,15,56,221,209
   2451 	movl	%r10d,%eax
   2452 	movq	%r11,%rcx
   2453 	movups	%xmm2,0(%rsi)
   2454 	leaq	16(%rsi),%rsi
   2455 	subq	$16,%rdx
   2456 	jnc	L$cbc_enc_loop
   2457 	addq	$16,%rdx
   2458 	jnz	L$cbc_enc_tail
   2459 	movups	%xmm2,(%r8)
   2460 	jmp	L$cbc_ret
   2461 
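/*
 * Partial final block on the encrypt side: .long 0x9066A4F3 encodes
 * "rep movsb" plus a two-byte nop and copies the tail into the output
 * buffer; .long 0x9066AAF3 is "rep stosb" plus a nop and zero-pads that
 * block to 16 bytes, after which the loop above encrypts it in place.
 */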
   2462 L$cbc_enc_tail:
   2463 	movq	%rdx,%rcx
   2464 	xchgq	%rdi,%rsi
   2465 .long	0x9066A4F3
   2466 	movl	$16,%ecx
   2467 	subq	%rdx,%rcx
   2468 	xorl	%eax,%eax
   2469 .long	0x9066AAF3
   2470 	leaq	-16(%rdi),%rdi
   2471 	movl	%r10d,%eax
   2472 	movq	%rdi,%rsi
   2473 	movq	%r11,%rcx
   2474 	xorq	%rdx,%rdx
   2475 	jmp	L$cbc_enc_loop
   2476 
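/*
 * CBC decryption: a 16-byte aligned scratch slot is carved out on the
 * stack and the IV is kept in %xmm10.  The _OPENSSL_ia32cap_P test below
 * isolates the MOVBE and XSAVE bits of CPUID(1).ECX; "MOVBE without XSAVE"
 * apparently identifies Silvermont-class Atoms, which are routed to the
 * narrower 6-block loop instead of the 8-block one.
 */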
   2477 .p2align	4
   2478 L$cbc_decrypt:
   2479 	leaq	(%rsp),%rax
   2480 	pushq	%rbp
   2481 	subq	$16,%rsp
   2482 	andq	$-16,%rsp
   2483 	leaq	-8(%rax),%rbp
   2484 	movups	(%r8),%xmm10
   2485 	movl	%r10d,%eax
   2486 	cmpq	$80,%rdx
   2487 	jbe	L$cbc_dec_tail
   2488 
   2489 	movups	(%rcx),%xmm0
   2490 	movdqu	0(%rdi),%xmm2
   2491 	movdqu	16(%rdi),%xmm3
   2492 	movdqa	%xmm2,%xmm11
   2493 	movdqu	32(%rdi),%xmm4
   2494 	movdqa	%xmm3,%xmm12
   2495 	movdqu	48(%rdi),%xmm5
   2496 	movdqa	%xmm4,%xmm13
   2497 	movdqu	64(%rdi),%xmm6
   2498 	movdqa	%xmm5,%xmm14
   2499 	movdqu	80(%rdi),%xmm7
   2500 	movdqa	%xmm6,%xmm15
   2501 	movl	_OPENSSL_ia32cap_P+4(%rip),%r9d
   2502 	cmpq	$112,%rdx
   2503 	jbe	L$cbc_dec_six_or_seven
   2504 
   2505 	andl	$71303168,%r9d
   2506 	subq	$80,%rdx
   2507 	cmpl	$4194304,%r9d
   2508 	je	L$cbc_dec_loop6_enter
   2509 	subq	$32,%rdx
   2510 	leaq	112(%rcx),%rcx
   2511 	jmp	L$cbc_dec_loop8_enter
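/*
 * 8-block loop: round keys are addressed as K-112(%rcx) because %rcx was
 * advanced by 112 bytes above.  The cmp/setnc/shl on %r11 points it at the
 * next 8-block group of ciphertext (or back at the current one on the final
 * pass) so those blocks can be reloaded while the aesdec chains retire.
 */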
   2512 .p2align	4
   2513 L$cbc_dec_loop8:
   2514 	movups	%xmm9,(%rsi)
   2515 	leaq	16(%rsi),%rsi
   2516 L$cbc_dec_loop8_enter:
   2517 	movdqu	96(%rdi),%xmm8
   2518 	pxor	%xmm0,%xmm2
   2519 	movdqu	112(%rdi),%xmm9
   2520 	pxor	%xmm0,%xmm3
   2521 	movups	16-112(%rcx),%xmm1
   2522 	pxor	%xmm0,%xmm4
   2523 	xorq	%r11,%r11
   2524 	cmpq	$112,%rdx
   2525 	pxor	%xmm0,%xmm5
   2526 	pxor	%xmm0,%xmm6
   2527 	pxor	%xmm0,%xmm7
   2528 	pxor	%xmm0,%xmm8
   2529 
   2530 .byte	102,15,56,222,209
   2531 	pxor	%xmm0,%xmm9
   2532 	movups	32-112(%rcx),%xmm0
   2533 .byte	102,15,56,222,217
   2534 .byte	102,15,56,222,225
   2535 .byte	102,15,56,222,233
   2536 .byte	102,15,56,222,241
   2537 .byte	102,15,56,222,249
   2538 .byte	102,68,15,56,222,193
   2539 	setnc	%r11b
   2540 	shlq	$7,%r11
   2541 .byte	102,68,15,56,222,201
   2542 	addq	%rdi,%r11
   2543 	movups	48-112(%rcx),%xmm1
   2544 .byte	102,15,56,222,208
   2545 .byte	102,15,56,222,216
   2546 .byte	102,15,56,222,224
   2547 .byte	102,15,56,222,232
   2548 .byte	102,15,56,222,240
   2549 .byte	102,15,56,222,248
   2550 .byte	102,68,15,56,222,192
   2551 .byte	102,68,15,56,222,200
   2552 	movups	64-112(%rcx),%xmm0
   2553 	nop
   2554 .byte	102,15,56,222,209
   2555 .byte	102,15,56,222,217
   2556 .byte	102,15,56,222,225
   2557 .byte	102,15,56,222,233
   2558 .byte	102,15,56,222,241
   2559 .byte	102,15,56,222,249
   2560 .byte	102,68,15,56,222,193
   2561 .byte	102,68,15,56,222,201
   2562 	movups	80-112(%rcx),%xmm1
   2563 	nop
   2564 .byte	102,15,56,222,208
   2565 .byte	102,15,56,222,216
   2566 .byte	102,15,56,222,224
   2567 .byte	102,15,56,222,232
   2568 .byte	102,15,56,222,240
   2569 .byte	102,15,56,222,248
   2570 .byte	102,68,15,56,222,192
   2571 .byte	102,68,15,56,222,200
   2572 	movups	96-112(%rcx),%xmm0
   2573 	nop
   2574 .byte	102,15,56,222,209
   2575 .byte	102,15,56,222,217
   2576 .byte	102,15,56,222,225
   2577 .byte	102,15,56,222,233
   2578 .byte	102,15,56,222,241
   2579 .byte	102,15,56,222,249
   2580 .byte	102,68,15,56,222,193
   2581 .byte	102,68,15,56,222,201
   2582 	movups	112-112(%rcx),%xmm1
   2583 	nop
   2584 .byte	102,15,56,222,208
   2585 .byte	102,15,56,222,216
   2586 .byte	102,15,56,222,224
   2587 .byte	102,15,56,222,232
   2588 .byte	102,15,56,222,240
   2589 .byte	102,15,56,222,248
   2590 .byte	102,68,15,56,222,192
   2591 .byte	102,68,15,56,222,200
   2592 	movups	128-112(%rcx),%xmm0
   2593 	nop
   2594 .byte	102,15,56,222,209
   2595 .byte	102,15,56,222,217
   2596 .byte	102,15,56,222,225
   2597 .byte	102,15,56,222,233
   2598 .byte	102,15,56,222,241
   2599 .byte	102,15,56,222,249
   2600 .byte	102,68,15,56,222,193
   2601 .byte	102,68,15,56,222,201
   2602 	movups	144-112(%rcx),%xmm1
   2603 	cmpl	$11,%eax
   2604 .byte	102,15,56,222,208
   2605 .byte	102,15,56,222,216
   2606 .byte	102,15,56,222,224
   2607 .byte	102,15,56,222,232
   2608 .byte	102,15,56,222,240
   2609 .byte	102,15,56,222,248
   2610 .byte	102,68,15,56,222,192
   2611 .byte	102,68,15,56,222,200
   2612 	movups	160-112(%rcx),%xmm0
   2613 	jb	L$cbc_dec_done
   2614 .byte	102,15,56,222,209
   2615 .byte	102,15,56,222,217
   2616 .byte	102,15,56,222,225
   2617 .byte	102,15,56,222,233
   2618 .byte	102,15,56,222,241
   2619 .byte	102,15,56,222,249
   2620 .byte	102,68,15,56,222,193
   2621 .byte	102,68,15,56,222,201
   2622 	movups	176-112(%rcx),%xmm1
   2623 	nop
   2624 .byte	102,15,56,222,208
   2625 .byte	102,15,56,222,216
   2626 .byte	102,15,56,222,224
   2627 .byte	102,15,56,222,232
   2628 .byte	102,15,56,222,240
   2629 .byte	102,15,56,222,248
   2630 .byte	102,68,15,56,222,192
   2631 .byte	102,68,15,56,222,200
   2632 	movups	192-112(%rcx),%xmm0
   2633 	je	L$cbc_dec_done
   2634 .byte	102,15,56,222,209
   2635 .byte	102,15,56,222,217
   2636 .byte	102,15,56,222,225
   2637 .byte	102,15,56,222,233
   2638 .byte	102,15,56,222,241
   2639 .byte	102,15,56,222,249
   2640 .byte	102,68,15,56,222,193
   2641 .byte	102,68,15,56,222,201
   2642 	movups	208-112(%rcx),%xmm1
   2643 	nop
   2644 .byte	102,15,56,222,208
   2645 .byte	102,15,56,222,216
   2646 .byte	102,15,56,222,224
   2647 .byte	102,15,56,222,232
   2648 .byte	102,15,56,222,240
   2649 .byte	102,15,56,222,248
   2650 .byte	102,68,15,56,222,192
   2651 .byte	102,68,15,56,222,200
   2652 	movups	224-112(%rcx),%xmm0
   2653 	jmp	L$cbc_dec_done
   2654 .p2align	4
   2655 L$cbc_dec_done:
   2656 .byte	102,15,56,222,209
   2657 .byte	102,15,56,222,217
   2658 	pxor	%xmm0,%xmm10
   2659 	pxor	%xmm0,%xmm11
   2660 .byte	102,15,56,222,225
   2661 .byte	102,15,56,222,233
   2662 	pxor	%xmm0,%xmm12
   2663 	pxor	%xmm0,%xmm13
   2664 .byte	102,15,56,222,241
   2665 .byte	102,15,56,222,249
   2666 	pxor	%xmm0,%xmm14
   2667 	pxor	%xmm0,%xmm15
   2668 .byte	102,68,15,56,222,193
   2669 .byte	102,68,15,56,222,201
   2670 	movdqu	80(%rdi),%xmm1
   2671 
   2672 .byte	102,65,15,56,223,210
   2673 	movdqu	96(%rdi),%xmm10
   2674 	pxor	%xmm0,%xmm1
   2675 .byte	102,65,15,56,223,219
   2676 	pxor	%xmm0,%xmm10
   2677 	movdqu	112(%rdi),%xmm0
   2678 .byte	102,65,15,56,223,228
   2679 	leaq	128(%rdi),%rdi
   2680 	movdqu	0(%r11),%xmm11
   2681 .byte	102,65,15,56,223,237
   2682 .byte	102,65,15,56,223,246
   2683 	movdqu	16(%r11),%xmm12
   2684 	movdqu	32(%r11),%xmm13
   2685 .byte	102,65,15,56,223,255
   2686 .byte	102,68,15,56,223,193
   2687 	movdqu	48(%r11),%xmm14
   2688 	movdqu	64(%r11),%xmm15
   2689 .byte	102,69,15,56,223,202
   2690 	movdqa	%xmm0,%xmm10
   2691 	movdqu	80(%r11),%xmm1
   2692 	movups	-112(%rcx),%xmm0
   2693 
   2694 	movups	%xmm2,(%rsi)
   2695 	movdqa	%xmm11,%xmm2
   2696 	movups	%xmm3,16(%rsi)
   2697 	movdqa	%xmm12,%xmm3
   2698 	movups	%xmm4,32(%rsi)
   2699 	movdqa	%xmm13,%xmm4
   2700 	movups	%xmm5,48(%rsi)
   2701 	movdqa	%xmm14,%xmm5
   2702 	movups	%xmm6,64(%rsi)
   2703 	movdqa	%xmm15,%xmm6
   2704 	movups	%xmm7,80(%rsi)
   2705 	movdqa	%xmm1,%xmm7
   2706 	movups	%xmm8,96(%rsi)
   2707 	leaq	112(%rsi),%rsi
   2708 
   2709 	subq	$128,%rdx
   2710 	ja	L$cbc_dec_loop8
   2711 
   2712 	movaps	%xmm9,%xmm2
   2713 	leaq	-112(%rcx),%rcx
   2714 	addq	$112,%rdx
   2715 	jle	L$cbc_dec_tail_collected
   2716 	movups	%xmm9,(%rsi)
   2717 	leaq	16(%rsi),%rsi
   2718 	cmpq	$80,%rdx
   2719 	jbe	L$cbc_dec_tail
   2720 
   2721 	movaps	%xmm11,%xmm2
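/* Final pass over the last six or seven blocks (more than 80, at most 112 bytes left). */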
   2722 L$cbc_dec_six_or_seven:
   2723 	cmpq	$96,%rdx
   2724 	ja	L$cbc_dec_seven
   2725 
   2726 	movaps	%xmm7,%xmm8
   2727 	call	_aesni_decrypt6
   2728 	pxor	%xmm10,%xmm2
   2729 	movaps	%xmm8,%xmm10
   2730 	pxor	%xmm11,%xmm3
   2731 	movdqu	%xmm2,(%rsi)
   2732 	pxor	%xmm12,%xmm4
   2733 	movdqu	%xmm3,16(%rsi)
   2734 	pxor	%xmm13,%xmm5
   2735 	movdqu	%xmm4,32(%rsi)
   2736 	pxor	%xmm14,%xmm6
   2737 	movdqu	%xmm5,48(%rsi)
   2738 	pxor	%xmm15,%xmm7
   2739 	movdqu	%xmm6,64(%rsi)
   2740 	leaq	80(%rsi),%rsi
   2741 	movdqa	%xmm7,%xmm2
   2742 	jmp	L$cbc_dec_tail_collected
   2743 
   2744 .p2align	4
   2745 L$cbc_dec_seven:
   2746 	movups	96(%rdi),%xmm8
   2747 	xorps	%xmm9,%xmm9
   2748 	call	_aesni_decrypt8
   2749 	movups	80(%rdi),%xmm9
   2750 	pxor	%xmm10,%xmm2
   2751 	movups	96(%rdi),%xmm10
   2752 	pxor	%xmm11,%xmm3
   2753 	movdqu	%xmm2,(%rsi)
   2754 	pxor	%xmm12,%xmm4
   2755 	movdqu	%xmm3,16(%rsi)
   2756 	pxor	%xmm13,%xmm5
   2757 	movdqu	%xmm4,32(%rsi)
   2758 	pxor	%xmm14,%xmm6
   2759 	movdqu	%xmm5,48(%rsi)
   2760 	pxor	%xmm15,%xmm7
   2761 	movdqu	%xmm6,64(%rsi)
   2762 	pxor	%xmm9,%xmm8
   2763 	movdqu	%xmm7,80(%rsi)
   2764 	leaq	96(%rsi),%rsi
   2765 	movdqa	%xmm8,%xmm2
   2766 	jmp	L$cbc_dec_tail_collected
   2767 
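/*
 * 6-block loop used on the path selected by the CPU check above; same
 * structure as the 8-block loop but with a narrower interleave, using
 * _aesni_decrypt6 for the AES rounds.
 */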
   2768 .p2align	4
   2769 L$cbc_dec_loop6:
   2770 	movups	%xmm7,(%rsi)
   2771 	leaq	16(%rsi),%rsi
   2772 	movdqu	0(%rdi),%xmm2
   2773 	movdqu	16(%rdi),%xmm3
   2774 	movdqa	%xmm2,%xmm11
   2775 	movdqu	32(%rdi),%xmm4
   2776 	movdqa	%xmm3,%xmm12
   2777 	movdqu	48(%rdi),%xmm5
   2778 	movdqa	%xmm4,%xmm13
   2779 	movdqu	64(%rdi),%xmm6
   2780 	movdqa	%xmm5,%xmm14
   2781 	movdqu	80(%rdi),%xmm7
   2782 	movdqa	%xmm6,%xmm15
   2783 L$cbc_dec_loop6_enter:
   2784 	leaq	96(%rdi),%rdi
   2785 	movdqa	%xmm7,%xmm8
   2786 
   2787 	call	_aesni_decrypt6
   2788 
   2789 	pxor	%xmm10,%xmm2
   2790 	movdqa	%xmm8,%xmm10
   2791 	pxor	%xmm11,%xmm3
   2792 	movdqu	%xmm2,(%rsi)
   2793 	pxor	%xmm12,%xmm4
   2794 	movdqu	%xmm3,16(%rsi)
   2795 	pxor	%xmm13,%xmm5
   2796 	movdqu	%xmm4,32(%rsi)
   2797 	pxor	%xmm14,%xmm6
   2798 	movq	%r11,%rcx
   2799 	movdqu	%xmm5,48(%rsi)
   2800 	pxor	%xmm15,%xmm7
   2801 	movl	%r10d,%eax
   2802 	movdqu	%xmm6,64(%rsi)
   2803 	leaq	80(%rsi),%rsi
   2804 	subq	$96,%rdx
   2805 	ja	L$cbc_dec_loop6
   2806 
   2807 	movdqa	%xmm7,%xmm2
   2808 	addq	$80,%rdx
   2809 	jle	L$cbc_dec_tail_collected
   2810 	movups	%xmm7,(%rsi)
   2811 	leaq	16(%rsi),%rsi
   2812 
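/*
 * Tail: at most 80 bytes remain.  Up to five blocks are loaded, their
 * ciphertext copies staged in %xmm11 upward for the CBC chaining xor,
 * with the running IV kept in %xmm10.
 */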
   2813 L$cbc_dec_tail:
   2814 	movups	(%rdi),%xmm2
   2815 	subq	$16,%rdx
   2816 	jbe	L$cbc_dec_one
   2817 
   2818 	movups	16(%rdi),%xmm3
   2819 	movaps	%xmm2,%xmm11
   2820 	subq	$16,%rdx
   2821 	jbe	L$cbc_dec_two
   2822 
   2823 	movups	32(%rdi),%xmm4
   2824 	movaps	%xmm3,%xmm12
   2825 	subq	$16,%rdx
   2826 	jbe	L$cbc_dec_three
   2827 
   2828 	movups	48(%rdi),%xmm5
   2829 	movaps	%xmm4,%xmm13
   2830 	subq	$16,%rdx
   2831 	jbe	L$cbc_dec_four
   2832 
   2833 	movups	64(%rdi),%xmm6
   2834 	movaps	%xmm5,%xmm14
   2835 	movaps	%xmm6,%xmm15
   2836 	xorps	%xmm7,%xmm7
   2837 	call	_aesni_decrypt6
   2838 	pxor	%xmm10,%xmm2
   2839 	movaps	%xmm15,%xmm10
   2840 	pxor	%xmm11,%xmm3
   2841 	movdqu	%xmm2,(%rsi)
   2842 	pxor	%xmm12,%xmm4
   2843 	movdqu	%xmm3,16(%rsi)
   2844 	pxor	%xmm13,%xmm5
   2845 	movdqu	%xmm4,32(%rsi)
   2846 	pxor	%xmm14,%xmm6
   2847 	movdqu	%xmm5,48(%rsi)
   2848 	leaq	64(%rsi),%rsi
   2849 	movdqa	%xmm6,%xmm2
   2850 	subq	$16,%rdx
   2851 	jmp	L$cbc_dec_tail_collected
   2852 
   2853 .p2align	4
   2854 L$cbc_dec_one:
   2855 	movaps	%xmm2,%xmm11
   2856 	movups	(%rcx),%xmm0
   2857 	movups	16(%rcx),%xmm1
   2858 	leaq	32(%rcx),%rcx
   2859 	xorps	%xmm0,%xmm2
   2860 L$oop_dec1_16:
   2861 .byte	102,15,56,222,209
   2862 	decl	%eax
   2863 	movups	(%rcx),%xmm1
   2864 	leaq	16(%rcx),%rcx
   2865 	jnz	L$oop_dec1_16
   2866 .byte	102,15,56,223,209
   2867 	xorps	%xmm10,%xmm2
   2868 	movaps	%xmm11,%xmm10
   2869 	jmp	L$cbc_dec_tail_collected
   2870 .p2align	4
   2871 L$cbc_dec_two:
   2872 	movaps	%xmm3,%xmm12
   2873 	call	_aesni_decrypt2
   2874 	pxor	%xmm10,%xmm2
   2875 	movaps	%xmm12,%xmm10
   2876 	pxor	%xmm11,%xmm3
   2877 	movdqu	%xmm2,(%rsi)
   2878 	movdqa	%xmm3,%xmm2
   2879 	leaq	16(%rsi),%rsi
   2880 	jmp	L$cbc_dec_tail_collected
   2881 .p2align	4
   2882 L$cbc_dec_three:
   2883 	movaps	%xmm4,%xmm13
   2884 	call	_aesni_decrypt3
   2885 	pxor	%xmm10,%xmm2
   2886 	movaps	%xmm13,%xmm10
   2887 	pxor	%xmm11,%xmm3
   2888 	movdqu	%xmm2,(%rsi)
   2889 	pxor	%xmm12,%xmm4
   2890 	movdqu	%xmm3,16(%rsi)
   2891 	movdqa	%xmm4,%xmm2
   2892 	leaq	32(%rsi),%rsi
   2893 	jmp	L$cbc_dec_tail_collected
   2894 .p2align	4
   2895 L$cbc_dec_four:
   2896 	movaps	%xmm5,%xmm14
   2897 	call	_aesni_decrypt4
   2898 	pxor	%xmm10,%xmm2
   2899 	movaps	%xmm14,%xmm10
   2900 	pxor	%xmm11,%xmm3
   2901 	movdqu	%xmm2,(%rsi)
   2902 	pxor	%xmm12,%xmm4
   2903 	movdqu	%xmm3,16(%rsi)
   2904 	pxor	%xmm13,%xmm5
   2905 	movdqu	%xmm4,32(%rsi)
   2906 	movdqa	%xmm5,%xmm2
   2907 	leaq	48(%rsi),%rsi
   2908 	jmp	L$cbc_dec_tail_collected
   2909 
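/*
 * Write the new IV (%xmm10) back through %r8.  A whole final block is
 * stored directly; a partial one is staged at (%rsp) and its tail copied
 * out with "rep movsb" (the .long 0x9066A4F3 below).
 */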
   2910 .p2align	4
   2911 L$cbc_dec_tail_collected:
   2912 	movups	%xmm10,(%r8)
   2913 	andq	$15,%rdx
   2914 	jnz	L$cbc_dec_tail_partial
   2915 	movups	%xmm2,(%rsi)
   2916 	jmp	L$cbc_dec_ret
   2917 .p2align	4
   2918 L$cbc_dec_tail_partial:
   2919 	movaps	%xmm2,(%rsp)
   2920 	movq	$16,%rcx
   2921 	movq	%rsi,%rdi
   2922 	subq	%rdx,%rcx
   2923 	leaq	(%rsp),%rsi
   2924 .long	0x9066A4F3
   2925 
   2926 L$cbc_dec_ret:
   2927 	leaq	(%rbp),%rsp
   2928 	popq	%rbp
   2929 L$cbc_ret:
   2930 	.byte	0xf3,0xc3
   2931 
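/*
 * Per the usual OpenSSL interface:
 *   int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
 *                             AES_KEY *key);
 * .byte 0x48,0x83,0xEC,0x08 encodes subq $8,%rsp.  The routine builds the
 * encryption schedule, then swaps the first and last round keys and runs
 * aesimc (.byte 102,15,56,219,NN) over the middle ones while walking the
 * two pointers towards each other, producing the decryption schedule.
 */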
   2932 .globl	_aesni_set_decrypt_key
   2933 .private_extern _aesni_set_decrypt_key
   2934 
   2935 .p2align	4
   2936 _aesni_set_decrypt_key:
   2937 .byte	0x48,0x83,0xEC,0x08
   2938 	call	__aesni_set_encrypt_key
   2939 	shll	$4,%esi
   2940 	testl	%eax,%eax
   2941 	jnz	L$dec_key_ret
   2942 	leaq	16(%rdx,%rsi,1),%rdi
   2943 
   2944 	movups	(%rdx),%xmm0
   2945 	movups	(%rdi),%xmm1
   2946 	movups	%xmm0,(%rdi)
   2947 	movups	%xmm1,(%rdx)
   2948 	leaq	16(%rdx),%rdx
   2949 	leaq	-16(%rdi),%rdi
   2950 
   2951 L$dec_key_inverse:
   2952 	movups	(%rdx),%xmm0
   2953 	movups	(%rdi),%xmm1
   2954 .byte	102,15,56,219,192
   2955 .byte	102,15,56,219,201
   2956 	leaq	16(%rdx),%rdx
   2957 	leaq	-16(%rdi),%rdi
   2958 	movups	%xmm0,16(%rdi)
   2959 	movups	%xmm1,-16(%rdx)
   2960 	cmpq	%rdx,%rdi
   2961 	ja	L$dec_key_inverse
   2962 
   2963 	movups	(%rdx),%xmm0
   2964 .byte	102,15,56,219,192
   2965 	movups	%xmm0,(%rdi)
   2966 L$dec_key_ret:
   2967 	addq	$8,%rsp
   2968 	.byte	0xf3,0xc3
   2969 L$SEH_end_set_decrypt_key:
   2970 
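/*
 * Per the usual OpenSSL interface:
 *   int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
 *                             AES_KEY *key);
 * Returns 0 on success, -1 for NULL pointers, -2 for unsupported key sizes.
 * 128/192/256-bit keys select 10/12/14 rounds, and the round count lands at
 * offset 240 of the key schedule.  .byte 102,15,58,223,200,IMM encodes
 * aeskeygenassist $IMM,%xmm0,%xmm1 with the round constant as immediate
 * (the 202 forms take %xmm2 as source).
 */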
   2971 .globl	_aesni_set_encrypt_key
   2972 .private_extern _aesni_set_encrypt_key
   2973 
   2974 .p2align	4
   2975 _aesni_set_encrypt_key:
   2976 __aesni_set_encrypt_key:
   2977 .byte	0x48,0x83,0xEC,0x08
   2978 	movq	$-1,%rax
   2979 	testq	%rdi,%rdi
   2980 	jz	L$enc_key_ret
   2981 	testq	%rdx,%rdx
   2982 	jz	L$enc_key_ret
   2983 
   2984 	movups	(%rdi),%xmm0
   2985 	xorps	%xmm4,%xmm4
   2986 	leaq	16(%rdx),%rax
   2987 	cmpl	$256,%esi
   2988 	je	L$14rounds
   2989 	cmpl	$192,%esi
   2990 	je	L$12rounds
   2991 	cmpl	$128,%esi
   2992 	jne	L$bad_keybits
   2993 
   2994 L$10rounds:
   2995 	movl	$9,%esi
   2996 	movups	%xmm0,(%rdx)
   2997 .byte	102,15,58,223,200,1
   2998 	call	L$key_expansion_128_cold
   2999 .byte	102,15,58,223,200,2
   3000 	call	L$key_expansion_128
   3001 .byte	102,15,58,223,200,4
   3002 	call	L$key_expansion_128
   3003 .byte	102,15,58,223,200,8
   3004 	call	L$key_expansion_128
   3005 .byte	102,15,58,223,200,16
   3006 	call	L$key_expansion_128
   3007 .byte	102,15,58,223,200,32
   3008 	call	L$key_expansion_128
   3009 .byte	102,15,58,223,200,64
   3010 	call	L$key_expansion_128
   3011 .byte	102,15,58,223,200,128
   3012 	call	L$key_expansion_128
   3013 .byte	102,15,58,223,200,27
   3014 	call	L$key_expansion_128
   3015 .byte	102,15,58,223,200,54
   3016 	call	L$key_expansion_128
   3017 	movups	%xmm0,(%rax)
   3018 	movl	%esi,80(%rax)
   3019 	xorl	%eax,%eax
   3020 	jmp	L$enc_key_ret
   3021 
   3022 .p2align	4
   3023 L$12rounds:
   3024 	movq	16(%rdi),%xmm2
   3025 	movl	$11,%esi
   3026 	movups	%xmm0,(%rdx)
   3027 .byte	102,15,58,223,202,1
   3028 	call	L$key_expansion_192a_cold
   3029 .byte	102,15,58,223,202,2
   3030 	call	L$key_expansion_192b
   3031 .byte	102,15,58,223,202,4
   3032 	call	L$key_expansion_192a
   3033 .byte	102,15,58,223,202,8
   3034 	call	L$key_expansion_192b
   3035 .byte	102,15,58,223,202,16
   3036 	call	L$key_expansion_192a
   3037 .byte	102,15,58,223,202,32
   3038 	call	L$key_expansion_192b
   3039 .byte	102,15,58,223,202,64
   3040 	call	L$key_expansion_192a
   3041 .byte	102,15,58,223,202,128
   3042 	call	L$key_expansion_192b
   3043 	movups	%xmm0,(%rax)
   3044 	movl	%esi,48(%rax)
   3045 	xorq	%rax,%rax
   3046 	jmp	L$enc_key_ret
   3047 
   3048 .p2align	4
   3049 L$14rounds:
   3050 	movups	16(%rdi),%xmm2
   3051 	movl	$13,%esi
   3052 	leaq	16(%rax),%rax
   3053 	movups	%xmm0,(%rdx)
   3054 	movups	%xmm2,16(%rdx)
   3055 .byte	102,15,58,223,202,1
   3056 	call	L$key_expansion_256a_cold
   3057 .byte	102,15,58,223,200,1
   3058 	call	L$key_expansion_256b
   3059 .byte	102,15,58,223,202,2
   3060 	call	L$key_expansion_256a
   3061 .byte	102,15,58,223,200,2
   3062 	call	L$key_expansion_256b
   3063 .byte	102,15,58,223,202,4
   3064 	call	L$key_expansion_256a
   3065 .byte	102,15,58,223,200,4
   3066 	call	L$key_expansion_256b
   3067 .byte	102,15,58,223,202,8
   3068 	call	L$key_expansion_256a
   3069 .byte	102,15,58,223,200,8
   3070 	call	L$key_expansion_256b
   3071 .byte	102,15,58,223,202,16
   3072 	call	L$key_expansion_256a
   3073 .byte	102,15,58,223,200,16
   3074 	call	L$key_expansion_256b
   3075 .byte	102,15,58,223,202,32
   3076 	call	L$key_expansion_256a
   3077 .byte	102,15,58,223,200,32
   3078 	call	L$key_expansion_256b
   3079 .byte	102,15,58,223,202,64
   3080 	call	L$key_expansion_256a
   3081 	movups	%xmm0,(%rax)
   3082 	movl	%esi,16(%rax)
   3083 	xorq	%rax,%rax
   3084 	jmp	L$enc_key_ret
   3085 
   3086 .p2align	4
   3087 L$bad_keybits:
   3088 	movq	$-2,%rax
   3089 L$enc_key_ret:
   3090 	addq	$8,%rsp
   3091 	.byte	0xf3,0xc3
   3092 L$SEH_end_set_encrypt_key:
   3093 
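/*
 * Key-expansion helpers.  Each shufps $16 / shufps $140 pair uses %xmm4
 * (zeroed in the caller) as scratch to fold a running xor of the previous
 * round key's words into %xmm0 (equivalent to xoring in that key shifted
 * left by 4, 8 and 12 bytes), while the shufps/pshufd on %xmm1 broadcasts
 * the aeskeygenassist output before it is xored in.
 */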
   3094 .p2align	4
   3095 L$key_expansion_128:
   3096 	movups	%xmm0,(%rax)
   3097 	leaq	16(%rax),%rax
   3098 L$key_expansion_128_cold:
   3099 	shufps	$16,%xmm0,%xmm4
   3100 	xorps	%xmm4,%xmm0
   3101 	shufps	$140,%xmm0,%xmm4
   3102 	xorps	%xmm4,%xmm0
   3103 	shufps	$255,%xmm1,%xmm1
   3104 	xorps	%xmm1,%xmm0
   3105 	.byte	0xf3,0xc3
   3106 
   3107 .p2align	4
   3108 L$key_expansion_192a:
   3109 	movups	%xmm0,(%rax)
   3110 	leaq	16(%rax),%rax
   3111 L$key_expansion_192a_cold:
   3112 	movaps	%xmm2,%xmm5
   3113 L$key_expansion_192b_warm:
   3114 	shufps	$16,%xmm0,%xmm4
   3115 	movdqa	%xmm2,%xmm3
   3116 	xorps	%xmm4,%xmm0
   3117 	shufps	$140,%xmm0,%xmm4
   3118 	pslldq	$4,%xmm3
   3119 	xorps	%xmm4,%xmm0
   3120 	pshufd	$85,%xmm1,%xmm1
   3121 	pxor	%xmm3,%xmm2
   3122 	pxor	%xmm1,%xmm0
   3123 	pshufd	$255,%xmm0,%xmm3
   3124 	pxor	%xmm3,%xmm2
   3125 	.byte	0xf3,0xc3
   3126 
   3127 .p2align	4
   3128 L$key_expansion_192b:
   3129 	movaps	%xmm0,%xmm3
   3130 	shufps	$68,%xmm0,%xmm5
   3131 	movups	%xmm5,(%rax)
   3132 	shufps	$78,%xmm2,%xmm3
   3133 	movups	%xmm3,16(%rax)
   3134 	leaq	32(%rax),%rax
   3135 	jmp	L$key_expansion_192b_warm
   3136 
   3137 .p2align	4
   3138 L$key_expansion_256a:
   3139 	movups	%xmm2,(%rax)
   3140 	leaq	16(%rax),%rax
   3141 L$key_expansion_256a_cold:
   3142 	shufps	$16,%xmm0,%xmm4
   3143 	xorps	%xmm4,%xmm0
   3144 	shufps	$140,%xmm0,%xmm4
   3145 	xorps	%xmm4,%xmm0
   3146 	shufps	$255,%xmm1,%xmm1
   3147 	xorps	%xmm1,%xmm0
   3148 	.byte	0xf3,0xc3
   3149 
   3150 .p2align	4
   3151 L$key_expansion_256b:
   3152 	movups	%xmm0,(%rax)
   3153 	leaq	16(%rax),%rax
   3154 
   3155 	shufps	$16,%xmm2,%xmm4
   3156 	xorps	%xmm4,%xmm2
   3157 	shufps	$140,%xmm2,%xmm4
   3158 	xorps	%xmm4,%xmm2
   3159 	shufps	$170,%xmm1,%xmm1
   3160 	xorps	%xmm1,%xmm2
   3161 	.byte	0xf3,0xc3
   3162 
   3163 
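/*
 * Constant pool: L$bswap_mask is a pshufb byte-reversal mask;
 * L$increment32, L$increment64 and L$increment1 are counter increments
 * used by the counter-mode code elsewhere in this file; L$xts_magic (0x87)
 * is the GF(2^128) reduction constant for the XTS tweak update.  The
 * trailing .byte string is the ASCII credit "AES for Intel AES-NI,
 * CRYPTOGAMS by <appro@openssl.org>".
 */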
   3164 .p2align	6
   3165 L$bswap_mask:
   3166 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   3167 L$increment32:
   3168 .long	6,6,6,0
   3169 L$increment64:
   3170 .long	1,0,0,0
   3171 L$xts_magic:
   3172 .long	0x87,0,1,0
   3173 L$increment1:
   3174 .byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
   3175 
   3176 .byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
   3177 .p2align	6
   3178 #endif
   3179