Home | History | Annotate | Download | only in aes
      1 #if defined(__x86_64__)
      2 .text
      3 
      4 .globl	_aesni_encrypt
      5 .private_extern _aesni_encrypt
      6 
      7 .p2align	4
        // _aesni_encrypt: encrypt a single 16-byte block.
        //   %rdi = input block  (16 bytes read, unaligned load)
        //   %rsi = output block (16 bytes written, unaligned store)
        //   %rdx = key schedule; the 32-bit value at offset 240 is the loop
        //          count N, and round keys are read at 16-byte strides
        //          starting at offset 0
        // Modifies %eax, %rdx, %xmm0-%xmm2 and flags. %xmm0-%xmm2 are zeroed
        // before returning.
        // The .byte sequences are hand-encoded AES-NI instructions:
        //   102,15,56,220,209 = aesenc     %xmm1,%xmm2
        //   102,15,56,221,209 = aesenclast %xmm1,%xmm2
      8 _aesni_encrypt:
      9 	movups	(%rdi),%xmm2
     10 	movl	240(%rdx),%eax
     11 	movups	(%rdx),%xmm0
     12 	movups	16(%rdx),%xmm1
     13 	leaq	32(%rdx),%rdx
        // Initial whitening: block ^= round key 0.
     14 	xorps	%xmm0,%xmm2
        // N iterations of aesenc, each followed by a load of the next round
        // key. movups and leaq leave flags alone, so jnz tests the decl result.
     15 L$oop_enc1_1:
     16 .byte	102,15,56,220,209
     17 	decl	%eax
     18 	movups	(%rdx),%xmm1
     19 	leaq	16(%rdx),%rdx
     20 	jnz	L$oop_enc1_1
        // Final round with the last key loaded by the loop.
     21 .byte	102,15,56,221,209
        // Wipe key material, store the result, then wipe the data register.
     22 	pxor	%xmm0,%xmm0
     23 	pxor	%xmm1,%xmm1
     24 	movups	%xmm2,(%rsi)
     25 	pxor	%xmm2,%xmm2
        // 0xf3,0xc3 = rep ret
     26 	.byte	0xf3,0xc3
     27 
     28 
     29 .globl	_aesni_decrypt
     30 .private_extern _aesni_decrypt
     31 
     32 .p2align	4
        // _aesni_decrypt: decrypt a single 16-byte block.
        //   %rdi = input block  (16 bytes read, unaligned load)
        //   %rsi = output block (16 bytes written, unaligned store)
        //   %rdx = key schedule; the 32-bit value at offset 240 is the loop
        //          count N, and round keys are read at 16-byte strides
        //          starting at offset 0
        // Modifies %eax, %rdx, %xmm0-%xmm2 and flags. %xmm0-%xmm2 are zeroed
        // before returning.
        // The .byte sequences are hand-encoded AES-NI instructions:
        //   102,15,56,222,209 = aesdec     %xmm1,%xmm2
        //   102,15,56,223,209 = aesdeclast %xmm1,%xmm2
     33 _aesni_decrypt:
     34 	movups	(%rdi),%xmm2
     35 	movl	240(%rdx),%eax
     36 	movups	(%rdx),%xmm0
     37 	movups	16(%rdx),%xmm1
     38 	leaq	32(%rdx),%rdx
        // Initial whitening: block ^= round key 0.
     39 	xorps	%xmm0,%xmm2
        // N iterations of aesdec, each followed by a load of the next round
        // key. movups and leaq leave flags alone, so jnz tests the decl result.
     40 L$oop_dec1_2:
     41 .byte	102,15,56,222,209
     42 	decl	%eax
     43 	movups	(%rdx),%xmm1
     44 	leaq	16(%rdx),%rdx
     45 	jnz	L$oop_dec1_2
        // Final round with the last key loaded by the loop.
     46 .byte	102,15,56,223,209
        // Wipe key material, store the result, then wipe the data register.
     47 	pxor	%xmm0,%xmm0
     48 	pxor	%xmm1,%xmm1
     49 	movups	%xmm2,(%rsi)
     50 	pxor	%xmm2,%xmm2
        // 0xf3,0xc3 = rep ret
     51 	.byte	0xf3,0xc3
     52 
     53 
     54 .p2align	4
        // _aesni_encrypt2: file-local helper, encrypts two blocks in parallel.
        // Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2,%xmm3 = blocks
        //   out: %xmm2,%xmm3 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,220,M = aesenc, 102,15,56,221,M =
        // aesenclast. M = 209/217 uses %xmm1 as the key and %xmm2/%xmm3 as
        // the destination; M = 208/216 uses %xmm0 as the key.
     55 _aesni_encrypt2:
     56 	movups	(%rcx),%xmm0
        // %eax = 16*N (byte size of N round keys); the 32-bit shift also
        // clears the upper half of %rax.
     57 	shll	$4,%eax
     58 	movups	16(%rcx),%xmm1
     59 	xorps	%xmm0,%xmm2
     60 	xorps	%xmm0,%xmm3
     61 	movups	32(%rcx),%xmm0
        // %rcx = key + 32 + 16*N and %rax = 16 - 16*N, so (%rcx,%rax) walks
        // forward through the round keys while %rax counts up towards zero.
     62 	leaq	32(%rcx,%rax,1),%rcx
     63 	negq	%rax
     64 	addq	$16,%rax
     65 
        // Two rounds per iteration, alternating %xmm1 and %xmm0 as the key
        // register. %rax advances by 32, so the loop exits only if it reaches
        // exactly zero (i.e. N is odd). jnz tests the addq result because
        // movups does not modify flags.
     66 L$enc_loop2:
     67 .byte	102,15,56,220,209
     68 .byte	102,15,56,220,217
     69 	movups	(%rcx,%rax,1),%xmm1
     70 	addq	$32,%rax
     71 .byte	102,15,56,220,208
     72 .byte	102,15,56,220,216
     73 	movups	-16(%rcx,%rax,1),%xmm0
     74 	jnz	L$enc_loop2
     75 
        // Last aesenc with %xmm1, then aesenclast with %xmm0.
     76 .byte	102,15,56,220,209
     77 .byte	102,15,56,220,217
     78 .byte	102,15,56,221,208
     79 .byte	102,15,56,221,216
        // 0xf3,0xc3 = rep ret
     80 	.byte	0xf3,0xc3
     81 
     82 
     83 .p2align	4
        // _aesni_decrypt2: file-local helper, decrypts two blocks in parallel.
        // Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2,%xmm3 = blocks
        //   out: %xmm2,%xmm3 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,222,M = aesdec, 102,15,56,223,M =
        // aesdeclast. M = 209/217 uses %xmm1 as the key and %xmm2/%xmm3 as
        // the destination; M = 208/216 uses %xmm0 as the key.
     84 _aesni_decrypt2:
     85 	movups	(%rcx),%xmm0
        // %eax = 16*N; the 32-bit shift also clears the upper half of %rax.
     86 	shll	$4,%eax
     87 	movups	16(%rcx),%xmm1
     88 	xorps	%xmm0,%xmm2
     89 	xorps	%xmm0,%xmm3
     90 	movups	32(%rcx),%xmm0
        // %rcx = key + 32 + 16*N and %rax = 16 - 16*N, so (%rcx,%rax) walks
        // forward through the round keys while %rax counts up towards zero.
     91 	leaq	32(%rcx,%rax,1),%rcx
     92 	negq	%rax
     93 	addq	$16,%rax
     94 
        // Two rounds per iteration, alternating %xmm1 and %xmm0 as the key
        // register. %rax advances by 32, so the loop exits only if it reaches
        // exactly zero (i.e. N is odd). jnz tests the addq result because
        // movups does not modify flags.
     95 L$dec_loop2:
     96 .byte	102,15,56,222,209
     97 .byte	102,15,56,222,217
     98 	movups	(%rcx,%rax,1),%xmm1
     99 	addq	$32,%rax
    100 .byte	102,15,56,222,208
    101 .byte	102,15,56,222,216
    102 	movups	-16(%rcx,%rax,1),%xmm0
    103 	jnz	L$dec_loop2
    104 
        // Last aesdec with %xmm1, then aesdeclast with %xmm0.
    105 .byte	102,15,56,222,209
    106 .byte	102,15,56,222,217
    107 .byte	102,15,56,223,208
    108 .byte	102,15,56,223,216
        // 0xf3,0xc3 = rep ret
    109 	.byte	0xf3,0xc3
    110 
    111 
    112 .p2align	4
        // _aesni_encrypt3: file-local helper, encrypts three blocks in
        // parallel. Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2-%xmm4 = blocks
        //   out: %xmm2-%xmm4 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,220,M = aesenc, 102,15,56,221,M =
        // aesenclast. M = 209/217/225 uses %xmm1 as the key and
        // %xmm2/%xmm3/%xmm4 as the destination; M = 208/216/224 uses %xmm0.
    113 _aesni_encrypt3:
    114 	movups	(%rcx),%xmm0
        // %eax = 16*N; the 32-bit shift also clears the upper half of %rax.
    115 	shll	$4,%eax
    116 	movups	16(%rcx),%xmm1
    117 	xorps	%xmm0,%xmm2
    118 	xorps	%xmm0,%xmm3
    119 	xorps	%xmm0,%xmm4
    120 	movups	32(%rcx),%xmm0
        // %rcx = key + 32 + 16*N and %rax = 16 - 16*N: negative index that
        // counts up to zero while stepping through the round keys.
    121 	leaq	32(%rcx,%rax,1),%rcx
    122 	negq	%rax
    123 	addq	$16,%rax
    124 
        // Two rounds per iteration (key in %xmm1, then key in %xmm0). The loop
        // ends when %rax reaches exactly zero; movups leaves the flags set by
        // addq intact for jnz.
    125 L$enc_loop3:
    126 .byte	102,15,56,220,209
    127 .byte	102,15,56,220,217
    128 .byte	102,15,56,220,225
    129 	movups	(%rcx,%rax,1),%xmm1
    130 	addq	$32,%rax
    131 .byte	102,15,56,220,208
    132 .byte	102,15,56,220,216
    133 .byte	102,15,56,220,224
    134 	movups	-16(%rcx,%rax,1),%xmm0
    135 	jnz	L$enc_loop3
    136 
        // Last aesenc with %xmm1, then aesenclast with %xmm0.
    137 .byte	102,15,56,220,209
    138 .byte	102,15,56,220,217
    139 .byte	102,15,56,220,225
    140 .byte	102,15,56,221,208
    141 .byte	102,15,56,221,216
    142 .byte	102,15,56,221,224
        // 0xf3,0xc3 = rep ret
    143 	.byte	0xf3,0xc3
    144 
    145 
    146 .p2align	4
        // _aesni_decrypt3: file-local helper, decrypts three blocks in
        // parallel. Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2-%xmm4 = blocks
        //   out: %xmm2-%xmm4 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,222,M = aesdec, 102,15,56,223,M =
        // aesdeclast. M = 209/217/225 uses %xmm1 as the key and
        // %xmm2/%xmm3/%xmm4 as the destination; M = 208/216/224 uses %xmm0.
    147 _aesni_decrypt3:
    148 	movups	(%rcx),%xmm0
        // %eax = 16*N; the 32-bit shift also clears the upper half of %rax.
    149 	shll	$4,%eax
    150 	movups	16(%rcx),%xmm1
    151 	xorps	%xmm0,%xmm2
    152 	xorps	%xmm0,%xmm3
    153 	xorps	%xmm0,%xmm4
    154 	movups	32(%rcx),%xmm0
        // %rcx = key + 32 + 16*N and %rax = 16 - 16*N: negative index that
        // counts up to zero while stepping through the round keys.
    155 	leaq	32(%rcx,%rax,1),%rcx
    156 	negq	%rax
    157 	addq	$16,%rax
    158 
        // Two rounds per iteration (key in %xmm1, then key in %xmm0). The loop
        // ends when %rax reaches exactly zero; movups leaves the flags set by
        // addq intact for jnz.
    159 L$dec_loop3:
    160 .byte	102,15,56,222,209
    161 .byte	102,15,56,222,217
    162 .byte	102,15,56,222,225
    163 	movups	(%rcx,%rax,1),%xmm1
    164 	addq	$32,%rax
    165 .byte	102,15,56,222,208
    166 .byte	102,15,56,222,216
    167 .byte	102,15,56,222,224
    168 	movups	-16(%rcx,%rax,1),%xmm0
    169 	jnz	L$dec_loop3
    170 
        // Last aesdec with %xmm1, then aesdeclast with %xmm0.
    171 .byte	102,15,56,222,209
    172 .byte	102,15,56,222,217
    173 .byte	102,15,56,222,225
    174 .byte	102,15,56,223,208
    175 .byte	102,15,56,223,216
    176 .byte	102,15,56,223,224
        // 0xf3,0xc3 = rep ret
    177 	.byte	0xf3,0xc3
    178 
    179 
    180 .p2align	4
        // _aesni_encrypt4: file-local helper, encrypts four blocks in
        // parallel. Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2-%xmm5 = blocks
        //   out: %xmm2-%xmm5 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,220,M = aesenc, 102,15,56,221,M =
        // aesenclast. M = 209/217/225/233 uses %xmm1 as the key and
        // %xmm2..%xmm5 as the destination; M = 208/216/224/232 uses %xmm0.
    181 _aesni_encrypt4:
    182 	movups	(%rcx),%xmm0
        // %eax = 16*N; the 32-bit shift also clears the upper half of %rax.
    183 	shll	$4,%eax
    184 	movups	16(%rcx),%xmm1
    185 	xorps	%xmm0,%xmm2
    186 	xorps	%xmm0,%xmm3
    187 	xorps	%xmm0,%xmm4
    188 	xorps	%xmm0,%xmm5
    189 	movups	32(%rcx),%xmm0
        // %rcx = key + 32 + 16*N and %rax = 16 - 16*N: negative index that
        // counts up to zero while stepping through the round keys.
    190 	leaq	32(%rcx,%rax,1),%rcx
    191 	negq	%rax
        // 0x0f,0x1f,0x00 = nopl (%rax), a 3-byte no-op (presumably padding
        // for code alignment).
    192 .byte	0x0f,0x1f,0x00
    193 	addq	$16,%rax
    194 
        // Two rounds per iteration (key in %xmm1, then key in %xmm0). The loop
        // ends when %rax reaches exactly zero; movups leaves the flags set by
        // addq intact for jnz.
    195 L$enc_loop4:
    196 .byte	102,15,56,220,209
    197 .byte	102,15,56,220,217
    198 .byte	102,15,56,220,225
    199 .byte	102,15,56,220,233
    200 	movups	(%rcx,%rax,1),%xmm1
    201 	addq	$32,%rax
    202 .byte	102,15,56,220,208
    203 .byte	102,15,56,220,216
    204 .byte	102,15,56,220,224
    205 .byte	102,15,56,220,232
    206 	movups	-16(%rcx,%rax,1),%xmm0
    207 	jnz	L$enc_loop4
    208 
        // Last aesenc with %xmm1, then aesenclast with %xmm0.
    209 .byte	102,15,56,220,209
    210 .byte	102,15,56,220,217
    211 .byte	102,15,56,220,225
    212 .byte	102,15,56,220,233
    213 .byte	102,15,56,221,208
    214 .byte	102,15,56,221,216
    215 .byte	102,15,56,221,224
    216 .byte	102,15,56,221,232
        // 0xf3,0xc3 = rep ret
    217 	.byte	0xf3,0xc3
    218 
    219 
    220 .p2align	4
        // _aesni_decrypt4: file-local helper, decrypts four blocks in
        // parallel. Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2-%xmm5 = blocks
        //   out: %xmm2-%xmm5 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,222,M = aesdec, 102,15,56,223,M =
        // aesdeclast. M = 209/217/225/233 uses %xmm1 as the key and
        // %xmm2..%xmm5 as the destination; M = 208/216/224/232 uses %xmm0.
    221 _aesni_decrypt4:
    222 	movups	(%rcx),%xmm0
        // %eax = 16*N; the 32-bit shift also clears the upper half of %rax.
    223 	shll	$4,%eax
    224 	movups	16(%rcx),%xmm1
    225 	xorps	%xmm0,%xmm2
    226 	xorps	%xmm0,%xmm3
    227 	xorps	%xmm0,%xmm4
    228 	xorps	%xmm0,%xmm5
    229 	movups	32(%rcx),%xmm0
        // %rcx = key + 32 + 16*N and %rax = 16 - 16*N: negative index that
        // counts up to zero while stepping through the round keys.
    230 	leaq	32(%rcx,%rax,1),%rcx
    231 	negq	%rax
        // 0x0f,0x1f,0x00 = nopl (%rax), a 3-byte no-op (presumably padding
        // for code alignment).
    232 .byte	0x0f,0x1f,0x00
    233 	addq	$16,%rax
    234 
        // Two rounds per iteration (key in %xmm1, then key in %xmm0). The loop
        // ends when %rax reaches exactly zero; movups leaves the flags set by
        // addq intact for jnz.
    235 L$dec_loop4:
    236 .byte	102,15,56,222,209
    237 .byte	102,15,56,222,217
    238 .byte	102,15,56,222,225
    239 .byte	102,15,56,222,233
    240 	movups	(%rcx,%rax,1),%xmm1
    241 	addq	$32,%rax
    242 .byte	102,15,56,222,208
    243 .byte	102,15,56,222,216
    244 .byte	102,15,56,222,224
    245 .byte	102,15,56,222,232
    246 	movups	-16(%rcx,%rax,1),%xmm0
    247 	jnz	L$dec_loop4
    248 
        // Last aesdec with %xmm1, then aesdeclast with %xmm0.
    249 .byte	102,15,56,222,209
    250 .byte	102,15,56,222,217
    251 .byte	102,15,56,222,225
    252 .byte	102,15,56,222,233
    253 .byte	102,15,56,223,208
    254 .byte	102,15,56,223,216
    255 .byte	102,15,56,223,224
    256 .byte	102,15,56,223,232
        // 0xf3,0xc3 = rep ret
    257 	.byte	0xf3,0xc3
    258 
    259 
    260 .p2align	4
        // _aesni_encrypt6: file-local helper, encrypts six blocks in parallel.
        // Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2-%xmm7 = blocks
        //   out: %xmm2-%xmm7 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,220,M = aesenc, 102,15,56,221,M =
        // aesenclast. M = 209/217/225/233/241/249 uses %xmm1 as the key and
        // %xmm2..%xmm7 as the destination; M = 208/216/224/232/240/248 uses
        // %xmm0 as the key.
        // The first aesenc round on %xmm2-%xmm4 is interleaved with the
        // whitening of %xmm5-%xmm7, so the loop is entered in the middle at
        // L$enc_loop6_enter. L$enc_loop6 is also a direct call target from
        // the ctr32 code later in this file.
    261 _aesni_encrypt6:
    262 	movups	(%rcx),%xmm0
        // %eax = 16*N; the 32-bit shift also clears the upper half of %rax.
    263 	shll	$4,%eax
    264 	movups	16(%rcx),%xmm1
    265 	xorps	%xmm0,%xmm2
    266 	pxor	%xmm0,%xmm3
    267 	pxor	%xmm0,%xmm4
    268 .byte	102,15,56,220,209
        // %rcx = key + 32 + 16*N, %rax = -16*N.
    269 	leaq	32(%rcx,%rax,1),%rcx
    270 	negq	%rax
    271 .byte	102,15,56,220,217
    272 	pxor	%xmm0,%xmm5
    273 	pxor	%xmm0,%xmm6
    274 .byte	102,15,56,220,225
    275 	pxor	%xmm0,%xmm7
        // (%rcx,%rax) is key + 32 here: load the next round key into %xmm0,
        // then bias %rax to 16 - 16*N for the loop.
    276 	movups	(%rcx,%rax,1),%xmm0
    277 	addq	$16,%rax
    278 	jmp	L$enc_loop6_enter
    279 .p2align	4
        // Two rounds per iteration (key in %xmm1, then key in %xmm0). The loop
        // ends when %rax reaches exactly zero; movups leaves the flags set by
        // addq intact for jnz.
    280 L$enc_loop6:
    281 .byte	102,15,56,220,209
    282 .byte	102,15,56,220,217
    283 .byte	102,15,56,220,225
    284 L$enc_loop6_enter:
    285 .byte	102,15,56,220,233
    286 .byte	102,15,56,220,241
    287 .byte	102,15,56,220,249
    288 	movups	(%rcx,%rax,1),%xmm1
    289 	addq	$32,%rax
    290 .byte	102,15,56,220,208
    291 .byte	102,15,56,220,216
    292 .byte	102,15,56,220,224
    293 .byte	102,15,56,220,232
    294 .byte	102,15,56,220,240
    295 .byte	102,15,56,220,248
    296 	movups	-16(%rcx,%rax,1),%xmm0
    297 	jnz	L$enc_loop6
    298 
        // Last aesenc with %xmm1, then aesenclast with %xmm0.
    299 .byte	102,15,56,220,209
    300 .byte	102,15,56,220,217
    301 .byte	102,15,56,220,225
    302 .byte	102,15,56,220,233
    303 .byte	102,15,56,220,241
    304 .byte	102,15,56,220,249
    305 .byte	102,15,56,221,208
    306 .byte	102,15,56,221,216
    307 .byte	102,15,56,221,224
    308 .byte	102,15,56,221,232
    309 .byte	102,15,56,221,240
    310 .byte	102,15,56,221,248
        // 0xf3,0xc3 = rep ret
    311 	.byte	0xf3,0xc3
    312 
    313 
    314 .p2align	4
        // _aesni_decrypt6: file-local helper, decrypts six blocks in parallel.
        // Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2-%xmm7 = blocks
        //   out: %xmm2-%xmm7 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,222,M = aesdec, 102,15,56,223,M =
        // aesdeclast. M = 209/217/225/233/241/249 uses %xmm1 as the key and
        // %xmm2..%xmm7 as the destination; M = 208/216/224/232/240/248 uses
        // %xmm0 as the key.
        // The first aesdec round on %xmm2-%xmm4 is interleaved with the
        // whitening of %xmm5-%xmm7, so the loop is entered in the middle at
        // L$dec_loop6_enter.
    315 _aesni_decrypt6:
    316 	movups	(%rcx),%xmm0
        // %eax = 16*N; the 32-bit shift also clears the upper half of %rax.
    317 	shll	$4,%eax
    318 	movups	16(%rcx),%xmm1
    319 	xorps	%xmm0,%xmm2
    320 	pxor	%xmm0,%xmm3
    321 	pxor	%xmm0,%xmm4
    322 .byte	102,15,56,222,209
        // %rcx = key + 32 + 16*N, %rax = -16*N.
    323 	leaq	32(%rcx,%rax,1),%rcx
    324 	negq	%rax
    325 .byte	102,15,56,222,217
    326 	pxor	%xmm0,%xmm5
    327 	pxor	%xmm0,%xmm6
    328 .byte	102,15,56,222,225
    329 	pxor	%xmm0,%xmm7
        // (%rcx,%rax) is key + 32 here: load the next round key into %xmm0,
        // then bias %rax to 16 - 16*N for the loop.
    330 	movups	(%rcx,%rax,1),%xmm0
    331 	addq	$16,%rax
    332 	jmp	L$dec_loop6_enter
    333 .p2align	4
        // Two rounds per iteration (key in %xmm1, then key in %xmm0). The loop
        // ends when %rax reaches exactly zero; movups leaves the flags set by
        // addq intact for jnz.
    334 L$dec_loop6:
    335 .byte	102,15,56,222,209
    336 .byte	102,15,56,222,217
    337 .byte	102,15,56,222,225
    338 L$dec_loop6_enter:
    339 .byte	102,15,56,222,233
    340 .byte	102,15,56,222,241
    341 .byte	102,15,56,222,249
    342 	movups	(%rcx,%rax,1),%xmm1
    343 	addq	$32,%rax
    344 .byte	102,15,56,222,208
    345 .byte	102,15,56,222,216
    346 .byte	102,15,56,222,224
    347 .byte	102,15,56,222,232
    348 .byte	102,15,56,222,240
    349 .byte	102,15,56,222,248
    350 	movups	-16(%rcx,%rax,1),%xmm0
    351 	jnz	L$dec_loop6
    352 
        // Last aesdec with %xmm1, then aesdeclast with %xmm0.
    353 .byte	102,15,56,222,209
    354 .byte	102,15,56,222,217
    355 .byte	102,15,56,222,225
    356 .byte	102,15,56,222,233
    357 .byte	102,15,56,222,241
    358 .byte	102,15,56,222,249
    359 .byte	102,15,56,223,208
    360 .byte	102,15,56,223,216
    361 .byte	102,15,56,223,224
    362 .byte	102,15,56,223,232
    363 .byte	102,15,56,223,240
    364 .byte	102,15,56,223,248
        // 0xf3,0xc3 = rep ret
    365 	.byte	0xf3,0xc3
    366 
    367 
    368 .p2align	4
        // _aesni_encrypt8: file-local helper, encrypts eight blocks in
        // parallel. Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2-%xmm9 = blocks
        //   out: %xmm2-%xmm9 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,220,M = aesenc, 102,15,56,221,M =
        // aesenclast. M = 209/217/225/233/241/249 uses %xmm1 as the key and
        // %xmm2..%xmm7 as the destination; M = 208/216/224/232/240/248 uses
        // %xmm0 as the key. The 6-byte forms 102,68,15,56,OP,M carry a REX
        // prefix (0x44) that selects %xmm8 (M = 193 or 192) or %xmm9
        // (M = 201 or 200) as the destination, with %xmm1 / %xmm0 as the key.
        // The first aesenc round on %xmm2,%xmm3 is interleaved with the
        // whitening of the remaining blocks, so the loop is entered in the
        // middle at L$enc_loop8_inner.
    369 _aesni_encrypt8:
    370 	movups	(%rcx),%xmm0
        // %eax = 16*N; the 32-bit shift also clears the upper half of %rax.
    371 	shll	$4,%eax
    372 	movups	16(%rcx),%xmm1
    373 	xorps	%xmm0,%xmm2
    374 	xorps	%xmm0,%xmm3
    375 	pxor	%xmm0,%xmm4
    376 	pxor	%xmm0,%xmm5
    377 	pxor	%xmm0,%xmm6
        // %rcx = key + 32 + 16*N, %rax = -16*N.
    378 	leaq	32(%rcx,%rax,1),%rcx
    379 	negq	%rax
    380 .byte	102,15,56,220,209
    381 	pxor	%xmm0,%xmm7
    382 	pxor	%xmm0,%xmm8
    383 .byte	102,15,56,220,217
    384 	pxor	%xmm0,%xmm9
        // (%rcx,%rax) is key + 32 here: load the next round key into %xmm0,
        // then bias %rax to 16 - 16*N for the loop.
    385 	movups	(%rcx,%rax,1),%xmm0
    386 	addq	$16,%rax
    387 	jmp	L$enc_loop8_inner
    388 .p2align	4
        // Two rounds per iteration (key in %xmm1, then key in %xmm0). The loop
        // ends when %rax reaches exactly zero; movups leaves the flags set by
        // addq intact for jnz.
    389 L$enc_loop8:
    390 .byte	102,15,56,220,209
    391 .byte	102,15,56,220,217
    392 L$enc_loop8_inner:
    393 .byte	102,15,56,220,225
    394 .byte	102,15,56,220,233
    395 .byte	102,15,56,220,241
    396 .byte	102,15,56,220,249
    397 .byte	102,68,15,56,220,193
    398 .byte	102,68,15,56,220,201
    399 L$enc_loop8_enter:
    400 	movups	(%rcx,%rax,1),%xmm1
    401 	addq	$32,%rax
    402 .byte	102,15,56,220,208
    403 .byte	102,15,56,220,216
    404 .byte	102,15,56,220,224
    405 .byte	102,15,56,220,232
    406 .byte	102,15,56,220,240
    407 .byte	102,15,56,220,248
    408 .byte	102,68,15,56,220,192
    409 .byte	102,68,15,56,220,200
    410 	movups	-16(%rcx,%rax,1),%xmm0
    411 	jnz	L$enc_loop8
    412 
        // Last aesenc with %xmm1, then aesenclast with %xmm0.
    413 .byte	102,15,56,220,209
    414 .byte	102,15,56,220,217
    415 .byte	102,15,56,220,225
    416 .byte	102,15,56,220,233
    417 .byte	102,15,56,220,241
    418 .byte	102,15,56,220,249
    419 .byte	102,68,15,56,220,193
    420 .byte	102,68,15,56,220,201
    421 .byte	102,15,56,221,208
    422 .byte	102,15,56,221,216
    423 .byte	102,15,56,221,224
    424 .byte	102,15,56,221,232
    425 .byte	102,15,56,221,240
    426 .byte	102,15,56,221,248
    427 .byte	102,68,15,56,221,192
    428 .byte	102,68,15,56,221,200
        // 0xf3,0xc3 = rep ret
    429 	.byte	0xf3,0xc3
    430 
    431 
    432 .p2align	4
        // _aesni_decrypt8: file-local helper, decrypts eight blocks in
        // parallel. Register-based interface (not a C calling convention):
        //   in:  %rcx = key schedule, %eax = loop count N (callers in this file
        //        load it from offset 240 of the key schedule),
        //        %xmm2-%xmm9 = blocks
        //   out: %xmm2-%xmm9 = results
        // Modifies %rax, %rcx, %xmm0, %xmm1 and flags.
        // Encoded instructions: 102,15,56,222,M = aesdec, 102,15,56,223,M =
        // aesdeclast. M = 209/217/225/233/241/249 uses %xmm1 as the key and
        // %xmm2..%xmm7 as the destination; M = 208/216/224/232/240/248 uses
        // %xmm0 as the key. The 6-byte forms 102,68,15,56,OP,M carry a REX
        // prefix (0x44) that selects %xmm8 (M = 193 or 192) or %xmm9
        // (M = 201 or 200) as the destination, with %xmm1 / %xmm0 as the key.
        // The first aesdec round on %xmm2,%xmm3 is interleaved with the
        // whitening of the remaining blocks, so the loop is entered in the
        // middle at L$dec_loop8_inner.
    433 _aesni_decrypt8:
    434 	movups	(%rcx),%xmm0
        // %eax = 16*N; the 32-bit shift also clears the upper half of %rax.
    435 	shll	$4,%eax
    436 	movups	16(%rcx),%xmm1
    437 	xorps	%xmm0,%xmm2
    438 	xorps	%xmm0,%xmm3
    439 	pxor	%xmm0,%xmm4
    440 	pxor	%xmm0,%xmm5
    441 	pxor	%xmm0,%xmm6
        // %rcx = key + 32 + 16*N, %rax = -16*N.
    442 	leaq	32(%rcx,%rax,1),%rcx
    443 	negq	%rax
    444 .byte	102,15,56,222,209
    445 	pxor	%xmm0,%xmm7
    446 	pxor	%xmm0,%xmm8
    447 .byte	102,15,56,222,217
    448 	pxor	%xmm0,%xmm9
        // (%rcx,%rax) is key + 32 here: load the next round key into %xmm0,
        // then bias %rax to 16 - 16*N for the loop.
    449 	movups	(%rcx,%rax,1),%xmm0
    450 	addq	$16,%rax
    451 	jmp	L$dec_loop8_inner
    452 .p2align	4
        // Two rounds per iteration (key in %xmm1, then key in %xmm0). The loop
        // ends when %rax reaches exactly zero; movups leaves the flags set by
        // addq intact for jnz.
    453 L$dec_loop8:
    454 .byte	102,15,56,222,209
    455 .byte	102,15,56,222,217
    456 L$dec_loop8_inner:
    457 .byte	102,15,56,222,225
    458 .byte	102,15,56,222,233
    459 .byte	102,15,56,222,241
    460 .byte	102,15,56,222,249
    461 .byte	102,68,15,56,222,193
    462 .byte	102,68,15,56,222,201
    463 L$dec_loop8_enter:
    464 	movups	(%rcx,%rax,1),%xmm1
    465 	addq	$32,%rax
    466 .byte	102,15,56,222,208
    467 .byte	102,15,56,222,216
    468 .byte	102,15,56,222,224
    469 .byte	102,15,56,222,232
    470 .byte	102,15,56,222,240
    471 .byte	102,15,56,222,248
    472 .byte	102,68,15,56,222,192
    473 .byte	102,68,15,56,222,200
    474 	movups	-16(%rcx,%rax,1),%xmm0
    475 	jnz	L$dec_loop8
    476 
        // Last aesdec with %xmm1, then aesdeclast with %xmm0.
    477 .byte	102,15,56,222,209
    478 .byte	102,15,56,222,217
    479 .byte	102,15,56,222,225
    480 .byte	102,15,56,222,233
    481 .byte	102,15,56,222,241
    482 .byte	102,15,56,222,249
    483 .byte	102,68,15,56,222,193
    484 .byte	102,68,15,56,222,201
    485 .byte	102,15,56,223,208
    486 .byte	102,15,56,223,216
    487 .byte	102,15,56,223,224
    488 .byte	102,15,56,223,232
    489 .byte	102,15,56,223,240
    490 .byte	102,15,56,223,248
    491 .byte	102,68,15,56,223,192
    492 .byte	102,68,15,56,223,200
        // 0xf3,0xc3 = rep ret
    493 	.byte	0xf3,0xc3
    494 
    495 .globl	_aesni_ecb_encrypt
    496 .private_extern _aesni_ecb_encrypt
    497 
    498 .p2align	4
        // _aesni_ecb_encrypt: ECB-mode bulk encryption / decryption.
        //   %rdi = input buffer, %rsi = output buffer
        //   %rdx = length in bytes; rounded down to a multiple of 16 on entry,
        //          and nothing is processed if the result is zero
        //   %rcx = key schedule (32-bit loop count at offset 240)
        //   %r8d = direction: nonzero takes the encrypt path, zero takes the
        //          decrypt path
        // %r11 and %r10d keep copies of the key pointer and loop count because
        // the multi-block helpers modify %rcx and %eax.
        // While at least 128 bytes remain, data is processed eight blocks at a
        // time, with the stores of one batch interleaved with the loads of the
        // next. The remaining 1-7 blocks are dispatched to the 1/2/3/4/6/8
        // block code; the 5- and 7-block cases zero the unused last register
        // and discard its result.
        // Modifies %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm9 and
        // flags. %xmm0 and %xmm1 are zeroed on every exit path; the decrypt
        // paths additionally zero the data registers after storing them.
    499 _aesni_ecb_encrypt:
    500 	andq	$-16,%rdx
    501 	jz	L$ecb_ret
    502 
    503 	movl	240(%rcx),%eax
    504 	movups	(%rcx),%xmm0
    505 	movq	%rcx,%r11
    506 	movl	%eax,%r10d
    507 	testl	%r8d,%r8d
    508 	jz	L$ecb_decrypt
    509 
        // ---- encrypt path ----
    510 	cmpq	$128,%rdx
    511 	jb	L$ecb_enc_tail
    512 
        // Preload the first eight blocks and enter the loop at the call.
    513 	movdqu	(%rdi),%xmm2
    514 	movdqu	16(%rdi),%xmm3
    515 	movdqu	32(%rdi),%xmm4
    516 	movdqu	48(%rdi),%xmm5
    517 	movdqu	64(%rdi),%xmm6
    518 	movdqu	80(%rdi),%xmm7
    519 	movdqu	96(%rdi),%xmm8
    520 	movdqu	112(%rdi),%xmm9
    521 	leaq	128(%rdi),%rdi
    522 	subq	$128,%rdx
    523 	jmp	L$ecb_enc_loop8_enter
    524 .p2align	4
        // Store the previous eight results while loading the next eight
        // inputs, and restore %rcx / %eax for the helper.
    525 L$ecb_enc_loop8:
    526 	movups	%xmm2,(%rsi)
    527 	movq	%r11,%rcx
    528 	movdqu	(%rdi),%xmm2
    529 	movl	%r10d,%eax
    530 	movups	%xmm3,16(%rsi)
    531 	movdqu	16(%rdi),%xmm3
    532 	movups	%xmm4,32(%rsi)
    533 	movdqu	32(%rdi),%xmm4
    534 	movups	%xmm5,48(%rsi)
    535 	movdqu	48(%rdi),%xmm5
    536 	movups	%xmm6,64(%rsi)
    537 	movdqu	64(%rdi),%xmm6
    538 	movups	%xmm7,80(%rsi)
    539 	movdqu	80(%rdi),%xmm7
    540 	movups	%xmm8,96(%rsi)
    541 	movdqu	96(%rdi),%xmm8
    542 	movups	%xmm9,112(%rsi)
    543 	leaq	128(%rsi),%rsi
    544 	movdqu	112(%rdi),%xmm9
    545 	leaq	128(%rdi),%rdi
    546 L$ecb_enc_loop8_enter:
    547 
    548 	call	_aesni_encrypt8
    549 
        // No borrow means at least another 128 bytes are available.
    550 	subq	$128,%rdx
    551 	jnc	L$ecb_enc_loop8
    552 
        // Flush the last full batch, restore %rcx / %eax for the tail code and
        // undo the final subtraction to get the remaining byte count.
    553 	movups	%xmm2,(%rsi)
    554 	movq	%r11,%rcx
    555 	movups	%xmm3,16(%rsi)
    556 	movl	%r10d,%eax
    557 	movups	%xmm4,32(%rsi)
    558 	movups	%xmm5,48(%rsi)
    559 	movups	%xmm6,64(%rsi)
    560 	movups	%xmm7,80(%rsi)
    561 	movups	%xmm8,96(%rsi)
    562 	movups	%xmm9,112(%rsi)
    563 	leaq	128(%rsi),%rsi
    564 	addq	$128,%rdx
    565 	jz	L$ecb_ret
    566 
        // 16..112 bytes remain. Each cmpq result is consumed by two branches
        // (jb, then je); the movups in between does not modify flags.
    567 L$ecb_enc_tail:
    568 	movups	(%rdi),%xmm2
    569 	cmpq	$32,%rdx
    570 	jb	L$ecb_enc_one
    571 	movups	16(%rdi),%xmm3
    572 	je	L$ecb_enc_two
    573 	movups	32(%rdi),%xmm4
    574 	cmpq	$64,%rdx
    575 	jb	L$ecb_enc_three
    576 	movups	48(%rdi),%xmm5
    577 	je	L$ecb_enc_four
    578 	movups	64(%rdi),%xmm6
    579 	cmpq	$96,%rdx
    580 	jb	L$ecb_enc_five
    581 	movups	80(%rdi),%xmm7
    582 	je	L$ecb_enc_six
        // Seven blocks: run the 8-block helper with a zeroed eighth register
        // and store only seven results.
    583 	movdqu	96(%rdi),%xmm8
    584 	xorps	%xmm9,%xmm9
    585 	call	_aesni_encrypt8
    586 	movups	%xmm2,(%rsi)
    587 	movups	%xmm3,16(%rsi)
    588 	movups	%xmm4,32(%rsi)
    589 	movups	%xmm5,48(%rsi)
    590 	movups	%xmm6,64(%rsi)
    591 	movups	%xmm7,80(%rsi)
    592 	movups	%xmm8,96(%rsi)
    593 	jmp	L$ecb_ret
    594 .p2align	4
        // One block: inline single-block loop
        // (102,15,56,220,209 = aesenc %xmm1,%xmm2; 221 = aesenclast).
    595 L$ecb_enc_one:
    596 	movups	(%rcx),%xmm0
    597 	movups	16(%rcx),%xmm1
    598 	leaq	32(%rcx),%rcx
    599 	xorps	%xmm0,%xmm2
    600 L$oop_enc1_3:
    601 .byte	102,15,56,220,209
    602 	decl	%eax
    603 	movups	(%rcx),%xmm1
    604 	leaq	16(%rcx),%rcx
    605 	jnz	L$oop_enc1_3
    606 .byte	102,15,56,221,209
    607 	movups	%xmm2,(%rsi)
    608 	jmp	L$ecb_ret
    609 .p2align	4
    610 L$ecb_enc_two:
    611 	call	_aesni_encrypt2
    612 	movups	%xmm2,(%rsi)
    613 	movups	%xmm3,16(%rsi)
    614 	jmp	L$ecb_ret
    615 .p2align	4
    616 L$ecb_enc_three:
    617 	call	_aesni_encrypt3
    618 	movups	%xmm2,(%rsi)
    619 	movups	%xmm3,16(%rsi)
    620 	movups	%xmm4,32(%rsi)
    621 	jmp	L$ecb_ret
    622 .p2align	4
    623 L$ecb_enc_four:
    624 	call	_aesni_encrypt4
    625 	movups	%xmm2,(%rsi)
    626 	movups	%xmm3,16(%rsi)
    627 	movups	%xmm4,32(%rsi)
    628 	movups	%xmm5,48(%rsi)
    629 	jmp	L$ecb_ret
    630 .p2align	4
        // Five blocks: run the 6-block helper with a zeroed sixth register and
        // store only five results.
    631 L$ecb_enc_five:
    632 	xorps	%xmm7,%xmm7
    633 	call	_aesni_encrypt6
    634 	movups	%xmm2,(%rsi)
    635 	movups	%xmm3,16(%rsi)
    636 	movups	%xmm4,32(%rsi)
    637 	movups	%xmm5,48(%rsi)
    638 	movups	%xmm6,64(%rsi)
    639 	jmp	L$ecb_ret
    640 .p2align	4
    641 L$ecb_enc_six:
    642 	call	_aesni_encrypt6
    643 	movups	%xmm2,(%rsi)
    644 	movups	%xmm3,16(%rsi)
    645 	movups	%xmm4,32(%rsi)
    646 	movups	%xmm5,48(%rsi)
    647 	movups	%xmm6,64(%rsi)
    648 	movups	%xmm7,80(%rsi)
    649 	jmp	L$ecb_ret
    650 
    651 .p2align	4
        // ---- decrypt path: same structure as the encrypt path, but every
        // data register is zeroed after its result has been stored ----
    652 L$ecb_decrypt:
    653 	cmpq	$128,%rdx
    654 	jb	L$ecb_dec_tail
    655 
    656 	movdqu	(%rdi),%xmm2
    657 	movdqu	16(%rdi),%xmm3
    658 	movdqu	32(%rdi),%xmm4
    659 	movdqu	48(%rdi),%xmm5
    660 	movdqu	64(%rdi),%xmm6
    661 	movdqu	80(%rdi),%xmm7
    662 	movdqu	96(%rdi),%xmm8
    663 	movdqu	112(%rdi),%xmm9
    664 	leaq	128(%rdi),%rdi
    665 	subq	$128,%rdx
    666 	jmp	L$ecb_dec_loop8_enter
    667 .p2align	4
        // Store the previous eight results while loading the next eight
        // inputs, and restore %rcx / %eax for the helper.
    668 L$ecb_dec_loop8:
    669 	movups	%xmm2,(%rsi)
    670 	movq	%r11,%rcx
    671 	movdqu	(%rdi),%xmm2
    672 	movl	%r10d,%eax
    673 	movups	%xmm3,16(%rsi)
    674 	movdqu	16(%rdi),%xmm3
    675 	movups	%xmm4,32(%rsi)
    676 	movdqu	32(%rdi),%xmm4
    677 	movups	%xmm5,48(%rsi)
    678 	movdqu	48(%rdi),%xmm5
    679 	movups	%xmm6,64(%rsi)
    680 	movdqu	64(%rdi),%xmm6
    681 	movups	%xmm7,80(%rsi)
    682 	movdqu	80(%rdi),%xmm7
    683 	movups	%xmm8,96(%rsi)
    684 	movdqu	96(%rdi),%xmm8
    685 	movups	%xmm9,112(%rsi)
    686 	leaq	128(%rsi),%rsi
    687 	movdqu	112(%rdi),%xmm9
    688 	leaq	128(%rdi),%rdi
    689 L$ecb_dec_loop8_enter:
    690 
    691 	call	_aesni_decrypt8
    692 
        // Reload the first 16 bytes of the key schedule into %xmm0. movups
        // does not modify flags; jnc tests the borrow from subq.
    693 	movups	(%r11),%xmm0
    694 	subq	$128,%rdx
    695 	jnc	L$ecb_dec_loop8
    696 
        // Flush the last full batch, wiping each register after its store.
    697 	movups	%xmm2,(%rsi)
    698 	pxor	%xmm2,%xmm2
    699 	movq	%r11,%rcx
    700 	movups	%xmm3,16(%rsi)
    701 	pxor	%xmm3,%xmm3
    702 	movl	%r10d,%eax
    703 	movups	%xmm4,32(%rsi)
    704 	pxor	%xmm4,%xmm4
    705 	movups	%xmm5,48(%rsi)
    706 	pxor	%xmm5,%xmm5
    707 	movups	%xmm6,64(%rsi)
    708 	pxor	%xmm6,%xmm6
    709 	movups	%xmm7,80(%rsi)
    710 	pxor	%xmm7,%xmm7
    711 	movups	%xmm8,96(%rsi)
    712 	pxor	%xmm8,%xmm8
    713 	movups	%xmm9,112(%rsi)
    714 	pxor	%xmm9,%xmm9
    715 	leaq	128(%rsi),%rsi
    716 	addq	$128,%rdx
    717 	jz	L$ecb_ret
    718 
        // 16..112 bytes remain. Each cmpq result is consumed by two branches
        // (jb, then je); the movups in between does not modify flags.
    719 L$ecb_dec_tail:
    720 	movups	(%rdi),%xmm2
    721 	cmpq	$32,%rdx
    722 	jb	L$ecb_dec_one
    723 	movups	16(%rdi),%xmm3
    724 	je	L$ecb_dec_two
    725 	movups	32(%rdi),%xmm4
    726 	cmpq	$64,%rdx
    727 	jb	L$ecb_dec_three
    728 	movups	48(%rdi),%xmm5
    729 	je	L$ecb_dec_four
    730 	movups	64(%rdi),%xmm6
    731 	cmpq	$96,%rdx
    732 	jb	L$ecb_dec_five
    733 	movups	80(%rdi),%xmm7
    734 	je	L$ecb_dec_six
        // Seven blocks: run the 8-block helper with a zeroed eighth register
        // and store only seven results.
    735 	movups	96(%rdi),%xmm8
    736 	movups	(%rcx),%xmm0
    737 	xorps	%xmm9,%xmm9
    738 	call	_aesni_decrypt8
    739 	movups	%xmm2,(%rsi)
    740 	pxor	%xmm2,%xmm2
    741 	movups	%xmm3,16(%rsi)
    742 	pxor	%xmm3,%xmm3
    743 	movups	%xmm4,32(%rsi)
    744 	pxor	%xmm4,%xmm4
    745 	movups	%xmm5,48(%rsi)
    746 	pxor	%xmm5,%xmm5
    747 	movups	%xmm6,64(%rsi)
    748 	pxor	%xmm6,%xmm6
    749 	movups	%xmm7,80(%rsi)
    750 	pxor	%xmm7,%xmm7
    751 	movups	%xmm8,96(%rsi)
    752 	pxor	%xmm8,%xmm8
    753 	pxor	%xmm9,%xmm9
    754 	jmp	L$ecb_ret
    755 .p2align	4
        // One block: inline single-block loop
        // (102,15,56,222,209 = aesdec %xmm1,%xmm2; 223 = aesdeclast).
    756 L$ecb_dec_one:
    757 	movups	(%rcx),%xmm0
    758 	movups	16(%rcx),%xmm1
    759 	leaq	32(%rcx),%rcx
    760 	xorps	%xmm0,%xmm2
    761 L$oop_dec1_4:
    762 .byte	102,15,56,222,209
    763 	decl	%eax
    764 	movups	(%rcx),%xmm1
    765 	leaq	16(%rcx),%rcx
    766 	jnz	L$oop_dec1_4
    767 .byte	102,15,56,223,209
    768 	movups	%xmm2,(%rsi)
    769 	pxor	%xmm2,%xmm2
    770 	jmp	L$ecb_ret
    771 .p2align	4
    772 L$ecb_dec_two:
    773 	call	_aesni_decrypt2
    774 	movups	%xmm2,(%rsi)
    775 	pxor	%xmm2,%xmm2
    776 	movups	%xmm3,16(%rsi)
    777 	pxor	%xmm3,%xmm3
    778 	jmp	L$ecb_ret
    779 .p2align	4
    780 L$ecb_dec_three:
    781 	call	_aesni_decrypt3
    782 	movups	%xmm2,(%rsi)
    783 	pxor	%xmm2,%xmm2
    784 	movups	%xmm3,16(%rsi)
    785 	pxor	%xmm3,%xmm3
    786 	movups	%xmm4,32(%rsi)
    787 	pxor	%xmm4,%xmm4
    788 	jmp	L$ecb_ret
    789 .p2align	4
    790 L$ecb_dec_four:
    791 	call	_aesni_decrypt4
    792 	movups	%xmm2,(%rsi)
    793 	pxor	%xmm2,%xmm2
    794 	movups	%xmm3,16(%rsi)
    795 	pxor	%xmm3,%xmm3
    796 	movups	%xmm4,32(%rsi)
    797 	pxor	%xmm4,%xmm4
    798 	movups	%xmm5,48(%rsi)
    799 	pxor	%xmm5,%xmm5
    800 	jmp	L$ecb_ret
    801 .p2align	4
        // Five blocks: run the 6-block helper with a zeroed sixth register and
        // store only five results.
    802 L$ecb_dec_five:
    803 	xorps	%xmm7,%xmm7
    804 	call	_aesni_decrypt6
    805 	movups	%xmm2,(%rsi)
    806 	pxor	%xmm2,%xmm2
    807 	movups	%xmm3,16(%rsi)
    808 	pxor	%xmm3,%xmm3
    809 	movups	%xmm4,32(%rsi)
    810 	pxor	%xmm4,%xmm4
    811 	movups	%xmm5,48(%rsi)
    812 	pxor	%xmm5,%xmm5
    813 	movups	%xmm6,64(%rsi)
    814 	pxor	%xmm6,%xmm6
    815 	pxor	%xmm7,%xmm7
    816 	jmp	L$ecb_ret
    817 .p2align	4
        // Six blocks: falls through into L$ecb_ret after the last store.
    818 L$ecb_dec_six:
    819 	call	_aesni_decrypt6
    820 	movups	%xmm2,(%rsi)
    821 	pxor	%xmm2,%xmm2
    822 	movups	%xmm3,16(%rsi)
    823 	pxor	%xmm3,%xmm3
    824 	movups	%xmm4,32(%rsi)
    825 	pxor	%xmm4,%xmm4
    826 	movups	%xmm5,48(%rsi)
    827 	pxor	%xmm5,%xmm5
    828 	movups	%xmm6,64(%rsi)
    829 	pxor	%xmm6,%xmm6
    830 	movups	%xmm7,80(%rsi)
    831 	pxor	%xmm7,%xmm7
    832 
        // Common exit: wipe the two key registers and return (rep ret).
    833 L$ecb_ret:
    834 	xorps	%xmm0,%xmm0
    835 	pxor	%xmm1,%xmm1
    836 	.byte	0xf3,0xc3
    837 
    838 .globl	_aesni_ccm64_encrypt_blocks
    839 .private_extern _aesni_ccm64_encrypt_blocks
    840 
    841 .p2align	4
        // _aesni_ccm64_encrypt_blocks: per 16-byte block, produces
        // out = in ^ E(counter) and updates a running 16-byte value
        // mac = E(mac ^ in), running both AES computations two-way
        // interleaved. (The name indicates CCM mode; the code below is what
        // establishes the behaviour described here.)
        //   %rdi = input, %rsi = output
        //   %rdx = number of 16-byte blocks; the loop body runs before the
        //          count is tested, so a zero count is not handled here
        //   %rcx = key schedule (32-bit loop count N at offset 240)
        //   %r8  = 16-byte counter block (read only by this routine)
        //   %r9  = 16-byte running mac value (read on entry, written on exit)
        // L$increment64 and L$bswap_mask are 16-byte constants defined
        // outside this view; they are loaded with aligned moves.
        // Modifies %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
        // %xmm6-%xmm9 and flags.
        // Encoded instructions: 102,15,56,220,M = aesenc, 221 = aesenclast
        // (M = 209/217: key %xmm1 into %xmm2/%xmm3; M = 208/216: key %xmm0);
        // 102,15,56,0,247 = pshufb %xmm7,%xmm6; 102,15,56,0,215 = pshufb
        // %xmm7,%xmm2.
    842 _aesni_ccm64_encrypt_blocks:
    843 	movl	240(%rcx),%eax
    844 	movdqu	(%r8),%xmm6
    845 	movdqa	L$increment64(%rip),%xmm9
    846 	movdqa	L$bswap_mask(%rip),%xmm7
    847 
        // %r11 = key base, %rcx = key + 32 + 16*N, %r10 = 16 - 16*N (initial
        // value of the negative round-key index, reloaded per block).
    848 	shll	$4,%eax
    849 	movl	$16,%r10d
    850 	leaq	0(%rcx),%r11
    851 	movdqu	(%r9),%xmm3
        // %xmm2 = counter block as loaded; %xmm6 = byte-shuffled copy that is
        // incremented with paddq.
    852 	movdqa	%xmm6,%xmm2
    853 	leaq	32(%rcx,%rax,1),%rcx
    854 .byte	102,15,56,0,247
    855 	subq	%rax,%r10
    856 	jmp	L$ccm64_enc_outer
    857 .p2align	4
    858 L$ccm64_enc_outer:
    859 	movups	(%r11),%xmm0
    860 	movq	%r10,%rax
    861 	movups	(%rdi),%xmm8
    862 
        // %xmm2 = counter ^ key0 ; %xmm3 = mac ^ in ^ key0.
    863 	xorps	%xmm0,%xmm2
    864 	movups	16(%r11),%xmm1
    865 	xorps	%xmm8,%xmm0
    866 	xorps	%xmm0,%xmm3
    867 	movups	32(%r11),%xmm0
    868 
        // Two rounds per iteration on both %xmm2 and %xmm3; exits when %rax
        // reaches exactly zero (movups does not modify flags).
    869 L$ccm64_enc2_loop:
    870 .byte	102,15,56,220,209
    871 .byte	102,15,56,220,217
    872 	movups	(%rcx,%rax,1),%xmm1
    873 	addq	$32,%rax
    874 .byte	102,15,56,220,208
    875 .byte	102,15,56,220,216
    876 	movups	-16(%rcx,%rax,1),%xmm0
    877 	jnz	L$ccm64_enc2_loop
    878 .byte	102,15,56,220,209
    879 .byte	102,15,56,220,217
    880 	paddq	%xmm9,%xmm6
        // The flags from decq are consumed by the jnz at the end of the outer
        // loop; none of the instructions in between modify flags.
    881 	decq	%rdx
    882 .byte	102,15,56,221,208
    883 .byte	102,15,56,221,216
    884 
    885 	leaq	16(%rdi),%rdi
        // out = in ^ E(counter); then prepare the next counter block in %xmm2
        // by shuffling the incremented %xmm6 with the mask in %xmm7.
    886 	xorps	%xmm2,%xmm8
    887 	movdqa	%xmm6,%xmm2
    888 	movups	%xmm8,(%rsi)
    889 .byte	102,15,56,0,215
    890 	leaq	16(%rsi),%rsi
    891 	jnz	L$ccm64_enc_outer
    892 
        // Write back the mac value and wipe the working registers
        // (0xf3,0xc3 = rep ret).
    893 	pxor	%xmm0,%xmm0
    894 	pxor	%xmm1,%xmm1
    895 	pxor	%xmm2,%xmm2
    896 	movups	%xmm3,(%r9)
    897 	pxor	%xmm3,%xmm3
    898 	pxor	%xmm8,%xmm8
    899 	pxor	%xmm6,%xmm6
    900 	.byte	0xf3,0xc3
    901 
    902 .globl	_aesni_ccm64_decrypt_blocks
    903 .private_extern _aesni_ccm64_decrypt_blocks
    904 
    905 .p2align	4
        // _aesni_ccm64_decrypt_blocks: per 16-byte block, produces
        // out = in ^ E(counter) and updates a running 16-byte value
        // mac = E(mac ^ out). The mac update for block i is run two-way
        // interleaved with the counter encryption for block i+1; the mac
        // update for the last block is done on its own after the loop.
        // (The name indicates CCM mode; the code below is what establishes
        // the behaviour described here.)
        //   %rdi = input, %rsi = output
        //   %rdx = number of 16-byte blocks; the first block is processed
        //          before the count is tested, so a zero count is not
        //          handled here
        //   %rcx = key schedule (32-bit loop count N at offset 240)
        //   %r8  = 16-byte counter block (read only by this routine)
        //   %r9  = 16-byte running mac value (read on entry, written on exit)
        // L$increment64 and L$bswap_mask are 16-byte constants defined
        // outside this view; they are loaded with aligned moves.
        // Modifies %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
        // %xmm6-%xmm9 and flags.
        // Encoded instructions: 102,15,56,220,M = aesenc, 221 = aesenclast
        // (M = 209/217: key %xmm1 into %xmm2/%xmm3; M = 208/216: key %xmm0);
        // 102,15,56,0,247 = pshufb %xmm7,%xmm6; 102,15,56,0,215 = pshufb
        // %xmm7,%xmm2.
    906 _aesni_ccm64_decrypt_blocks:
    907 	movl	240(%rcx),%eax
    908 	movups	(%r8),%xmm6
    909 	movdqu	(%r9),%xmm3
    910 	movdqa	L$increment64(%rip),%xmm9
    911 	movdqa	L$bswap_mask(%rip),%xmm7
    912 
        // %xmm2 = counter block as loaded; %xmm6 = byte-shuffled copy that is
        // incremented with paddq. %r10d / %r11 keep N and the key base.
    913 	movaps	%xmm6,%xmm2
    914 	movl	%eax,%r10d
    915 	movq	%rcx,%r11
    916 .byte	102,15,56,0,247
        // Single-block encryption of the first counter block into %xmm2.
    917 	movups	(%rcx),%xmm0
    918 	movups	16(%rcx),%xmm1
    919 	leaq	32(%rcx),%rcx
    920 	xorps	%xmm0,%xmm2
    921 L$oop_enc1_5:
    922 .byte	102,15,56,220,209
    923 	decl	%eax
    924 	movups	(%rcx),%xmm1
    925 	leaq	16(%rcx),%rcx
    926 	jnz	L$oop_enc1_5
    927 .byte	102,15,56,221,209
        // Set up the two-way loop: %rcx = key + 32 + 16*N and
        // %r10 = 16 - 16*N (the shll on %r10d zero-extends into %r10).
    928 	shll	$4,%r10d
    929 	movl	$16,%eax
    930 	movups	(%rdi),%xmm8
    931 	paddq	%xmm9,%xmm6
    932 	leaq	16(%rdi),%rdi
    933 	subq	%r10,%rax
    934 	leaq	32(%r11,%r10,1),%rcx
    935 	movq	%rax,%r10
    936 	jmp	L$ccm64_dec_outer
    937 .p2align	4
    938 L$ccm64_dec_outer:
        // out = in ^ E(counter); %xmm8 holds the output block afterwards.
        // Then prepare the next counter block in %xmm2.
    939 	xorps	%xmm2,%xmm8
    940 	movdqa	%xmm6,%xmm2
    941 	movups	%xmm8,(%rsi)
    942 	leaq	16(%rsi),%rsi
    943 .byte	102,15,56,0,215
    944 
    945 	subq	$1,%rdx
    946 	jz	L$ccm64_dec_break
    947 
        // %xmm2 = next counter ^ key0 ; %xmm3 = mac ^ out ^ key0.
    948 	movups	(%r11),%xmm0
    949 	movq	%r10,%rax
    950 	movups	16(%r11),%xmm1
    951 	xorps	%xmm0,%xmm8
    952 	xorps	%xmm0,%xmm2
    953 	xorps	%xmm8,%xmm3
    954 	movups	32(%r11),%xmm0
    955 	jmp	L$ccm64_dec2_loop
    956 .p2align	4
        // Two rounds per iteration on both %xmm2 and %xmm3; exits when %rax
        // reaches exactly zero (movups does not modify flags).
    957 L$ccm64_dec2_loop:
    958 .byte	102,15,56,220,209
    959 .byte	102,15,56,220,217
    960 	movups	(%rcx,%rax,1),%xmm1
    961 	addq	$32,%rax
    962 .byte	102,15,56,220,208
    963 .byte	102,15,56,220,216
    964 	movups	-16(%rcx,%rax,1),%xmm0
    965 	jnz	L$ccm64_dec2_loop
        // Load the next input block and bump the counter while the final two
        // rounds complete.
    966 	movups	(%rdi),%xmm8
    967 	paddq	%xmm9,%xmm6
    968 .byte	102,15,56,220,209
    969 .byte	102,15,56,220,217
    970 .byte	102,15,56,221,208
    971 .byte	102,15,56,221,216
    972 	leaq	16(%rdi),%rdi
    973 	jmp	L$ccm64_dec_outer
    974 
    975 .p2align	4
        // Last block: fold the final output block into the mac with a
        // single-block encryption of %xmm3
        // (102,15,56,220,217 = aesenc %xmm1,%xmm3; 221,217 = aesenclast).
    976 L$ccm64_dec_break:
    977 
    978 	movl	240(%r11),%eax
    979 	movups	(%r11),%xmm0
    980 	movups	16(%r11),%xmm1
    981 	xorps	%xmm0,%xmm8
    982 	leaq	32(%r11),%r11
    983 	xorps	%xmm8,%xmm3
    984 L$oop_enc1_6:
    985 .byte	102,15,56,220,217
    986 	decl	%eax
    987 	movups	(%r11),%xmm1
    988 	leaq	16(%r11),%r11
    989 	jnz	L$oop_enc1_6
    990 .byte	102,15,56,221,217
        // Write back the mac value and wipe the working registers
        // (0xf3,0xc3 = rep ret).
    991 	pxor	%xmm0,%xmm0
    992 	pxor	%xmm1,%xmm1
    993 	pxor	%xmm2,%xmm2
    994 	movups	%xmm3,(%r9)
    995 	pxor	%xmm3,%xmm3
    996 	pxor	%xmm8,%xmm8
    997 	pxor	%xmm6,%xmm6
    998 	.byte	0xf3,0xc3
    999 
   1000 .globl	_aesni_ctr32_encrypt_blocks
   1001 .private_extern _aesni_ctr32_encrypt_blocks
   1002 
   1003 .p2align	4
   1004 _aesni_ctr32_encrypt_blocks:
   1005 	cmpq	$1,%rdx
   1006 	jne	L$ctr32_bulk
   1007 
   1008 
   1009 
   1010 	movups	(%r8),%xmm2
   1011 	movups	(%rdi),%xmm3
   1012 	movl	240(%rcx),%edx
   1013 	movups	(%rcx),%xmm0
   1014 	movups	16(%rcx),%xmm1
   1015 	leaq	32(%rcx),%rcx
   1016 	xorps	%xmm0,%xmm2
   1017 L$oop_enc1_7:
   1018 .byte	102,15,56,220,209
   1019 	decl	%edx
   1020 	movups	(%rcx),%xmm1
   1021 	leaq	16(%rcx),%rcx
   1022 	jnz	L$oop_enc1_7
   1023 .byte	102,15,56,221,209
   1024 	pxor	%xmm0,%xmm0
   1025 	pxor	%xmm1,%xmm1
   1026 	xorps	%xmm3,%xmm2
   1027 	pxor	%xmm3,%xmm3
   1028 	movups	%xmm2,(%rsi)
   1029 	xorps	%xmm2,%xmm2
   1030 	jmp	L$ctr32_epilogue
   1031 
   1032 .p2align	4
   1033 L$ctr32_bulk:
   1034 	leaq	(%rsp),%rax
   1035 	pushq	%rbp
   1036 	subq	$128,%rsp
   1037 	andq	$-16,%rsp
   1038 	leaq	-8(%rax),%rbp
   1039 
   1040 
   1041 
   1042 
   1043 	movdqu	(%r8),%xmm2
   1044 	movdqu	(%rcx),%xmm0
   1045 	movl	12(%r8),%r8d
   1046 	pxor	%xmm0,%xmm2
   1047 	movl	12(%rcx),%r11d
   1048 	movdqa	%xmm2,0(%rsp)
   1049 	bswapl	%r8d
   1050 	movdqa	%xmm2,%xmm3
   1051 	movdqa	%xmm2,%xmm4
   1052 	movdqa	%xmm2,%xmm5
   1053 	movdqa	%xmm2,64(%rsp)
   1054 	movdqa	%xmm2,80(%rsp)
   1055 	movdqa	%xmm2,96(%rsp)
   1056 	movq	%rdx,%r10
   1057 	movdqa	%xmm2,112(%rsp)
   1058 
   1059 	leaq	1(%r8),%rax
   1060 	leaq	2(%r8),%rdx
   1061 	bswapl	%eax
   1062 	bswapl	%edx
   1063 	xorl	%r11d,%eax
   1064 	xorl	%r11d,%edx
   1065 .byte	102,15,58,34,216,3
   1066 	leaq	3(%r8),%rax
   1067 	movdqa	%xmm3,16(%rsp)
   1068 .byte	102,15,58,34,226,3
   1069 	bswapl	%eax
   1070 	movq	%r10,%rdx
   1071 	leaq	4(%r8),%r10
   1072 	movdqa	%xmm4,32(%rsp)
   1073 	xorl	%r11d,%eax
   1074 	bswapl	%r10d
   1075 .byte	102,15,58,34,232,3
   1076 	xorl	%r11d,%r10d
   1077 	movdqa	%xmm5,48(%rsp)
   1078 	leaq	5(%r8),%r9
   1079 	movl	%r10d,64+12(%rsp)
   1080 	bswapl	%r9d
   1081 	leaq	6(%r8),%r10
   1082 	movl	240(%rcx),%eax
   1083 	xorl	%r11d,%r9d
   1084 	bswapl	%r10d
   1085 	movl	%r9d,80+12(%rsp)
   1086 	xorl	%r11d,%r10d
   1087 	leaq	7(%r8),%r9
   1088 	movl	%r10d,96+12(%rsp)
   1089 	bswapl	%r9d
   1090 	movl	_OPENSSL_ia32cap_P+4(%rip),%r10d
   1091 	xorl	%r11d,%r9d
   1092 	andl	$71303168,%r10d
   1093 	movl	%r9d,112+12(%rsp)
   1094 
   1095 	movups	16(%rcx),%xmm1
   1096 
   1097 	movdqa	64(%rsp),%xmm6
   1098 	movdqa	80(%rsp),%xmm7
   1099 
   1100 	cmpq	$8,%rdx
   1101 	jb	L$ctr32_tail
   1102 
   1103 	subq	$6,%rdx
   1104 	cmpl	$4194304,%r10d
   1105 	je	L$ctr32_6x
   1106 
   1107 	leaq	128(%rcx),%rcx
   1108 	subq	$2,%rdx
   1109 	jmp	L$ctr32_loop8
   1110 
   1111 .p2align	4
   1112 L$ctr32_6x:
   1113 	shll	$4,%eax
   1114 	movl	$48,%r10d
   1115 	bswapl	%r11d
   1116 	leaq	32(%rcx,%rax,1),%rcx
   1117 	subq	%rax,%r10
   1118 	jmp	L$ctr32_loop6
   1119 
   1120 .p2align	4
   1121 L$ctr32_loop6:
   1122 	addl	$6,%r8d
   1123 	movups	-48(%rcx,%r10,1),%xmm0
   1124 .byte	102,15,56,220,209
   1125 	movl	%r8d,%eax
   1126 	xorl	%r11d,%eax
   1127 .byte	102,15,56,220,217
   1128 .byte	0x0f,0x38,0xf1,0x44,0x24,12
   1129 	leal	1(%r8),%eax
   1130 .byte	102,15,56,220,225
   1131 	xorl	%r11d,%eax
   1132 .byte	0x0f,0x38,0xf1,0x44,0x24,28
   1133 .byte	102,15,56,220,233
   1134 	leal	2(%r8),%eax
   1135 	xorl	%r11d,%eax
   1136 .byte	102,15,56,220,241
   1137 .byte	0x0f,0x38,0xf1,0x44,0x24,44
   1138 	leal	3(%r8),%eax
   1139 .byte	102,15,56,220,249
   1140 	movups	-32(%rcx,%r10,1),%xmm1
   1141 	xorl	%r11d,%eax
   1142 
   1143 .byte	102,15,56,220,208
   1144 .byte	0x0f,0x38,0xf1,0x44,0x24,60
   1145 	leal	4(%r8),%eax
   1146 .byte	102,15,56,220,216
   1147 	xorl	%r11d,%eax
   1148 .byte	0x0f,0x38,0xf1,0x44,0x24,76
   1149 .byte	102,15,56,220,224
   1150 	leal	5(%r8),%eax
   1151 	xorl	%r11d,%eax
   1152 .byte	102,15,56,220,232
   1153 .byte	0x0f,0x38,0xf1,0x44,0x24,92
   1154 	movq	%r10,%rax
   1155 .byte	102,15,56,220,240
   1156 .byte	102,15,56,220,248
   1157 	movups	-16(%rcx,%r10,1),%xmm0
   1158 
   1159 	call	L$enc_loop6
   1160 
   1161 	movdqu	(%rdi),%xmm8
   1162 	movdqu	16(%rdi),%xmm9
   1163 	movdqu	32(%rdi),%xmm10
   1164 	movdqu	48(%rdi),%xmm11
   1165 	movdqu	64(%rdi),%xmm12
   1166 	movdqu	80(%rdi),%xmm13
   1167 	leaq	96(%rdi),%rdi
   1168 	movups	-64(%rcx,%r10,1),%xmm1
   1169 	pxor	%xmm2,%xmm8
   1170 	movaps	0(%rsp),%xmm2
   1171 	pxor	%xmm3,%xmm9
   1172 	movaps	16(%rsp),%xmm3
   1173 	pxor	%xmm4,%xmm10
   1174 	movaps	32(%rsp),%xmm4
   1175 	pxor	%xmm5,%xmm11
   1176 	movaps	48(%rsp),%xmm5
   1177 	pxor	%xmm6,%xmm12
   1178 	movaps	64(%rsp),%xmm6
   1179 	pxor	%xmm7,%xmm13
   1180 	movaps	80(%rsp),%xmm7
   1181 	movdqu	%xmm8,(%rsi)
   1182 	movdqu	%xmm9,16(%rsi)
   1183 	movdqu	%xmm10,32(%rsi)
   1184 	movdqu	%xmm11,48(%rsi)
   1185 	movdqu	%xmm12,64(%rsi)
   1186 	movdqu	%xmm13,80(%rsi)
   1187 	leaq	96(%rsi),%rsi
   1188 
   1189 	subq	$6,%rdx
   1190 	jnc	L$ctr32_loop6
   1191 
   1192 	addq	$6,%rdx
   1193 	jz	L$ctr32_done
   1194 
   1195 	leal	-48(%r10),%eax
   1196 	leaq	-80(%rcx,%r10,1),%rcx
   1197 	negl	%eax
   1198 	shrl	$4,%eax
   1199 	jmp	L$ctr32_tail
   1200 
   1201 .p2align	5
   1202 L$ctr32_loop8:
   1203 	addl	$8,%r8d
   1204 	movdqa	96(%rsp),%xmm8
   1205 .byte	102,15,56,220,209
   1206 	movl	%r8d,%r9d
   1207 	movdqa	112(%rsp),%xmm9
   1208 .byte	102,15,56,220,217
   1209 	bswapl	%r9d
   1210 	movups	32-128(%rcx),%xmm0
   1211 .byte	102,15,56,220,225
   1212 	xorl	%r11d,%r9d
   1213 	nop
   1214 .byte	102,15,56,220,233
   1215 	movl	%r9d,0+12(%rsp)
   1216 	leaq	1(%r8),%r9
   1217 .byte	102,15,56,220,241
   1218 .byte	102,15,56,220,249
   1219 .byte	102,68,15,56,220,193
   1220 .byte	102,68,15,56,220,201
   1221 	movups	48-128(%rcx),%xmm1
   1222 	bswapl	%r9d
   1223 .byte	102,15,56,220,208
   1224 .byte	102,15,56,220,216
   1225 	xorl	%r11d,%r9d
   1226 .byte	0x66,0x90
   1227 .byte	102,15,56,220,224
   1228 .byte	102,15,56,220,232
   1229 	movl	%r9d,16+12(%rsp)
   1230 	leaq	2(%r8),%r9
   1231 .byte	102,15,56,220,240
   1232 .byte	102,15,56,220,248
   1233 .byte	102,68,15,56,220,192
   1234 .byte	102,68,15,56,220,200
   1235 	movups	64-128(%rcx),%xmm0
   1236 	bswapl	%r9d
   1237 .byte	102,15,56,220,209
   1238 .byte	102,15,56,220,217
   1239 	xorl	%r11d,%r9d
   1240 .byte	0x66,0x90
   1241 .byte	102,15,56,220,225
   1242 .byte	102,15,56,220,233
   1243 	movl	%r9d,32+12(%rsp)
   1244 	leaq	3(%r8),%r9
   1245 .byte	102,15,56,220,241
   1246 .byte	102,15,56,220,249
   1247 .byte	102,68,15,56,220,193
   1248 .byte	102,68,15,56,220,201
   1249 	movups	80-128(%rcx),%xmm1
   1250 	bswapl	%r9d
   1251 .byte	102,15,56,220,208
   1252 .byte	102,15,56,220,216
   1253 	xorl	%r11d,%r9d
   1254 .byte	0x66,0x90
   1255 .byte	102,15,56,220,224
   1256 .byte	102,15,56,220,232
   1257 	movl	%r9d,48+12(%rsp)
   1258 	leaq	4(%r8),%r9
   1259 .byte	102,15,56,220,240
   1260 .byte	102,15,56,220,248
   1261 .byte	102,68,15,56,220,192
   1262 .byte	102,68,15,56,220,200
   1263 	movups	96-128(%rcx),%xmm0
   1264 	bswapl	%r9d
   1265 .byte	102,15,56,220,209
   1266 .byte	102,15,56,220,217
   1267 	xorl	%r11d,%r9d
   1268 .byte	0x66,0x90
   1269 .byte	102,15,56,220,225
   1270 .byte	102,15,56,220,233
   1271 	movl	%r9d,64+12(%rsp)
   1272 	leaq	5(%r8),%r9
   1273 .byte	102,15,56,220,241
   1274 .byte	102,15,56,220,249
   1275 .byte	102,68,15,56,220,193
   1276 .byte	102,68,15,56,220,201
   1277 	movups	112-128(%rcx),%xmm1
   1278 	bswapl	%r9d
   1279 .byte	102,15,56,220,208
   1280 .byte	102,15,56,220,216
   1281 	xorl	%r11d,%r9d
   1282 .byte	0x66,0x90
   1283 .byte	102,15,56,220,224
   1284 .byte	102,15,56,220,232
   1285 	movl	%r9d,80+12(%rsp)
   1286 	leaq	6(%r8),%r9
   1287 .byte	102,15,56,220,240
   1288 .byte	102,15,56,220,248
   1289 .byte	102,68,15,56,220,192
   1290 .byte	102,68,15,56,220,200
   1291 	movups	128-128(%rcx),%xmm0
   1292 	bswapl	%r9d
   1293 .byte	102,15,56,220,209
   1294 .byte	102,15,56,220,217
   1295 	xorl	%r11d,%r9d
   1296 .byte	0x66,0x90
   1297 .byte	102,15,56,220,225
   1298 .byte	102,15,56,220,233
   1299 	movl	%r9d,96+12(%rsp)
   1300 	leaq	7(%r8),%r9
   1301 .byte	102,15,56,220,241
   1302 .byte	102,15,56,220,249
   1303 .byte	102,68,15,56,220,193
   1304 .byte	102,68,15,56,220,201
   1305 	movups	144-128(%rcx),%xmm1
   1306 	bswapl	%r9d
   1307 .byte	102,15,56,220,208
   1308 .byte	102,15,56,220,216
   1309 .byte	102,15,56,220,224
   1310 	xorl	%r11d,%r9d
   1311 	movdqu	0(%rdi),%xmm10
   1312 .byte	102,15,56,220,232
   1313 	movl	%r9d,112+12(%rsp)
   1314 	cmpl	$11,%eax
   1315 .byte	102,15,56,220,240
   1316 .byte	102,15,56,220,248
   1317 .byte	102,68,15,56,220,192
   1318 .byte	102,68,15,56,220,200
   1319 	movups	160-128(%rcx),%xmm0
   1320 
   1321 	jb	L$ctr32_enc_done
   1322 
   1323 .byte	102,15,56,220,209
   1324 .byte	102,15,56,220,217
   1325 .byte	102,15,56,220,225
   1326 .byte	102,15,56,220,233
   1327 .byte	102,15,56,220,241
   1328 .byte	102,15,56,220,249
   1329 .byte	102,68,15,56,220,193
   1330 .byte	102,68,15,56,220,201
   1331 	movups	176-128(%rcx),%xmm1
   1332 
   1333 .byte	102,15,56,220,208
   1334 .byte	102,15,56,220,216
   1335 .byte	102,15,56,220,224
   1336 .byte	102,15,56,220,232
   1337 .byte	102,15,56,220,240
   1338 .byte	102,15,56,220,248
   1339 .byte	102,68,15,56,220,192
   1340 .byte	102,68,15,56,220,200
   1341 	movups	192-128(%rcx),%xmm0
   1342 	je	L$ctr32_enc_done
   1343 
   1344 .byte	102,15,56,220,209
   1345 .byte	102,15,56,220,217
   1346 .byte	102,15,56,220,225
   1347 .byte	102,15,56,220,233
   1348 .byte	102,15,56,220,241
   1349 .byte	102,15,56,220,249
   1350 .byte	102,68,15,56,220,193
   1351 .byte	102,68,15,56,220,201
   1352 	movups	208-128(%rcx),%xmm1
   1353 
   1354 .byte	102,15,56,220,208
   1355 .byte	102,15,56,220,216
   1356 .byte	102,15,56,220,224
   1357 .byte	102,15,56,220,232
   1358 .byte	102,15,56,220,240
   1359 .byte	102,15,56,220,248
   1360 .byte	102,68,15,56,220,192
   1361 .byte	102,68,15,56,220,200
   1362 	movups	224-128(%rcx),%xmm0
   1363 	jmp	L$ctr32_enc_done
   1364 
   1365 .p2align	4
   1366 L$ctr32_enc_done:
   1367 	movdqu	16(%rdi),%xmm11
   1368 	pxor	%xmm0,%xmm10
   1369 	movdqu	32(%rdi),%xmm12
   1370 	pxor	%xmm0,%xmm11
   1371 	movdqu	48(%rdi),%xmm13
   1372 	pxor	%xmm0,%xmm12
   1373 	movdqu	64(%rdi),%xmm14
   1374 	pxor	%xmm0,%xmm13
   1375 	movdqu	80(%rdi),%xmm15
   1376 	pxor	%xmm0,%xmm14
   1377 	pxor	%xmm0,%xmm15
   1378 .byte	102,15,56,220,209
   1379 .byte	102,15,56,220,217
   1380 .byte	102,15,56,220,225
   1381 .byte	102,15,56,220,233
   1382 .byte	102,15,56,220,241
   1383 .byte	102,15,56,220,249
   1384 .byte	102,68,15,56,220,193
   1385 .byte	102,68,15,56,220,201
   1386 	movdqu	96(%rdi),%xmm1
   1387 	leaq	128(%rdi),%rdi
   1388 
   1389 .byte	102,65,15,56,221,210
   1390 	pxor	%xmm0,%xmm1
   1391 	movdqu	112-128(%rdi),%xmm10
   1392 .byte	102,65,15,56,221,219
   1393 	pxor	%xmm0,%xmm10
   1394 	movdqa	0(%rsp),%xmm11
   1395 .byte	102,65,15,56,221,228
   1396 .byte	102,65,15,56,221,237
   1397 	movdqa	16(%rsp),%xmm12
   1398 	movdqa	32(%rsp),%xmm13
   1399 .byte	102,65,15,56,221,246
   1400 .byte	102,65,15,56,221,255
   1401 	movdqa	48(%rsp),%xmm14
   1402 	movdqa	64(%rsp),%xmm15
   1403 .byte	102,68,15,56,221,193
   1404 	movdqa	80(%rsp),%xmm0
   1405 	movups	16-128(%rcx),%xmm1
   1406 .byte	102,69,15,56,221,202
   1407 
   1408 	movups	%xmm2,(%rsi)
   1409 	movdqa	%xmm11,%xmm2
   1410 	movups	%xmm3,16(%rsi)
   1411 	movdqa	%xmm12,%xmm3
   1412 	movups	%xmm4,32(%rsi)
   1413 	movdqa	%xmm13,%xmm4
   1414 	movups	%xmm5,48(%rsi)
   1415 	movdqa	%xmm14,%xmm5
   1416 	movups	%xmm6,64(%rsi)
   1417 	movdqa	%xmm15,%xmm6
   1418 	movups	%xmm7,80(%rsi)
   1419 	movdqa	%xmm0,%xmm7
   1420 	movups	%xmm8,96(%rsi)
   1421 	movups	%xmm9,112(%rsi)
   1422 	leaq	128(%rsi),%rsi
   1423 
   1424 	subq	$8,%rdx
   1425 	jnc	L$ctr32_loop8
   1426 
   1427 	addq	$8,%rdx
   1428 	jz	L$ctr32_done
   1429 	leaq	-128(%rcx),%rcx
   1430 
   1431 L$ctr32_tail:
   1432 
   1433 
   1434 	leaq	16(%rcx),%rcx
   1435 	cmpq	$4,%rdx
   1436 	jb	L$ctr32_loop3
   1437 	je	L$ctr32_loop4
   1438 
   1439 
   1440 	shll	$4,%eax
   1441 	movdqa	96(%rsp),%xmm8
   1442 	pxor	%xmm9,%xmm9
   1443 
   1444 	movups	16(%rcx),%xmm0
   1445 .byte	102,15,56,220,209
   1446 .byte	102,15,56,220,217
   1447 	leaq	32-16(%rcx,%rax,1),%rcx
   1448 	negq	%rax
   1449 .byte	102,15,56,220,225
   1450 	addq	$16,%rax
   1451 	movups	(%rdi),%xmm10
   1452 .byte	102,15,56,220,233
   1453 .byte	102,15,56,220,241
   1454 	movups	16(%rdi),%xmm11
   1455 	movups	32(%rdi),%xmm12
   1456 .byte	102,15,56,220,249
   1457 .byte	102,68,15,56,220,193
   1458 
   1459 	call	L$enc_loop8_enter
   1460 
   1461 	movdqu	48(%rdi),%xmm13
   1462 	pxor	%xmm10,%xmm2
   1463 	movdqu	64(%rdi),%xmm10
   1464 	pxor	%xmm11,%xmm3
   1465 	movdqu	%xmm2,(%rsi)
   1466 	pxor	%xmm12,%xmm4
   1467 	movdqu	%xmm3,16(%rsi)
   1468 	pxor	%xmm13,%xmm5
   1469 	movdqu	%xmm4,32(%rsi)
   1470 	pxor	%xmm10,%xmm6
   1471 	movdqu	%xmm5,48(%rsi)
   1472 	movdqu	%xmm6,64(%rsi)
   1473 	cmpq	$6,%rdx
   1474 	jb	L$ctr32_done
   1475 
   1476 	movups	80(%rdi),%xmm11
   1477 	xorps	%xmm11,%xmm7
   1478 	movups	%xmm7,80(%rsi)
   1479 	je	L$ctr32_done
   1480 
   1481 	movups	96(%rdi),%xmm12
   1482 	xorps	%xmm12,%xmm8
   1483 	movups	%xmm8,96(%rsi)
   1484 	jmp	L$ctr32_done
   1485 
   1486 .p2align	5
   1487 L$ctr32_loop4:
   1488 .byte	102,15,56,220,209
   1489 	leaq	16(%rcx),%rcx
   1490 	decl	%eax
   1491 .byte	102,15,56,220,217
   1492 .byte	102,15,56,220,225
   1493 .byte	102,15,56,220,233
   1494 	movups	(%rcx),%xmm1
   1495 	jnz	L$ctr32_loop4
   1496 .byte	102,15,56,221,209
   1497 .byte	102,15,56,221,217
   1498 	movups	(%rdi),%xmm10
   1499 	movups	16(%rdi),%xmm11
   1500 .byte	102,15,56,221,225
   1501 .byte	102,15,56,221,233
   1502 	movups	32(%rdi),%xmm12
   1503 	movups	48(%rdi),%xmm13
   1504 
   1505 	xorps	%xmm10,%xmm2
   1506 	movups	%xmm2,(%rsi)
   1507 	xorps	%xmm11,%xmm3
   1508 	movups	%xmm3,16(%rsi)
   1509 	pxor	%xmm12,%xmm4
   1510 	movdqu	%xmm4,32(%rsi)
   1511 	pxor	%xmm13,%xmm5
   1512 	movdqu	%xmm5,48(%rsi)
   1513 	jmp	L$ctr32_done
   1514 
   1515 .p2align	5
   1516 L$ctr32_loop3:
   1517 .byte	102,15,56,220,209
   1518 	leaq	16(%rcx),%rcx
   1519 	decl	%eax
   1520 .byte	102,15,56,220,217
   1521 .byte	102,15,56,220,225
   1522 	movups	(%rcx),%xmm1
   1523 	jnz	L$ctr32_loop3
   1524 .byte	102,15,56,221,209
   1525 .byte	102,15,56,221,217
   1526 .byte	102,15,56,221,225
   1527 
   1528 	movups	(%rdi),%xmm10
   1529 	xorps	%xmm10,%xmm2
   1530 	movups	%xmm2,(%rsi)
   1531 	cmpq	$2,%rdx
   1532 	jb	L$ctr32_done
   1533 
   1534 	movups	16(%rdi),%xmm11
   1535 	xorps	%xmm11,%xmm3
   1536 	movups	%xmm3,16(%rsi)
   1537 	je	L$ctr32_done
   1538 
   1539 	movups	32(%rdi),%xmm12
   1540 	xorps	%xmm12,%xmm4
   1541 	movups	%xmm4,32(%rsi)
   1542 
   1543 L$ctr32_done:
   1544 	xorps	%xmm0,%xmm0
   1545 	xorl	%r11d,%r11d
   1546 	pxor	%xmm1,%xmm1
   1547 	pxor	%xmm2,%xmm2
   1548 	pxor	%xmm3,%xmm3
   1549 	pxor	%xmm4,%xmm4
   1550 	pxor	%xmm5,%xmm5
   1551 	pxor	%xmm6,%xmm6
   1552 	pxor	%xmm7,%xmm7
   1553 	movaps	%xmm0,0(%rsp)
   1554 	pxor	%xmm8,%xmm8
   1555 	movaps	%xmm0,16(%rsp)
   1556 	pxor	%xmm9,%xmm9
   1557 	movaps	%xmm0,32(%rsp)
   1558 	pxor	%xmm10,%xmm10
   1559 	movaps	%xmm0,48(%rsp)
   1560 	pxor	%xmm11,%xmm11
   1561 	movaps	%xmm0,64(%rsp)
   1562 	pxor	%xmm12,%xmm12
   1563 	movaps	%xmm0,80(%rsp)
   1564 	pxor	%xmm13,%xmm13
   1565 	movaps	%xmm0,96(%rsp)
   1566 	pxor	%xmm14,%xmm14
   1567 	movaps	%xmm0,112(%rsp)
   1568 	pxor	%xmm15,%xmm15
   1569 	leaq	(%rbp),%rsp
   1570 	popq	%rbp
   1571 L$ctr32_epilogue:
   1572 	.byte	0xf3,0xc3
   1573 
   1574 .globl	_aesni_xts_encrypt
   1575 .private_extern _aesni_xts_encrypt
   1576 
        /*
         * _aesni_xts_encrypt -- XTS-style AES encryption using AES-NI.
         *
         * Register inputs, as consumed by the code below:
         *   %rdi  source pointer, read 16 bytes at a time and advanced
         *   %rsi  destination pointer, written and advanced
         *   %rdx  length in bytes; whole 16-byte blocks are processed first and
         *         a non-zero (length & 15) triggers the stealing tail
         *   %rcx  key schedule used for the data blocks
         *   %r8   key schedule used once, to encrypt the initial tweak
         *   %r9   pointer to the 16-byte initial tweak input
         * The dword at offset 240 of each key schedule is used as the number of
         * aesenc rounds performed before the final aesenclast.
         * NOTE(review): presumably the C prototype is
         * (in, out, len, key1, key2, iv) as in OpenSSL -- confirm against the
         * declaring header.
         *
         * Long-lived registers after setup:
         *   %r11  base of the data key schedule
         *   %r10  16-byte-scaled loop index seed inside the 6-block loop,
         *         plain round counter from L$xts_enc_short onwards
         *   %r9   original length (only the low 4 bits are used at the end)
         *   %xmm10-%xmm14  tweaks for blocks 0-4, pre-xored with round key 0
         *   %xmm15         tweak for block 5 (not pre-xored)
         *
         * Stack scratch (112 bytes, 16-byte aligned):
         *   0..95(%rsp)  six values (tweak ^ last round key) fed to aesenclast
         *   96(%rsp)     round key 0 ^ last round key
         * The scratch area and %xmm0-%xmm15 are zeroed before returning.
         *
         * The AES instructions are emitted as raw .byte sequences; each one is
         * decoded in the comment next to it.
         */
   1577 .p2align	4
   1578 _aesni_xts_encrypt:
   1579 	leaq	(%rsp),%rax	/* remember the entry stack pointer */
   1580 	pushq	%rbp
   1581 	subq	$112,%rsp	/* 7 x 16 bytes of scratch */
   1582 	andq	$-16,%rsp	/* align: (%rsp) is accessed with movdqa/movaps */
   1583 	leaq	-8(%rax),%rbp	/* %rbp -> saved-%rbp slot, used to restore %rsp */
        /* Encrypt the 16 bytes at (%r9) with the key schedule at %r8. */
   1584 	movups	(%r9),%xmm2
   1585 	movl	240(%r8),%eax	/* round counter of the tweak key */
   1586 	movl	240(%rcx),%r10d	/* round counter of the data key */
   1587 	movups	(%r8),%xmm0
   1588 	movups	16(%r8),%xmm1
   1589 	leaq	32(%r8),%r8
   1590 	xorps	%xmm0,%xmm2	/* initial whitening with round key 0 */
   1591 L$oop_enc1_8:
   1592 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
   1593 	decl	%eax
   1594 	movups	(%r8),%xmm1	/* movups/leaq leave the flags from decl intact */
   1595 	leaq	16(%r8),%r8
   1596 	jnz	L$oop_enc1_8
   1597 .byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 ; xmm2 = first tweak */
   1598 	movups	(%rcx),%xmm0	/* xmm0 = round key 0 of the data key */
   1599 	movq	%rcx,%r11	/* r11 = data key base */
   1600 	movl	%r10d,%eax	/* eax = data-key round counter */
   1601 	shll	$4,%r10d	/* r10 = 16 * round counter */
   1602 	movq	%rdx,%r9	/* keep the original length for the tail check */
   1603 	andq	$-16,%rdx	/* rdx = length rounded down to whole blocks */
   1604 
   1605 	movups	16(%rcx,%r10,1),%xmm1	/* xmm1 = last round key of the data key */
   1606 
        /*
         * Derive six consecutive tweaks.  Each step copies the current tweak out,
         * then doubles %xmm15: paddq shifts both 64-bit halves left by one and
         * the masked L$xts_magic constant folds the shifted-out top bits back in.
         * %xmm9 holds copies of the high dword of each half (pshufd $95) so that
         * psrad $31 turns the top bits into all-ones/all-zero masks; paddd moves
         * the next bit into the sign position for the following step.
         * NOTE(review): L$xts_magic is defined outside this view; presumably it
         * carries the GF(2^128) reduction constant and the cross-half carry.
         */
   1607 	movdqa	L$xts_magic(%rip),%xmm8
   1608 	movdqa	%xmm2,%xmm15
   1609 	pshufd	$95,%xmm2,%xmm9
   1610 	pxor	%xmm0,%xmm1	/* xmm1 = round key 0 ^ last round key */
   1611 	movdqa	%xmm9,%xmm14
   1612 	paddd	%xmm9,%xmm9
   1613 	movdqa	%xmm15,%xmm10	/* tweak for block 0 */
   1614 	psrad	$31,%xmm14
   1615 	paddq	%xmm15,%xmm15
   1616 	pand	%xmm8,%xmm14
   1617 	pxor	%xmm0,%xmm10	/* pre-xor with round key 0 */
   1618 	pxor	%xmm14,%xmm15
   1619 	movdqa	%xmm9,%xmm14
   1620 	paddd	%xmm9,%xmm9
   1621 	movdqa	%xmm15,%xmm11	/* tweak for block 1 */
   1622 	psrad	$31,%xmm14
   1623 	paddq	%xmm15,%xmm15
   1624 	pand	%xmm8,%xmm14
   1625 	pxor	%xmm0,%xmm11
   1626 	pxor	%xmm14,%xmm15
   1627 	movdqa	%xmm9,%xmm14
   1628 	paddd	%xmm9,%xmm9
   1629 	movdqa	%xmm15,%xmm12	/* tweak for block 2 */
   1630 	psrad	$31,%xmm14
   1631 	paddq	%xmm15,%xmm15
   1632 	pand	%xmm8,%xmm14
   1633 	pxor	%xmm0,%xmm12
   1634 	pxor	%xmm14,%xmm15
   1635 	movdqa	%xmm9,%xmm14
   1636 	paddd	%xmm9,%xmm9
   1637 	movdqa	%xmm15,%xmm13	/* tweak for block 3 */
   1638 	psrad	$31,%xmm14
   1639 	paddq	%xmm15,%xmm15
   1640 	pand	%xmm8,%xmm14
   1641 	pxor	%xmm0,%xmm13
   1642 	pxor	%xmm14,%xmm15
   1643 	movdqa	%xmm15,%xmm14	/* tweak for block 4 */
   1644 	psrad	$31,%xmm9
   1645 	paddq	%xmm15,%xmm15
   1646 	pand	%xmm8,%xmm9
   1647 	pxor	%xmm0,%xmm14
   1648 	pxor	%xmm9,%xmm15	/* xmm15 = tweak for block 5, left without the key pre-xor */
   1649 	movaps	%xmm1,96(%rsp)	/* stash round key 0 ^ last round key */
   1650 
   1651 	subq	$96,%rdx
   1652 	jc	L$xts_enc_short	/* fewer than six whole blocks */
   1653 
        /*
         * Set up the 6-block loop: %rcx points 32 bytes past the last round key
         * and %rax / %r10 hold 112 - 16 * (round counter), a negative-or-zero
         * index that L$xts_enc_loop6 steps by 32 until it reaches zero.
         */
   1654 	movl	$16+96,%eax
   1655 	leaq	32(%r11,%r10,1),%rcx
   1656 	subq	%r10,%rax
   1657 	movups	16(%r11),%xmm1	/* round key 1 */
   1658 	movq	%rax,%r10	/* saved so %rax can be reset every iteration */
   1659 	leaq	L$xts_magic(%rip),%r8
   1660 	jmp	L$xts_enc_grandloop
   1661 
        /*
         * Main loop: six blocks per iteration.  Loads the blocks, xors each with
         * its (tweak ^ round key 0), and runs rounds 1 and 2 while converting the
         * tweak registers to (tweak ^ last round key) and spilling them to the
         * stack for the final aesenclast.
         */
   1662 .p2align	5
   1663 L$xts_enc_grandloop:
   1664 	movdqu	0(%rdi),%xmm2
   1665 	movdqa	%xmm0,%xmm8	/* xmm8 = round key 0 */
   1666 	movdqu	16(%rdi),%xmm3
   1667 	pxor	%xmm10,%xmm2
   1668 	movdqu	32(%rdi),%xmm4
   1669 	pxor	%xmm11,%xmm3
   1670 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
   1671 	movdqu	48(%rdi),%xmm5
   1672 	pxor	%xmm12,%xmm4
   1673 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
   1674 	movdqu	64(%rdi),%xmm6
   1675 	pxor	%xmm13,%xmm5
   1676 .byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
   1677 	movdqu	80(%rdi),%xmm7
   1678 	pxor	%xmm15,%xmm8	/* xmm8 = round key 0 ^ tweak for block 5 */
   1679 	movdqa	96(%rsp),%xmm9	/* xmm9 = round key 0 ^ last round key */
   1680 	pxor	%xmm14,%xmm6
   1681 .byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
   1682 	movups	32(%r11),%xmm0	/* round key 2 */
   1683 	leaq	96(%rdi),%rdi
   1684 	pxor	%xmm8,%xmm7
   1685 
   1686 	pxor	%xmm9,%xmm10	/* xmm10 = tweak 0 ^ last round key */
   1687 .byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
   1688 	pxor	%xmm9,%xmm11
   1689 	movdqa	%xmm10,0(%rsp)
   1690 .byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
   1691 	movups	48(%r11),%xmm1	/* round key 3 */
   1692 	pxor	%xmm9,%xmm12
   1693 
   1694 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
   1695 	pxor	%xmm9,%xmm13	/* spilled to 48(%rsp) later, after the loop */
   1696 	movdqa	%xmm11,16(%rsp)
   1697 .byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
   1698 	pxor	%xmm9,%xmm14
   1699 	movdqa	%xmm12,32(%rsp)
   1700 .byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
   1701 .byte	102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
   1702 	pxor	%xmm9,%xmm8
   1703 	movdqa	%xmm14,64(%rsp)
   1704 .byte	102,15,56,220,240	/* aesenc %xmm0,%xmm6 */
   1705 .byte	102,15,56,220,248	/* aesenc %xmm0,%xmm7 */
   1706 	movups	64(%r11),%xmm0	/* round key 4 */
   1707 	movdqa	%xmm8,80(%rsp)
   1708 	pshufd	$95,%xmm15,%xmm9	/* carry source for the next batch of tweaks */
   1709 	jmp	L$xts_enc_loop6
        /*
         * Two rounds per pass on all six blocks, alternating %xmm1 and %xmm0 as
         * the round key.  Runs until the index in %rax reaches zero; the addq
         * sets the flags and the following .byte/movups lines do not touch them.
         */
   1710 .p2align	5
   1711 L$xts_enc_loop6:
   1712 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
   1713 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
   1714 .byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
   1715 .byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
   1716 .byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
   1717 .byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
   1718 	movups	-64(%rcx,%rax,1),%xmm1
   1719 	addq	$32,%rax
   1720 
   1721 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
   1722 .byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
   1723 .byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
   1724 .byte	102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
   1725 .byte	102,15,56,220,240	/* aesenc %xmm0,%xmm6 */
   1726 .byte	102,15,56,220,248	/* aesenc %xmm0,%xmm7 */
   1727 	movups	-80(%rcx,%rax,1),%xmm0
   1728 	jnz	L$xts_enc_loop6
   1729 
        /*
         * Last five aesenc rounds, interleaved with computing the six tweaks for
         * the next iteration (same doubling step as in the setup code).  On exit
         * %xmm10-%xmm14 again hold (tweak ^ round key 0) and %xmm15 the sixth
         * tweak without the pre-xor.
         */
   1730 	movdqa	(%r8),%xmm8	/* reload L$xts_magic */
   1731 	movdqa	%xmm9,%xmm14
   1732 	paddd	%xmm9,%xmm9
   1733 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
   1734 	paddq	%xmm15,%xmm15
   1735 	psrad	$31,%xmm14
   1736 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
   1737 	pand	%xmm8,%xmm14
   1738 	movups	(%r11),%xmm10	/* xmm10 = round key 0 */
   1739 .byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
   1740 .byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
   1741 .byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
   1742 	pxor	%xmm14,%xmm15
   1743 	movaps	%xmm10,%xmm11	/* xmm11 = round key 0 */
   1744 .byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
   1745 	movups	-64(%rcx),%xmm1
   1746 
   1747 	movdqa	%xmm9,%xmm14
   1748 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
   1749 	paddd	%xmm9,%xmm9
   1750 	pxor	%xmm15,%xmm10	/* next tweak 0 ^ round key 0 */
   1751 .byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
   1752 	psrad	$31,%xmm14
   1753 	paddq	%xmm15,%xmm15
   1754 .byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
   1755 .byte	102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
   1756 	pand	%xmm8,%xmm14
   1757 	movaps	%xmm11,%xmm12	/* xmm12 = round key 0 */
   1758 .byte	102,15,56,220,240	/* aesenc %xmm0,%xmm6 */
   1759 	pxor	%xmm14,%xmm15
   1760 	movdqa	%xmm9,%xmm14
   1761 .byte	102,15,56,220,248	/* aesenc %xmm0,%xmm7 */
   1762 	movups	-48(%rcx),%xmm0
   1763 
   1764 	paddd	%xmm9,%xmm9
   1765 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
   1766 	pxor	%xmm15,%xmm11	/* next tweak 1 ^ round key 0 */
   1767 	psrad	$31,%xmm14
   1768 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
   1769 	paddq	%xmm15,%xmm15
   1770 	pand	%xmm8,%xmm14
   1771 .byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
   1772 .byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
   1773 	movdqa	%xmm13,48(%rsp)	/* spill current tweak 3 ^ last round key before xmm13 is reused */
   1774 	pxor	%xmm14,%xmm15
   1775 .byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
   1776 	movaps	%xmm12,%xmm13	/* xmm13 = round key 0 */
   1777 	movdqa	%xmm9,%xmm14
   1778 .byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
   1779 	movups	-32(%rcx),%xmm1
   1780 
   1781 	paddd	%xmm9,%xmm9
   1782 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
   1783 	pxor	%xmm15,%xmm12	/* next tweak 2 ^ round key 0 */
   1784 	psrad	$31,%xmm14
   1785 .byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
   1786 	paddq	%xmm15,%xmm15
   1787 	pand	%xmm8,%xmm14
   1788 .byte	102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
   1789 .byte	102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
   1790 .byte	102,15,56,220,240	/* aesenc %xmm0,%xmm6 */
   1791 	pxor	%xmm14,%xmm15
   1792 	movaps	%xmm13,%xmm14	/* xmm14 = round key 0 */
   1793 .byte	102,15,56,220,248	/* aesenc %xmm0,%xmm7 */
   1794 
   1795 	movdqa	%xmm9,%xmm0	/* xmm0 is free here and serves as the mask temporary */
   1796 	paddd	%xmm9,%xmm9
   1797 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
   1798 	pxor	%xmm15,%xmm13	/* next tweak 3 ^ round key 0 */
   1799 	psrad	$31,%xmm0
   1800 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
   1801 	paddq	%xmm15,%xmm15
   1802 	pand	%xmm8,%xmm0
   1803 .byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
   1804 .byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
   1805 	pxor	%xmm0,%xmm15
   1806 	movups	(%r11),%xmm0	/* round key 0 for the next iteration */
   1807 .byte	102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
   1808 .byte	102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
   1809 	movups	16(%r11),%xmm1	/* round key 1 for the next iteration */
   1810 
        /*
         * Final round: the memory operands are (tweak ^ last round key), so one
         * aesenclast applies both the last round key and the output tweak xor.
         */
   1811 	pxor	%xmm15,%xmm14	/* next tweak 4 ^ round key 0 */
   1812 .byte	102,15,56,221,84,36,0	/* aesenclast 0(%rsp),%xmm2 */
   1813 	psrad	$31,%xmm9
   1814 	paddq	%xmm15,%xmm15
   1815 .byte	102,15,56,221,92,36,16	/* aesenclast 16(%rsp),%xmm3 */
   1816 .byte	102,15,56,221,100,36,32	/* aesenclast 32(%rsp),%xmm4 */
   1817 	pand	%xmm8,%xmm9
   1818 	movq	%r10,%rax	/* reset the round-key index for the next iteration */
   1819 .byte	102,15,56,221,108,36,48	/* aesenclast 48(%rsp),%xmm5 */
   1820 .byte	102,15,56,221,116,36,64	/* aesenclast 64(%rsp),%xmm6 */
   1821 .byte	102,15,56,221,124,36,80	/* aesenclast 80(%rsp),%xmm7 */
   1822 	pxor	%xmm9,%xmm15	/* xmm15 = next tweak 5 */
   1823 
   1824 	leaq	96(%rsi),%rsi
   1825 	movups	%xmm2,-96(%rsi)
   1826 	movups	%xmm3,-80(%rsi)
   1827 	movups	%xmm4,-64(%rsi)
   1828 	movups	%xmm5,-48(%rsi)
   1829 	movups	%xmm6,-32(%rsi)
   1830 	movups	%xmm7,-16(%rsi)
   1831 	subq	$96,%rdx
   1832 	jnc	L$xts_enc_grandloop
   1833 
        /* Leaving the loop: recover the round counter and the key base. */
   1834 	movl	$16+96,%eax
   1835 	subl	%r10d,%eax	/* eax = 16 * round counter */
   1836 	movq	%r11,%rcx
   1837 	shrl	$4,%eax	/* eax = round counter */
   1838 
        /*
         * Zero to five whole blocks remain.  %rdx is restored to the remaining
         * byte count; %xmm0 holds round key 0, so each pxor below strips the
         * pre-xor and leaves a bare tweak.  pxor does not change flags, which
         * lets one cmpq feed both the jb and the following je.
         */
   1839 L$xts_enc_short:
   1840 
   1841 	movl	%eax,%r10d	/* keep the round counter for the stealing tail */
   1842 	pxor	%xmm0,%xmm10
   1843 	addq	$96,%rdx
   1844 	jz	L$xts_enc_done
   1845 
   1846 	pxor	%xmm0,%xmm11
   1847 	cmpq	$32,%rdx
   1848 	jb	L$xts_enc_one
   1849 	pxor	%xmm0,%xmm12
   1850 	je	L$xts_enc_two
   1851 
   1852 	pxor	%xmm0,%xmm13
   1853 	cmpq	$64,%rdx
   1854 	jb	L$xts_enc_three
   1855 	pxor	%xmm0,%xmm14
   1856 	je	L$xts_enc_four
   1857 
        /* Five blocks: run them through the 6-way helper with a zeroed sixth lane. */
   1858 	movdqu	(%rdi),%xmm2
   1859 	movdqu	16(%rdi),%xmm3
   1860 	movdqu	32(%rdi),%xmm4
   1861 	pxor	%xmm10,%xmm2
   1862 	movdqu	48(%rdi),%xmm5
   1863 	pxor	%xmm11,%xmm3
   1864 	movdqu	64(%rdi),%xmm6
   1865 	leaq	80(%rdi),%rdi
   1866 	pxor	%xmm12,%xmm4
   1867 	pxor	%xmm13,%xmm5
   1868 	pxor	%xmm14,%xmm6
   1869 	pxor	%xmm7,%xmm7
   1870 
   1871 	call	_aesni_encrypt6
   1872 
   1873 	xorps	%xmm10,%xmm2
   1874 	movdqa	%xmm15,%xmm10	/* xmm10 = next unused tweak, for the stealing tail */
   1875 	xorps	%xmm11,%xmm3
   1876 	xorps	%xmm12,%xmm4
   1877 	movdqu	%xmm2,(%rsi)
   1878 	xorps	%xmm13,%xmm5
   1879 	movdqu	%xmm3,16(%rsi)
   1880 	xorps	%xmm14,%xmm6
   1881 	movdqu	%xmm4,32(%rsi)
   1882 	movdqu	%xmm5,48(%rsi)
   1883 	movdqu	%xmm6,64(%rsi)
   1884 	leaq	80(%rsi),%rsi
   1885 	jmp	L$xts_enc_done
   1886 
        /* One block: xor tweak, encrypt inline with the data key, xor tweak. */
   1887 .p2align	4
   1888 L$xts_enc_one:
   1889 	movups	(%rdi),%xmm2
   1890 	leaq	16(%rdi),%rdi
   1891 	xorps	%xmm10,%xmm2
   1892 	movups	(%rcx),%xmm0
   1893 	movups	16(%rcx),%xmm1
   1894 	leaq	32(%rcx),%rcx
   1895 	xorps	%xmm0,%xmm2
   1896 L$oop_enc1_9:
   1897 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
   1898 	decl	%eax
   1899 	movups	(%rcx),%xmm1
   1900 	leaq	16(%rcx),%rcx
   1901 	jnz	L$oop_enc1_9
   1902 .byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
   1903 	xorps	%xmm10,%xmm2
   1904 	movdqa	%xmm11,%xmm10	/* xmm10 = next unused tweak */
   1905 	movups	%xmm2,(%rsi)
   1906 	leaq	16(%rsi),%rsi
   1907 	jmp	L$xts_enc_done
   1908 
        /* Two blocks via _aesni_encrypt2. */
   1909 .p2align	4
   1910 L$xts_enc_two:
   1911 	movups	(%rdi),%xmm2
   1912 	movups	16(%rdi),%xmm3
   1913 	leaq	32(%rdi),%rdi
   1914 	xorps	%xmm10,%xmm2
   1915 	xorps	%xmm11,%xmm3
   1916 
   1917 	call	_aesni_encrypt2
   1918 
   1919 	xorps	%xmm10,%xmm2
   1920 	movdqa	%xmm12,%xmm10	/* xmm10 = next unused tweak */
   1921 	xorps	%xmm11,%xmm3
   1922 	movups	%xmm2,(%rsi)
   1923 	movups	%xmm3,16(%rsi)
   1924 	leaq	32(%rsi),%rsi
   1925 	jmp	L$xts_enc_done
   1926 
        /* Three blocks via _aesni_encrypt3. */
   1927 .p2align	4
   1928 L$xts_enc_three:
   1929 	movups	(%rdi),%xmm2
   1930 	movups	16(%rdi),%xmm3
   1931 	movups	32(%rdi),%xmm4
   1932 	leaq	48(%rdi),%rdi
   1933 	xorps	%xmm10,%xmm2
   1934 	xorps	%xmm11,%xmm3
   1935 	xorps	%xmm12,%xmm4
   1936 
   1937 	call	_aesni_encrypt3
   1938 
   1939 	xorps	%xmm10,%xmm2
   1940 	movdqa	%xmm13,%xmm10	/* xmm10 = next unused tweak */
   1941 	xorps	%xmm11,%xmm3
   1942 	xorps	%xmm12,%xmm4
   1943 	movups	%xmm2,(%rsi)
   1944 	movups	%xmm3,16(%rsi)
   1945 	movups	%xmm4,32(%rsi)
   1946 	leaq	48(%rsi),%rsi
   1947 	jmp	L$xts_enc_done
   1948 
        /* Four blocks via _aesni_encrypt4. */
   1949 .p2align	4
   1950 L$xts_enc_four:
   1951 	movups	(%rdi),%xmm2
   1952 	movups	16(%rdi),%xmm3
   1953 	movups	32(%rdi),%xmm4
   1954 	xorps	%xmm10,%xmm2
   1955 	movups	48(%rdi),%xmm5
   1956 	leaq	64(%rdi),%rdi
   1957 	xorps	%xmm11,%xmm3
   1958 	xorps	%xmm12,%xmm4
   1959 	xorps	%xmm13,%xmm5
   1960 
   1961 	call	_aesni_encrypt4
   1962 
   1963 	pxor	%xmm10,%xmm2
   1964 	movdqa	%xmm14,%xmm10	/* xmm10 = next unused tweak */
   1965 	pxor	%xmm11,%xmm3
   1966 	pxor	%xmm12,%xmm4
   1967 	movdqu	%xmm2,(%rsi)
   1968 	pxor	%xmm13,%xmm5
   1969 	movdqu	%xmm3,16(%rsi)
   1970 	movdqu	%xmm4,32(%rsi)
   1971 	movdqu	%xmm5,48(%rsi)
   1972 	leaq	64(%rsi),%rsi
   1973 	jmp	L$xts_enc_done
   1974 
        /*
         * All whole blocks are written.  If the original length had a partial
         * block (length & 15 != 0), perform the stealing step with the tweak
         * left in %xmm10; otherwise go straight to cleanup.
         */
   1975 .p2align	4
   1976 L$xts_enc_done:
   1977 	andq	$15,%r9
   1978 	jz	L$xts_enc_ret
   1979 	movq	%r9,%rdx	/* rdx = number of leftover bytes */
   1980 
        /*
         * Byte-wise swap: each leftover input byte replaces the corresponding
         * byte of the last full output block, and the displaced output byte is
         * written out as the trailing partial block.
         */
   1981 L$xts_enc_steal:
   1982 	movzbl	(%rdi),%eax
   1983 	movzbl	-16(%rsi),%ecx
   1984 	leaq	1(%rdi),%rdi
   1985 	movb	%al,-16(%rsi)
   1986 	movb	%cl,0(%rsi)
   1987 	leaq	1(%rsi),%rsi
   1988 	subq	$1,%rdx
   1989 	jnz	L$xts_enc_steal
   1990 
   1991 	subq	%r9,%rsi	/* rewind so -16(%rsi) is the patched block again */
   1992 	movq	%r11,%rcx	/* data key base */
   1993 	movl	%r10d,%eax	/* round counter saved at L$xts_enc_short */
   1994 
        /* Re-encrypt the patched block in place: xor tweak, encrypt, xor tweak. */
   1995 	movups	-16(%rsi),%xmm2
   1996 	xorps	%xmm10,%xmm2
   1997 	movups	(%rcx),%xmm0
   1998 	movups	16(%rcx),%xmm1
   1999 	leaq	32(%rcx),%rcx
   2000 	xorps	%xmm0,%xmm2
   2001 L$oop_enc1_10:
   2002 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
   2003 	decl	%eax
   2004 	movups	(%rcx),%xmm1
   2005 	leaq	16(%rcx),%rcx
   2006 	jnz	L$oop_enc1_10
   2007 .byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
   2008 	xorps	%xmm10,%xmm2
   2009 	movups	%xmm2,-16(%rsi)
   2010 
        /*
         * Cleanup: zero every xmm register and all 112 bytes of stack scratch
         * (they held round keys, tweaks and block data), then restore the
         * caller's %rsp and %rbp.
         */
   2011 L$xts_enc_ret:
   2012 	xorps	%xmm0,%xmm0
   2013 	pxor	%xmm1,%xmm1
   2014 	pxor	%xmm2,%xmm2
   2015 	pxor	%xmm3,%xmm3
   2016 	pxor	%xmm4,%xmm4
   2017 	pxor	%xmm5,%xmm5
   2018 	pxor	%xmm6,%xmm6
   2019 	pxor	%xmm7,%xmm7
   2020 	movaps	%xmm0,0(%rsp)
   2021 	pxor	%xmm8,%xmm8
   2022 	movaps	%xmm0,16(%rsp)
   2023 	pxor	%xmm9,%xmm9
   2024 	movaps	%xmm0,32(%rsp)
   2025 	pxor	%xmm10,%xmm10
   2026 	movaps	%xmm0,48(%rsp)
   2027 	pxor	%xmm11,%xmm11
   2028 	movaps	%xmm0,64(%rsp)
   2029 	pxor	%xmm12,%xmm12
   2030 	movaps	%xmm0,80(%rsp)
   2031 	pxor	%xmm13,%xmm13
   2032 	movaps	%xmm0,96(%rsp)
   2033 	pxor	%xmm14,%xmm14
   2034 	pxor	%xmm15,%xmm15
   2035 	leaq	(%rbp),%rsp	/* %rsp -> saved %rbp slot */
   2036 	popq	%rbp
   2037 L$xts_enc_epilogue:
   2038 	.byte	0xf3,0xc3	/* rep ret */
   2039 
   2040 .globl	_aesni_xts_decrypt
   2041 .private_extern _aesni_xts_decrypt
   2042 
   2043 .p2align	4
   2044 _aesni_xts_decrypt:
   2045 	leaq	(%rsp),%rax
   2046 	pushq	%rbp
   2047 	subq	$112,%rsp
   2048 	andq	$-16,%rsp
   2049 	leaq	-8(%rax),%rbp
   2050 	movups	(%r9),%xmm2
   2051 	movl	240(%r8),%eax
   2052 	movl	240(%rcx),%r10d
   2053 	movups	(%r8),%xmm0
   2054 	movups	16(%r8),%xmm1
   2055 	leaq	32(%r8),%r8
   2056 	xorps	%xmm0,%xmm2
   2057 L$oop_enc1_11:
   2058 .byte	102,15,56,220,209
   2059 	decl	%eax
   2060 	movups	(%r8),%xmm1
   2061 	leaq	16(%r8),%r8
   2062 	jnz	L$oop_enc1_11
   2063 .byte	102,15,56,221,209
   2064 	xorl	%eax,%eax
   2065 	testq	$15,%rdx
   2066 	setnz	%al
   2067 	shlq	$4,%rax
   2068 	subq	%rax,%rdx
   2069 
   2070 	movups	(%rcx),%xmm0
   2071 	movq	%rcx,%r11
   2072 	movl	%r10d,%eax
   2073 	shll	$4,%r10d
   2074 	movq	%rdx,%r9
   2075 	andq	$-16,%rdx
   2076 
   2077 	movups	16(%rcx,%r10,1),%xmm1
   2078 
   2079 	movdqa	L$xts_magic(%rip),%xmm8
   2080 	movdqa	%xmm2,%xmm15
   2081 	pshufd	$95,%xmm2,%xmm9
   2082 	pxor	%xmm0,%xmm1
   2083 	movdqa	%xmm9,%xmm14
   2084 	paddd	%xmm9,%xmm9
   2085 	movdqa	%xmm15,%xmm10
   2086 	psrad	$31,%xmm14
   2087 	paddq	%xmm15,%xmm15
   2088 	pand	%xmm8,%xmm14
   2089 	pxor	%xmm0,%xmm10
   2090 	pxor	%xmm14,%xmm15
   2091 	movdqa	%xmm9,%xmm14
   2092 	paddd	%xmm9,%xmm9
   2093 	movdqa	%xmm15,%xmm11
   2094 	psrad	$31,%xmm14
   2095 	paddq	%xmm15,%xmm15
   2096 	pand	%xmm8,%xmm14
   2097 	pxor	%xmm0,%xmm11
   2098 	pxor	%xmm14,%xmm15
   2099 	movdqa	%xmm9,%xmm14
   2100 	paddd	%xmm9,%xmm9
   2101 	movdqa	%xmm15,%xmm12
   2102 	psrad	$31,%xmm14
   2103 	paddq	%xmm15,%xmm15
   2104 	pand	%xmm8,%xmm14
   2105 	pxor	%xmm0,%xmm12
   2106 	pxor	%xmm14,%xmm15
   2107 	movdqa	%xmm9,%xmm14
   2108 	paddd	%xmm9,%xmm9
   2109 	movdqa	%xmm15,%xmm13
   2110 	psrad	$31,%xmm14
   2111 	paddq	%xmm15,%xmm15
   2112 	pand	%xmm8,%xmm14
   2113 	pxor	%xmm0,%xmm13
   2114 	pxor	%xmm14,%xmm15
   2115 	movdqa	%xmm15,%xmm14
   2116 	psrad	$31,%xmm9
   2117 	paddq	%xmm15,%xmm15
   2118 	pand	%xmm8,%xmm9
   2119 	pxor	%xmm0,%xmm14
   2120 	pxor	%xmm9,%xmm15
   2121 	movaps	%xmm1,96(%rsp)
   2122 
   2123 	subq	$96,%rdx
   2124 	jc	L$xts_dec_short
   2125 
   2126 	movl	$16+96,%eax
   2127 	leaq	32(%r11,%r10,1),%rcx
   2128 	subq	%r10,%rax
   2129 	movups	16(%r11),%xmm1
   2130 	movq	%rax,%r10
   2131 	leaq	L$xts_magic(%rip),%r8
   2132 	jmp	L$xts_dec_grandloop
   2133 
   2134 .p2align	5
   2135 L$xts_dec_grandloop:
   2136 	movdqu	0(%rdi),%xmm2
   2137 	movdqa	%xmm0,%xmm8
   2138 	movdqu	16(%rdi),%xmm3
   2139 	pxor	%xmm10,%xmm2
   2140 	movdqu	32(%rdi),%xmm4
   2141 	pxor	%xmm11,%xmm3
   2142 .byte	102,15,56,222,209
   2143 	movdqu	48(%rdi),%xmm5
   2144 	pxor	%xmm12,%xmm4
   2145 .byte	102,15,56,222,217
   2146 	movdqu	64(%rdi),%xmm6
   2147 	pxor	%xmm13,%xmm5
   2148 .byte	102,15,56,222,225
   2149 	movdqu	80(%rdi),%xmm7
   2150 	pxor	%xmm15,%xmm8
   2151 	movdqa	96(%rsp),%xmm9
   2152 	pxor	%xmm14,%xmm6
   2153 .byte	102,15,56,222,233
   2154 	movups	32(%r11),%xmm0
   2155 	leaq	96(%rdi),%rdi
   2156 	pxor	%xmm8,%xmm7
   2157 
   2158 	pxor	%xmm9,%xmm10
   2159 .byte	102,15,56,222,241
   2160 	pxor	%xmm9,%xmm11
   2161 	movdqa	%xmm10,0(%rsp)
   2162 .byte	102,15,56,222,249
   2163 	movups	48(%r11),%xmm1
   2164 	pxor	%xmm9,%xmm12
   2165 
   2166 .byte	102,15,56,222,208
   2167 	pxor	%xmm9,%xmm13
   2168 	movdqa	%xmm11,16(%rsp)
   2169 .byte	102,15,56,222,216
   2170 	pxor	%xmm9,%xmm14
   2171 	movdqa	%xmm12,32(%rsp)
   2172 .byte	102,15,56,222,224
   2173 .byte	102,15,56,222,232
   2174 	pxor	%xmm9,%xmm8
   2175 	movdqa	%xmm14,64(%rsp)
   2176 .byte	102,15,56,222,240
   2177 .byte	102,15,56,222,248
   2178 	movups	64(%r11),%xmm0
   2179 	movdqa	%xmm8,80(%rsp)
   2180 	pshufd	$95,%xmm15,%xmm9
   2181 	jmp	L$xts_dec_loop6
   2182 .p2align	5
   2183 L$xts_dec_loop6:
   2184 .byte	102,15,56,222,209
   2185 .byte	102,15,56,222,217
   2186 .byte	102,15,56,222,225
   2187 .byte	102,15,56,222,233
   2188 .byte	102,15,56,222,241
   2189 .byte	102,15,56,222,249
   2190 	movups	-64(%rcx,%rax,1),%xmm1
   2191 	addq	$32,%rax
   2192 
   2193 .byte	102,15,56,222,208
   2194 .byte	102,15,56,222,216
   2195 .byte	102,15,56,222,224
   2196 .byte	102,15,56,222,232
   2197 .byte	102,15,56,222,240
   2198 .byte	102,15,56,222,248
   2199 	movups	-80(%rcx,%rax,1),%xmm0
   2200 	jnz	L$xts_dec_loop6
   2201 
   2202 	movdqa	(%r8),%xmm8
   2203 	movdqa	%xmm9,%xmm14
   2204 	paddd	%xmm9,%xmm9
   2205 .byte	102,15,56,222,209
   2206 	paddq	%xmm15,%xmm15
   2207 	psrad	$31,%xmm14
   2208 .byte	102,15,56,222,217
   2209 	pand	%xmm8,%xmm14
   2210 	movups	(%r11),%xmm10
   2211 .byte	102,15,56,222,225
   2212 .byte	102,15,56,222,233
   2213 .byte	102,15,56,222,241
   2214 	pxor	%xmm14,%xmm15
   2215 	movaps	%xmm10,%xmm11
   2216 .byte	102,15,56,222,249
   2217 	movups	-64(%rcx),%xmm1
   2218 
   2219 	movdqa	%xmm9,%xmm14
   2220 .byte	102,15,56,222,208
   2221 	paddd	%xmm9,%xmm9
   2222 	pxor	%xmm15,%xmm10
   2223 .byte	102,15,56,222,216
   2224 	psrad	$31,%xmm14
   2225 	paddq	%xmm15,%xmm15
   2226 .byte	102,15,56,222,224
   2227 .byte	102,15,56,222,232
   2228 	pand	%xmm8,%xmm14
   2229 	movaps	%xmm11,%xmm12
   2230 .byte	102,15,56,222,240
   2231 	pxor	%xmm14,%xmm15
   2232 	movdqa	%xmm9,%xmm14
   2233 .byte	102,15,56,222,248
   2234 	movups	-48(%rcx),%xmm0
   2235 
   2236 	paddd	%xmm9,%xmm9
   2237 .byte	102,15,56,222,209
   2238 	pxor	%xmm15,%xmm11
   2239 	psrad	$31,%xmm14
   2240 .byte	102,15,56,222,217
   2241 	paddq	%xmm15,%xmm15
   2242 	pand	%xmm8,%xmm14
   2243 .byte	102,15,56,222,225
   2244 .byte	102,15,56,222,233
   2245 	movdqa	%xmm13,48(%rsp)
   2246 	pxor	%xmm14,%xmm15
   2247 .byte	102,15,56,222,241
   2248 	movaps	%xmm12,%xmm13
   2249 	movdqa	%xmm9,%xmm14
   2250 .byte	102,15,56,222,249
   2251 	movups	-32(%rcx),%xmm1
   2252 
   2253 	paddd	%xmm9,%xmm9
   2254 .byte	102,15,56,222,208
   2255 	pxor	%xmm15,%xmm12
   2256 	psrad	$31,%xmm14
   2257 .byte	102,15,56,222,216
   2258 	paddq	%xmm15,%xmm15
   2259 	pand	%xmm8,%xmm14
   2260 .byte	102,15,56,222,224
   2261 .byte	102,15,56,222,232
   2262 .byte	102,15,56,222,240
   2263 	pxor	%xmm14,%xmm15
   2264 	movaps	%xmm13,%xmm14
   2265 .byte	102,15,56,222,248
   2266 
   2267 	movdqa	%xmm9,%xmm0
   2268 	paddd	%xmm9,%xmm9
   2269 .byte	102,15,56,222,209
   2270 	pxor	%xmm15,%xmm13
   2271 	psrad	$31,%xmm0
   2272 .byte	102,15,56,222,217
   2273 	paddq	%xmm15,%xmm15
   2274 	pand	%xmm8,%xmm0
   2275 .byte	102,15,56,222,225
   2276 .byte	102,15,56,222,233
   2277 	pxor	%xmm0,%xmm15
   2278 	movups	(%r11),%xmm0
   2279 .byte	102,15,56,222,241
   2280 .byte	102,15,56,222,249
   2281 	movups	16(%r11),%xmm1
   2282 
   2283 	pxor	%xmm15,%xmm14
   2284 .byte	102,15,56,223,84,36,0
   2285 	psrad	$31,%xmm9
   2286 	paddq	%xmm15,%xmm15
   2287 .byte	102,15,56,223,92,36,16
   2288 .byte	102,15,56,223,100,36,32
   2289 	pand	%xmm8,%xmm9
   2290 	movq	%r10,%rax
   2291 .byte	102,15,56,223,108,36,48
   2292 .byte	102,15,56,223,116,36,64
   2293 .byte	102,15,56,223,124,36,80
   2294 	pxor	%xmm9,%xmm15
   2295 
   2296 	leaq	96(%rsi),%rsi
   2297 	movups	%xmm2,-96(%rsi)
   2298 	movups	%xmm3,-80(%rsi)
   2299 	movups	%xmm4,-64(%rsi)
   2300 	movups	%xmm5,-48(%rsi)
   2301 	movups	%xmm6,-32(%rsi)
   2302 	movups	%xmm7,-16(%rsi)
   2303 	subq	$96,%rdx
   2304 	jnc	L$xts_dec_grandloop
   2305 
   2306 	movl	$16+96,%eax
   2307 	subl	%r10d,%eax
   2308 	movq	%r11,%rcx
   2309 	shrl	$4,%eax
   2310 
   2311 L$xts_dec_short:
   2312 
   2313 	movl	%eax,%r10d
   2314 	pxor	%xmm0,%xmm10
   2315 	pxor	%xmm0,%xmm11
   2316 	addq	$96,%rdx
   2317 	jz	L$xts_dec_done
   2318 
   2319 	pxor	%xmm0,%xmm12
   2320 	cmpq	$32,%rdx
   2321 	jb	L$xts_dec_one
   2322 	pxor	%xmm0,%xmm13
   2323 	je	L$xts_dec_two
   2324 
   2325 	pxor	%xmm0,%xmm14
   2326 	cmpq	$64,%rdx
   2327 	jb	L$xts_dec_three
   2328 	je	L$xts_dec_four
   2329 
   2330 	movdqu	(%rdi),%xmm2
   2331 	movdqu	16(%rdi),%xmm3
   2332 	movdqu	32(%rdi),%xmm4
   2333 	pxor	%xmm10,%xmm2
   2334 	movdqu	48(%rdi),%xmm5
   2335 	pxor	%xmm11,%xmm3
   2336 	movdqu	64(%rdi),%xmm6
   2337 	leaq	80(%rdi),%rdi
   2338 	pxor	%xmm12,%xmm4
   2339 	pxor	%xmm13,%xmm5
   2340 	pxor	%xmm14,%xmm6
   2341 
   2342 	call	_aesni_decrypt6
   2343 
   2344 	xorps	%xmm10,%xmm2
   2345 	xorps	%xmm11,%xmm3
   2346 	xorps	%xmm12,%xmm4
   2347 	movdqu	%xmm2,(%rsi)
   2348 	xorps	%xmm13,%xmm5
   2349 	movdqu	%xmm3,16(%rsi)
   2350 	xorps	%xmm14,%xmm6
   2351 	movdqu	%xmm4,32(%rsi)
   2352 	pxor	%xmm14,%xmm14
   2353 	movdqu	%xmm5,48(%rsi)
   2354 	pcmpgtd	%xmm15,%xmm14
   2355 	movdqu	%xmm6,64(%rsi)
   2356 	leaq	80(%rsi),%rsi
   2357 	pshufd	$19,%xmm14,%xmm11
   2358 	andq	$15,%r9
   2359 	jz	L$xts_dec_ret
   2360 
   2361 	movdqa	%xmm15,%xmm10
   2362 	paddq	%xmm15,%xmm15
   2363 	pand	%xmm8,%xmm11
   2364 	pxor	%xmm15,%xmm11
   2365 	jmp	L$xts_dec_done2
   2366 
   2367 .p2align	4
   2368 L$xts_dec_one:
   2369 	movups	(%rdi),%xmm2
   2370 	leaq	16(%rdi),%rdi
   2371 	xorps	%xmm10,%xmm2
   2372 	movups	(%rcx),%xmm0
   2373 	movups	16(%rcx),%xmm1
   2374 	leaq	32(%rcx),%rcx
   2375 	xorps	%xmm0,%xmm2
   2376 L$oop_dec1_12:
   2377 .byte	102,15,56,222,209
   2378 	decl	%eax
   2379 	movups	(%rcx),%xmm1
   2380 	leaq	16(%rcx),%rcx
   2381 	jnz	L$oop_dec1_12
   2382 .byte	102,15,56,223,209
   2383 	xorps	%xmm10,%xmm2
   2384 	movdqa	%xmm11,%xmm10
   2385 	movups	%xmm2,(%rsi)
   2386 	movdqa	%xmm12,%xmm11
   2387 	leaq	16(%rsi),%rsi
   2388 	jmp	L$xts_dec_done
   2389 
   2390 .p2align	4
   2391 L$xts_dec_two:
   2392 	movups	(%rdi),%xmm2
   2393 	movups	16(%rdi),%xmm3
   2394 	leaq	32(%rdi),%rdi
   2395 	xorps	%xmm10,%xmm2
   2396 	xorps	%xmm11,%xmm3
   2397 
   2398 	call	_aesni_decrypt2
   2399 
   2400 	xorps	%xmm10,%xmm2
   2401 	movdqa	%xmm12,%xmm10
   2402 	xorps	%xmm11,%xmm3
   2403 	movdqa	%xmm13,%xmm11
   2404 	movups	%xmm2,(%rsi)
   2405 	movups	%xmm3,16(%rsi)
   2406 	leaq	32(%rsi),%rsi
   2407 	jmp	L$xts_dec_done
   2408 
   2409 .p2align	4
   2410 L$xts_dec_three:
   2411 	movups	(%rdi),%xmm2
   2412 	movups	16(%rdi),%xmm3
   2413 	movups	32(%rdi),%xmm4
   2414 	leaq	48(%rdi),%rdi
   2415 	xorps	%xmm10,%xmm2
   2416 	xorps	%xmm11,%xmm3
   2417 	xorps	%xmm12,%xmm4
   2418 
   2419 	call	_aesni_decrypt3
   2420 
   2421 	xorps	%xmm10,%xmm2
   2422 	movdqa	%xmm13,%xmm10
   2423 	xorps	%xmm11,%xmm3
   2424 	movdqa	%xmm14,%xmm11
   2425 	xorps	%xmm12,%xmm4
   2426 	movups	%xmm2,(%rsi)
   2427 	movups	%xmm3,16(%rsi)
   2428 	movups	%xmm4,32(%rsi)
   2429 	leaq	48(%rsi),%rsi
   2430 	jmp	L$xts_dec_done
   2431 
   2432 .p2align	4
   2433 L$xts_dec_four:
   2434 	movups	(%rdi),%xmm2
   2435 	movups	16(%rdi),%xmm3
   2436 	movups	32(%rdi),%xmm4
   2437 	xorps	%xmm10,%xmm2
   2438 	movups	48(%rdi),%xmm5
   2439 	leaq	64(%rdi),%rdi
   2440 	xorps	%xmm11,%xmm3
   2441 	xorps	%xmm12,%xmm4
   2442 	xorps	%xmm13,%xmm5
   2443 
   2444 	call	_aesni_decrypt4
   2445 
   2446 	pxor	%xmm10,%xmm2
   2447 	movdqa	%xmm14,%xmm10
   2448 	pxor	%xmm11,%xmm3
   2449 	movdqa	%xmm15,%xmm11
   2450 	pxor	%xmm12,%xmm4
   2451 	movdqu	%xmm2,(%rsi)
   2452 	pxor	%xmm13,%xmm5
   2453 	movdqu	%xmm3,16(%rsi)
   2454 	movdqu	%xmm4,32(%rsi)
   2455 	movdqu	%xmm5,48(%rsi)
   2456 	leaq	64(%rsi),%rsi
   2457 	jmp	L$xts_dec_done
   2458 
   2459 .p2align	4
   2460 L$xts_dec_done:
   2461 	andq	$15,%r9
   2462 	jz	L$xts_dec_ret
   2463 L$xts_dec_done2:
   2464 	movq	%r9,%rdx
   2465 	movq	%r11,%rcx
   2466 	movl	%r10d,%eax
   2467 
   2468 	movups	(%rdi),%xmm2
   2469 	xorps	%xmm11,%xmm2
   2470 	movups	(%rcx),%xmm0
   2471 	movups	16(%rcx),%xmm1
   2472 	leaq	32(%rcx),%rcx
   2473 	xorps	%xmm0,%xmm2
   2474 L$oop_dec1_13:
   2475 .byte	102,15,56,222,209
   2476 	decl	%eax
   2477 	movups	(%rcx),%xmm1
   2478 	leaq	16(%rcx),%rcx
   2479 	jnz	L$oop_dec1_13
   2480 .byte	102,15,56,223,209
   2481 	xorps	%xmm11,%xmm2
   2482 	movups	%xmm2,(%rsi)
   2483 
   2484 L$xts_dec_steal:
   2485 	movzbl	16(%rdi),%eax
   2486 	movzbl	(%rsi),%ecx
   2487 	leaq	1(%rdi),%rdi
   2488 	movb	%al,(%rsi)
   2489 	movb	%cl,16(%rsi)
   2490 	leaq	1(%rsi),%rsi
   2491 	subq	$1,%rdx
   2492 	jnz	L$xts_dec_steal
   2493 
   2494 	subq	%r9,%rsi
   2495 	movq	%r11,%rcx
   2496 	movl	%r10d,%eax
   2497 
   2498 	movups	(%rsi),%xmm2
   2499 	xorps	%xmm10,%xmm2
   2500 	movups	(%rcx),%xmm0
   2501 	movups	16(%rcx),%xmm1
   2502 	leaq	32(%rcx),%rcx
   2503 	xorps	%xmm0,%xmm2
   2504 L$oop_dec1_14:
   2505 .byte	102,15,56,222,209
   2506 	decl	%eax
   2507 	movups	(%rcx),%xmm1
   2508 	leaq	16(%rcx),%rcx
   2509 	jnz	L$oop_dec1_14
   2510 .byte	102,15,56,223,209
   2511 	xorps	%xmm10,%xmm2
   2512 	movups	%xmm2,(%rsi)
   2513 
   2514 L$xts_dec_ret:
   2515 	xorps	%xmm0,%xmm0
   2516 	pxor	%xmm1,%xmm1
   2517 	pxor	%xmm2,%xmm2
   2518 	pxor	%xmm3,%xmm3
   2519 	pxor	%xmm4,%xmm4
   2520 	pxor	%xmm5,%xmm5
   2521 	pxor	%xmm6,%xmm6
   2522 	pxor	%xmm7,%xmm7
   2523 	movaps	%xmm0,0(%rsp)
   2524 	pxor	%xmm8,%xmm8
   2525 	movaps	%xmm0,16(%rsp)
   2526 	pxor	%xmm9,%xmm9
   2527 	movaps	%xmm0,32(%rsp)
   2528 	pxor	%xmm10,%xmm10
   2529 	movaps	%xmm0,48(%rsp)
   2530 	pxor	%xmm11,%xmm11
   2531 	movaps	%xmm0,64(%rsp)
   2532 	pxor	%xmm12,%xmm12
   2533 	movaps	%xmm0,80(%rsp)
   2534 	pxor	%xmm13,%xmm13
   2535 	movaps	%xmm0,96(%rsp)
   2536 	pxor	%xmm14,%xmm14
   2537 	pxor	%xmm15,%xmm15
   2538 	leaq	(%rbp),%rsp
   2539 	popq	%rbp
   2540 L$xts_dec_epilogue:
   2541 	.byte	0xf3,0xc3
   2542 
   2543 .globl	_aesni_cbc_encrypt
   2544 .private_extern _aesni_cbc_encrypt
        /*
         * _aesni_cbc_encrypt: AES in CBC mode, both directions, using AES-NI
         * instructions that are emitted as raw .byte sequences.
         *
         * Registers as the code below reads them on entry:
         *   %rdi  input pointer
         *   %rsi  output pointer
         *   %rdx  length in bytes (zero returns immediately)
         *   %rcx  key schedule: 16-byte round keys from offset 0, and a 32-bit
         *         loop count at offset 240
         *   %r8   16-byte IV; read at the start and rewritten with the new
         *         chaining value on every path except the zero-length return
         *   %r9d  non-zero selects encryption, zero selects decryption
         *
         * Byte-sequence legend (66 0F 38 xx /r):
         *   102,15,56,220 = aesenc      102,15,56,221 = aesenclast
         *   102,15,56,222 = aesdec      102,15,56,223 = aesdeclast
         *   a 68/65/69 byte after 102 is a REX prefix selecting xmm8..xmm15
         *   .byte 0xf3,0xc3   = rep ret
         *   .long 0x9066A4F3  = rep movsb followed by a 2-byte nop
         *   .long 0x9066AAF3  = rep stosb followed by a 2-byte nop
         *
         * The bulk decrypt path builds a small 16-byte-aligned stack frame and
         * calls _aesni_decrypt2/3/4/6/8, which are defined outside this part of
         * the file.
         */
   2545 
   2546 .p2align	4
   2547 _aesni_cbc_encrypt:
   2548 	testq	%rdx,%rdx	/* nothing to do for a zero length */
   2549 	jz	L$cbc_ret
   2550 
   2551 	movl	240(%rcx),%r10d	/* r10d = round-loop count kept for reloading %eax */
   2552 	movq	%rcx,%r11	/* r11 = key schedule base kept for reloading %rcx */
   2553 	testl	%r9d,%r9d
   2554 	jz	L$cbc_decrypt
   2555 
        /* ---- encryption: strictly one block at a time, xmm2 carries the chain ---- */
   2556 	movups	(%r8),%xmm2	/* xmm2 = IV */
   2557 	movl	%r10d,%eax
   2558 	cmpq	$16,%rdx
   2559 	jb	L$cbc_enc_tail	/* fewer than 16 bytes: pad first */
   2560 	subq	$16,%rdx
   2561 	jmp	L$cbc_enc_loop
   2562 .p2align	4
   2563 L$cbc_enc_loop:
   2564 	movups	(%rdi),%xmm3	/* xmm3 = next input block */
   2565 	leaq	16(%rdi),%rdi
   2566 
   2567 	movups	(%rcx),%xmm0	/* round key 0 */
   2568 	movups	16(%rcx),%xmm1	/* round key 1 */
   2569 	xorps	%xmm0,%xmm3
   2570 	leaq	32(%rcx),%rcx
   2571 	xorps	%xmm3,%xmm2	/* xmm2 = chain ^ input ^ round key 0 */
   2572 L$oop_enc1_15:
   2573 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
   2574 	decl	%eax
   2575 	movups	(%rcx),%xmm1
   2576 	leaq	16(%rcx),%rcx
   2577 	jnz	L$oop_enc1_15
   2578 .byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
   2579 	movl	%r10d,%eax	/* rewind loop count and key pointer for the next block */
   2580 	movq	%r11,%rcx
   2581 	movups	%xmm2,0(%rsi)
   2582 	leaq	16(%rsi),%rsi
   2583 	subq	$16,%rdx
   2584 	jnc	L$cbc_enc_loop	/* no borrow: another whole block is available */
   2585 	addq	$16,%rdx	/* undo the last subtraction: rdx = leftover bytes */
   2586 	jnz	L$cbc_enc_tail
   2587 	pxor	%xmm0,%xmm0	/* wipe round keys from registers */
   2588 	pxor	%xmm1,%xmm1
   2589 	movups	%xmm2,(%r8)	/* last output block becomes the stored IV */
   2590 	pxor	%xmm2,%xmm2
   2591 	pxor	%xmm3,%xmm3
   2592 	jmp	L$cbc_ret
   2593 
        /*
         * Partial final block: copy the %rdx leftover bytes to the output,
         * zero-fill up to 16 bytes there, then run the loop once more with
         * input and output both pointing at that padded block.
         */
   2594 L$cbc_enc_tail:
   2595 	movq	%rdx,%rcx
   2596 	xchgq	%rdi,%rsi	/* rdi = output, rsi = input for the string ops */
   2597 .long	0x9066A4F3	/* rep movsb: copy rcx bytes */
   2598 	movl	$16,%ecx
   2599 	subq	%rdx,%rcx	/* rcx = 16 - leftover */
   2600 	xorl	%eax,%eax
   2601 .long	0x9066AAF3	/* rep stosb: store rcx zero bytes */
   2602 	leaq	-16(%rdi),%rdi	/* back to the start of the padded block */
   2603 	movl	%r10d,%eax
   2604 	movq	%rdi,%rsi	/* encrypt it in place */
   2605 	movq	%r11,%rcx
   2606 	xorq	%rdx,%rdx	/* rdx = 0 so the loop exits after this block */
   2607 	jmp	L$cbc_enc_loop
   2608 
        /* ---- decryption ---- */
   2609 .p2align	4
   2610 L$cbc_decrypt:
   2611 	cmpq	$16,%rdx
   2612 	jne	L$cbc_decrypt_bulk
   2613 
   2614 
   2615 
        /* Exactly one block: handled inline without a stack frame. */
   2616 	movdqu	(%rdi),%xmm2	/* xmm2 = input block */
   2617 	movdqu	(%r8),%xmm3	/* xmm3 = IV */
   2618 	movdqa	%xmm2,%xmm4	/* keep the input block: it is the next IV */
   2619 	movups	(%rcx),%xmm0
   2620 	movups	16(%rcx),%xmm1
   2621 	leaq	32(%rcx),%rcx
   2622 	xorps	%xmm0,%xmm2
   2623 L$oop_dec1_16:
   2624 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
   2625 	decl	%r10d
   2626 	movups	(%rcx),%xmm1
   2627 	leaq	16(%rcx),%rcx
   2628 	jnz	L$oop_dec1_16
   2629 .byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
   2630 	pxor	%xmm0,%xmm0
   2631 	pxor	%xmm1,%xmm1
   2632 	movdqu	%xmm4,(%r8)	/* store new IV */
   2633 	xorps	%xmm3,%xmm2	/* CBC: xor with the previous chaining value */
   2634 	pxor	%xmm3,%xmm3
   2635 	movups	%xmm2,(%rsi)
   2636 	pxor	%xmm2,%xmm2
   2637 	jmp	L$cbc_ret
   2638 .p2align	4
        /*
         * Any other length. Frame: push %rbp, reserve 16 bytes, align %rsp down
         * to 16; %rbp is set to the address of the pushed value so that
         * "leaq (%rbp),%rsp ; popq %rbp" restores both on exit.
         * xmm10 carries the chaining value from here on.
         */
   2639 L$cbc_decrypt_bulk:
   2640 	leaq	(%rsp),%rax
   2641 	pushq	%rbp
   2642 	subq	$16,%rsp
   2643 	andq	$-16,%rsp
   2644 	leaq	-8(%rax),%rbp
   2645 	movups	(%r8),%xmm10
   2646 	movl	%r10d,%eax
   2647 	cmpq	$80,%rdx
   2648 	jbe	L$cbc_dec_tail	/* at most 80 bytes: go straight to the tail code */
   2649 
        /* Load six input blocks into xmm2..xmm7; xmm11..xmm15 keep copies of the
           first five for the CBC xor of the following block. */
   2650 	movups	(%rcx),%xmm0
   2651 	movdqu	0(%rdi),%xmm2
   2652 	movdqu	16(%rdi),%xmm3
   2653 	movdqa	%xmm2,%xmm11
   2654 	movdqu	32(%rdi),%xmm4
   2655 	movdqa	%xmm3,%xmm12
   2656 	movdqu	48(%rdi),%xmm5
   2657 	movdqa	%xmm4,%xmm13
   2658 	movdqu	64(%rdi),%xmm6
   2659 	movdqa	%xmm5,%xmm14
   2660 	movdqu	80(%rdi),%xmm7
   2661 	movdqa	%xmm6,%xmm15
   2662 	movl	_OPENSSL_ia32cap_P+4(%rip),%r9d	/* second capability dword */
   2663 	cmpq	$112,%rdx
   2664 	jbe	L$cbc_dec_six_or_seven
   2665 
        /* NOTE(review): mask 0x4400000 keeps bits 22 and 26; the 6-block loop is
           chosen when only bit 22 is set. Presumably MOVBE without XSAVE in the
           CPUID.1:ECX layout -- confirm against the OPENSSL_ia32cap docs. */
   2666 	andl	$71303168,%r9d
   2667 	subq	$80,%rdx
   2668 	cmpl	$4194304,%r9d
   2669 	je	L$cbc_dec_loop6_enter
   2670 	subq	$32,%rdx	/* length is now biased by -112 for the 8-block loop */
   2671 	leaq	112(%rcx),%rcx	/* key pointer biased by +112; round keys are N-112(%rcx) */
   2672 	jmp	L$cbc_dec_loop8_enter
   2673 .p2align	4
        /* ---- eight blocks per iteration; xmm9 (last result) is stored lazily ---- */
   2674 L$cbc_dec_loop8:
   2675 	movups	%xmm9,(%rsi)	/* flush the eighth result of the previous iteration */
   2676 	leaq	16(%rsi),%rsi
   2677 L$cbc_dec_loop8_enter:
   2678 	movdqu	96(%rdi),%xmm8
   2679 	pxor	%xmm0,%xmm2	/* xor round key 0 into all eight blocks */
   2680 	movdqu	112(%rdi),%xmm9
   2681 	pxor	%xmm0,%xmm3
   2682 	movups	16-112(%rcx),%xmm1
   2683 	pxor	%xmm0,%xmm4
   2684 	xorq	%r11,%r11
   2685 	cmpq	$112,%rdx	/* flags consumed by the setnc below; SSE/AES ops leave them alone */
   2686 	pxor	%xmm0,%xmm5
   2687 	pxor	%xmm0,%xmm6
   2688 	pxor	%xmm0,%xmm7
   2689 	pxor	%xmm0,%xmm8
   2690 
   2691 .byte	102,15,56,222,209	/* aesdec %xmm1 into xmm2..xmm9 (eight lines, interleaved) */
   2692 	pxor	%xmm0,%xmm9
   2693 	movups	32-112(%rcx),%xmm0
   2694 .byte	102,15,56,222,217
   2695 .byte	102,15,56,222,225
   2696 .byte	102,15,56,222,233
   2697 .byte	102,15,56,222,241
   2698 .byte	102,15,56,222,249
   2699 .byte	102,68,15,56,222,193
   2700 	setnc	%r11b	/* r11 = 1 when rdx >= 112 (unsigned), else 0 */
   2701 	shlq	$7,%r11	/* ... scaled to 128 or 0 */
   2702 .byte	102,68,15,56,222,201
   2703 	addq	%rdi,%r11	/* r11 = address the next batch of saved inputs is read from */
   2704 	movups	48-112(%rcx),%xmm1
   2705 .byte	102,15,56,222,208	/* aesdec %xmm0 into xmm2..xmm9 */
   2706 .byte	102,15,56,222,216
   2707 .byte	102,15,56,222,224
   2708 .byte	102,15,56,222,232
   2709 .byte	102,15,56,222,240
   2710 .byte	102,15,56,222,248
   2711 .byte	102,68,15,56,222,192
   2712 .byte	102,68,15,56,222,200
   2713 	movups	64-112(%rcx),%xmm0
   2714 	nop
   2715 .byte	102,15,56,222,209	/* aesdec %xmm1 into xmm2..xmm9 */
   2716 .byte	102,15,56,222,217
   2717 .byte	102,15,56,222,225
   2718 .byte	102,15,56,222,233
   2719 .byte	102,15,56,222,241
   2720 .byte	102,15,56,222,249
   2721 .byte	102,68,15,56,222,193
   2722 .byte	102,68,15,56,222,201
   2723 	movups	80-112(%rcx),%xmm1
   2724 	nop
   2725 .byte	102,15,56,222,208	/* aesdec %xmm0 into xmm2..xmm9 */
   2726 .byte	102,15,56,222,216
   2727 .byte	102,15,56,222,224
   2728 .byte	102,15,56,222,232
   2729 .byte	102,15,56,222,240
   2730 .byte	102,15,56,222,248
   2731 .byte	102,68,15,56,222,192
   2732 .byte	102,68,15,56,222,200
   2733 	movups	96-112(%rcx),%xmm0
   2734 	nop
   2735 .byte	102,15,56,222,209	/* aesdec %xmm1 into xmm2..xmm9 */
   2736 .byte	102,15,56,222,217
   2737 .byte	102,15,56,222,225
   2738 .byte	102,15,56,222,233
   2739 .byte	102,15,56,222,241
   2740 .byte	102,15,56,222,249
   2741 .byte	102,68,15,56,222,193
   2742 .byte	102,68,15,56,222,201
   2743 	movups	112-112(%rcx),%xmm1
   2744 	nop
   2745 .byte	102,15,56,222,208	/* aesdec %xmm0 into xmm2..xmm9 */
   2746 .byte	102,15,56,222,216
   2747 .byte	102,15,56,222,224
   2748 .byte	102,15,56,222,232
   2749 .byte	102,15,56,222,240
   2750 .byte	102,15,56,222,248
   2751 .byte	102,68,15,56,222,192
   2752 .byte	102,68,15,56,222,200
   2753 	movups	128-112(%rcx),%xmm0
   2754 	nop
   2755 .byte	102,15,56,222,209	/* aesdec %xmm1 into xmm2..xmm9 */
   2756 .byte	102,15,56,222,217
   2757 .byte	102,15,56,222,225
   2758 .byte	102,15,56,222,233
   2759 .byte	102,15,56,222,241
   2760 .byte	102,15,56,222,249
   2761 .byte	102,68,15,56,222,193
   2762 .byte	102,68,15,56,222,201
   2763 	movups	144-112(%rcx),%xmm1
   2764 	cmpl	$11,%eax	/* loop count decides how many more rounds; flags used by jb/je below */
   2765 .byte	102,15,56,222,208	/* aesdec %xmm0 into xmm2..xmm9 */
   2766 .byte	102,15,56,222,216
   2767 .byte	102,15,56,222,224
   2768 .byte	102,15,56,222,232
   2769 .byte	102,15,56,222,240
   2770 .byte	102,15,56,222,248
   2771 .byte	102,68,15,56,222,192
   2772 .byte	102,68,15,56,222,200
   2773 	movups	160-112(%rcx),%xmm0
   2774 	jb	L$cbc_dec_done	/* count below 11: xmm0 already holds the last round key */
   2775 .byte	102,15,56,222,209	/* aesdec %xmm1 into xmm2..xmm9 */
   2776 .byte	102,15,56,222,217
   2777 .byte	102,15,56,222,225
   2778 .byte	102,15,56,222,233
   2779 .byte	102,15,56,222,241
   2780 .byte	102,15,56,222,249
   2781 .byte	102,68,15,56,222,193
   2782 .byte	102,68,15,56,222,201
   2783 	movups	176-112(%rcx),%xmm1
   2784 	nop
   2785 .byte	102,15,56,222,208	/* aesdec %xmm0 into xmm2..xmm9 */
   2786 .byte	102,15,56,222,216
   2787 .byte	102,15,56,222,224
   2788 .byte	102,15,56,222,232
   2789 .byte	102,15,56,222,240
   2790 .byte	102,15,56,222,248
   2791 .byte	102,68,15,56,222,192
   2792 .byte	102,68,15,56,222,200
   2793 	movups	192-112(%rcx),%xmm0
   2794 	je	L$cbc_dec_done	/* count equal to 11: two extra rounds were enough */
   2795 .byte	102,15,56,222,209	/* aesdec %xmm1 into xmm2..xmm9 */
   2796 .byte	102,15,56,222,217
   2797 .byte	102,15,56,222,225
   2798 .byte	102,15,56,222,233
   2799 .byte	102,15,56,222,241
   2800 .byte	102,15,56,222,249
   2801 .byte	102,68,15,56,222,193
   2802 .byte	102,68,15,56,222,201
   2803 	movups	208-112(%rcx),%xmm1
   2804 	nop
   2805 .byte	102,15,56,222,208	/* aesdec %xmm0 into xmm2..xmm9 */
   2806 .byte	102,15,56,222,216
   2807 .byte	102,15,56,222,224
   2808 .byte	102,15,56,222,232
   2809 .byte	102,15,56,222,240
   2810 .byte	102,15,56,222,248
   2811 .byte	102,68,15,56,222,192
   2812 .byte	102,68,15,56,222,200
   2813 	movups	224-112(%rcx),%xmm0
   2814 	jmp	L$cbc_dec_done
   2815 .p2align	4
        /*
         * Final two rounds. The last round key (xmm0) is xored into each saved
         * chaining block first, so each aesdeclast applies the last round key
         * and the CBC xor in a single instruction.
         */
   2816 L$cbc_dec_done:
   2817 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
   2818 .byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
   2819 	pxor	%xmm0,%xmm10
   2820 	pxor	%xmm0,%xmm11
   2821 .byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
   2822 .byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
   2823 	pxor	%xmm0,%xmm12
   2824 	pxor	%xmm0,%xmm13
   2825 .byte	102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
   2826 .byte	102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
   2827 	pxor	%xmm0,%xmm14
   2828 	pxor	%xmm0,%xmm15
   2829 .byte	102,68,15,56,222,193	/* aesdec %xmm1,%xmm8 */
   2830 .byte	102,68,15,56,222,201	/* aesdec %xmm1,%xmm9 */
   2831 	movdqu	80(%rdi),%xmm1	/* input block 5: chaining value for xmm8 */
   2832 
   2833 .byte	102,65,15,56,223,210	/* aesdeclast %xmm10,%xmm2 */
   2834 	movdqu	96(%rdi),%xmm10	/* input block 6: chaining value for xmm9 */
   2835 	pxor	%xmm0,%xmm1
   2836 .byte	102,65,15,56,223,219	/* aesdeclast %xmm11,%xmm3 */
   2837 	pxor	%xmm0,%xmm10
   2838 	movdqu	112(%rdi),%xmm0	/* input block 7: chaining value for the next batch */
   2839 .byte	102,65,15,56,223,228	/* aesdeclast %xmm12,%xmm4 */
   2840 	leaq	128(%rdi),%rdi
   2841 	movdqu	0(%r11),%xmm11	/* reload saved-input registers from the address chosen above */
   2842 .byte	102,65,15,56,223,237	/* aesdeclast %xmm13,%xmm5 */
   2843 .byte	102,65,15,56,223,246	/* aesdeclast %xmm14,%xmm6 */
   2844 	movdqu	16(%r11),%xmm12
   2845 	movdqu	32(%r11),%xmm13
   2846 .byte	102,65,15,56,223,255	/* aesdeclast %xmm15,%xmm7 */
   2847 .byte	102,68,15,56,223,193	/* aesdeclast %xmm1,%xmm8 */
   2848 	movdqu	48(%r11),%xmm14
   2849 	movdqu	64(%r11),%xmm15
   2850 .byte	102,69,15,56,223,202	/* aesdeclast %xmm10,%xmm9 */
   2851 	movdqa	%xmm0,%xmm10	/* xmm10 = new chaining value */
   2852 	movdqu	80(%r11),%xmm1
   2853 	movups	-112(%rcx),%xmm0	/* round key 0 again (key pointer is biased by +112) */
   2854 
        /* Store seven results and move the freshly loaded blocks into xmm2..xmm7. */
   2855 	movups	%xmm2,(%rsi)
   2856 	movdqa	%xmm11,%xmm2
   2857 	movups	%xmm3,16(%rsi)
   2858 	movdqa	%xmm12,%xmm3
   2859 	movups	%xmm4,32(%rsi)
   2860 	movdqa	%xmm13,%xmm4
   2861 	movups	%xmm5,48(%rsi)
   2862 	movdqa	%xmm14,%xmm5
   2863 	movups	%xmm6,64(%rsi)
   2864 	movdqa	%xmm15,%xmm6
   2865 	movups	%xmm7,80(%rsi)
   2866 	movdqa	%xmm1,%xmm7
   2867 	movups	%xmm8,96(%rsi)
   2868 	leaq	112(%rsi),%rsi
   2869 
   2870 	subq	$128,%rdx
   2871 	ja	L$cbc_dec_loop8
   2872 
   2873 	movaps	%xmm9,%xmm2	/* xmm2 = pending eighth result */
   2874 	leaq	-112(%rcx),%rcx	/* remove the key pointer bias */
   2875 	addq	$112,%rdx	/* remove the length bias */
   2876 	jle	L$cbc_dec_clear_tail_collected	/* nothing left: only xmm2 remains to be stored */
   2877 	movups	%xmm9,(%rsi)
   2878 	leaq	16(%rsi),%rsi
   2879 	cmpq	$80,%rdx
   2880 	jbe	L$cbc_dec_tail
   2881 
   2882 	movaps	%xmm11,%xmm2	/* xmm2 was overwritten above; xmm11 holds the same input block */
        /* ---- 81..112 bytes left: six blocks already loaded in xmm2..xmm7 ---- */
   2883 L$cbc_dec_six_or_seven:
   2884 	cmpq	$96,%rdx
   2885 	ja	L$cbc_dec_seven
   2886 
   2887 	movaps	%xmm7,%xmm8	/* save input block 5: it becomes the new chaining value */
   2888 	call	_aesni_decrypt6
   2889 	pxor	%xmm10,%xmm2
   2890 	movaps	%xmm8,%xmm10
   2891 	pxor	%xmm11,%xmm3
   2892 	movdqu	%xmm2,(%rsi)
   2893 	pxor	%xmm12,%xmm4
   2894 	movdqu	%xmm3,16(%rsi)
   2895 	pxor	%xmm3,%xmm3	/* registers are wiped as soon as they are stored */
   2896 	pxor	%xmm13,%xmm5
   2897 	movdqu	%xmm4,32(%rsi)
   2898 	pxor	%xmm4,%xmm4
   2899 	pxor	%xmm14,%xmm6
   2900 	movdqu	%xmm5,48(%rsi)
   2901 	pxor	%xmm5,%xmm5
   2902 	pxor	%xmm15,%xmm7
   2903 	movdqu	%xmm6,64(%rsi)
   2904 	pxor	%xmm6,%xmm6
   2905 	leaq	80(%rsi),%rsi
   2906 	movdqa	%xmm7,%xmm2	/* last result is handed to the common tail in xmm2 */
   2907 	pxor	%xmm7,%xmm7
   2908 	jmp	L$cbc_dec_tail_collected
   2909 
   2910 .p2align	4
   2911 L$cbc_dec_seven:
   2912 	movups	96(%rdi),%xmm8	/* seventh input block */
   2913 	xorps	%xmm9,%xmm9	/* eighth lane is a dummy zero block */
   2914 	call	_aesni_decrypt8
   2915 	movups	80(%rdi),%xmm9	/* input block 5: chaining value for xmm8 */
   2916 	pxor	%xmm10,%xmm2
   2917 	movups	96(%rdi),%xmm10	/* input block 6: new chaining value */
   2918 	pxor	%xmm11,%xmm3
   2919 	movdqu	%xmm2,(%rsi)
   2920 	pxor	%xmm12,%xmm4
   2921 	movdqu	%xmm3,16(%rsi)
   2922 	pxor	%xmm3,%xmm3
   2923 	pxor	%xmm13,%xmm5
   2924 	movdqu	%xmm4,32(%rsi)
   2925 	pxor	%xmm4,%xmm4
   2926 	pxor	%xmm14,%xmm6
   2927 	movdqu	%xmm5,48(%rsi)
   2928 	pxor	%xmm5,%xmm5
   2929 	pxor	%xmm15,%xmm7
   2930 	movdqu	%xmm6,64(%rsi)
   2931 	pxor	%xmm6,%xmm6
   2932 	pxor	%xmm9,%xmm8
   2933 	movdqu	%xmm7,80(%rsi)
   2934 	pxor	%xmm7,%xmm7
   2935 	leaq	96(%rsi),%rsi
   2936 	movdqa	%xmm8,%xmm2	/* last result is handed to the common tail in xmm2 */
   2937 	pxor	%xmm8,%xmm8
   2938 	pxor	%xmm9,%xmm9
   2939 	jmp	L$cbc_dec_tail_collected
   2940 
   2941 .p2align	4
        /* ---- six blocks per iteration; xmm7 (last result) is stored lazily ---- */
   2942 L$cbc_dec_loop6:
   2943 	movups	%xmm7,(%rsi)	/* flush the sixth result of the previous iteration */
   2944 	leaq	16(%rsi),%rsi
   2945 	movdqu	0(%rdi),%xmm2
   2946 	movdqu	16(%rdi),%xmm3
   2947 	movdqa	%xmm2,%xmm11
   2948 	movdqu	32(%rdi),%xmm4
   2949 	movdqa	%xmm3,%xmm12
   2950 	movdqu	48(%rdi),%xmm5
   2951 	movdqa	%xmm4,%xmm13
   2952 	movdqu	64(%rdi),%xmm6
   2953 	movdqa	%xmm5,%xmm14
   2954 	movdqu	80(%rdi),%xmm7
   2955 	movdqa	%xmm6,%xmm15
   2956 L$cbc_dec_loop6_enter:
   2957 	leaq	96(%rdi),%rdi
   2958 	movdqa	%xmm7,%xmm8	/* save input block 5: it becomes the new chaining value */
   2959 
   2960 	call	_aesni_decrypt6
   2961 
   2962 	pxor	%xmm10,%xmm2
   2963 	movdqa	%xmm8,%xmm10
   2964 	pxor	%xmm11,%xmm3
   2965 	movdqu	%xmm2,(%rsi)
   2966 	pxor	%xmm12,%xmm4
   2967 	movdqu	%xmm3,16(%rsi)
   2968 	pxor	%xmm13,%xmm5
   2969 	movdqu	%xmm4,32(%rsi)
   2970 	pxor	%xmm14,%xmm6
   2971 	movq	%r11,%rcx	/* reload key pointer and loop count after the call */
   2972 	movdqu	%xmm5,48(%rsi)
   2973 	pxor	%xmm15,%xmm7
   2974 	movl	%r10d,%eax
   2975 	movdqu	%xmm6,64(%rsi)
   2976 	leaq	80(%rsi),%rsi
   2977 	subq	$96,%rdx
   2978 	ja	L$cbc_dec_loop6
   2979 
   2980 	movdqa	%xmm7,%xmm2	/* xmm2 = pending sixth result */
   2981 	addq	$80,%rdx	/* remove the length bias */
   2982 	jle	L$cbc_dec_clear_tail_collected
   2983 	movups	%xmm7,(%rsi)
   2984 	leaq	16(%rsi),%rsi
   2985 
        /*
         * ---- tail: at most 80 bytes left ----
         * Blocks are loaded one at a time; each "subq $16 / jbe" stops as soon
         * as the remaining length is covered. xmm11..xmm15 keep the input
         * blocks for the CBC xor and for the new chaining value.
         */
   2986 L$cbc_dec_tail:
   2987 	movups	(%rdi),%xmm2
   2988 	subq	$16,%rdx
   2989 	jbe	L$cbc_dec_one
   2990 
   2991 	movups	16(%rdi),%xmm3
   2992 	movaps	%xmm2,%xmm11
   2993 	subq	$16,%rdx
   2994 	jbe	L$cbc_dec_two
   2995 
   2996 	movups	32(%rdi),%xmm4
   2997 	movaps	%xmm3,%xmm12
   2998 	subq	$16,%rdx
   2999 	jbe	L$cbc_dec_three
   3000 
   3001 	movups	48(%rdi),%xmm5
   3002 	movaps	%xmm4,%xmm13
   3003 	subq	$16,%rdx
   3004 	jbe	L$cbc_dec_four
   3005 
        /* Five blocks: use the six-block routine with a zero sixth lane. */
   3006 	movups	64(%rdi),%xmm6
   3007 	movaps	%xmm5,%xmm14
   3008 	movaps	%xmm6,%xmm15
   3009 	xorps	%xmm7,%xmm7
   3010 	call	_aesni_decrypt6
   3011 	pxor	%xmm10,%xmm2
   3012 	movaps	%xmm15,%xmm10	/* new chaining value = last input block */
   3013 	pxor	%xmm11,%xmm3
   3014 	movdqu	%xmm2,(%rsi)
   3015 	pxor	%xmm12,%xmm4
   3016 	movdqu	%xmm3,16(%rsi)
   3017 	pxor	%xmm3,%xmm3
   3018 	pxor	%xmm13,%xmm5
   3019 	movdqu	%xmm4,32(%rsi)
   3020 	pxor	%xmm4,%xmm4
   3021 	pxor	%xmm14,%xmm6
   3022 	movdqu	%xmm5,48(%rsi)
   3023 	pxor	%xmm5,%xmm5
   3024 	leaq	64(%rsi),%rsi
   3025 	movdqa	%xmm6,%xmm2
   3026 	pxor	%xmm6,%xmm6
   3027 	pxor	%xmm7,%xmm7
   3028 	subq	$16,%rdx
   3029 	jmp	L$cbc_dec_tail_collected
   3030 
   3031 .p2align	4
   3032 L$cbc_dec_one:
   3033 	movaps	%xmm2,%xmm11	/* keep the input block as the new chaining value */
   3034 	movups	(%rcx),%xmm0
   3035 	movups	16(%rcx),%xmm1
   3036 	leaq	32(%rcx),%rcx
   3037 	xorps	%xmm0,%xmm2
   3038 L$oop_dec1_17:
   3039 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
   3040 	decl	%eax
   3041 	movups	(%rcx),%xmm1
   3042 	leaq	16(%rcx),%rcx
   3043 	jnz	L$oop_dec1_17
   3044 .byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
   3045 	xorps	%xmm10,%xmm2
   3046 	movaps	%xmm11,%xmm10
   3047 	jmp	L$cbc_dec_tail_collected
   3048 .p2align	4
   3049 L$cbc_dec_two:
   3050 	movaps	%xmm3,%xmm12	/* keep the last input block as the new chaining value */
   3051 	call	_aesni_decrypt2
   3052 	pxor	%xmm10,%xmm2
   3053 	movaps	%xmm12,%xmm10
   3054 	pxor	%xmm11,%xmm3
   3055 	movdqu	%xmm2,(%rsi)
   3056 	movdqa	%xmm3,%xmm2
   3057 	pxor	%xmm3,%xmm3
   3058 	leaq	16(%rsi),%rsi
   3059 	jmp	L$cbc_dec_tail_collected
   3060 .p2align	4
   3061 L$cbc_dec_three:
   3062 	movaps	%xmm4,%xmm13	/* keep the last input block as the new chaining value */
   3063 	call	_aesni_decrypt3
   3064 	pxor	%xmm10,%xmm2
   3065 	movaps	%xmm13,%xmm10
   3066 	pxor	%xmm11,%xmm3
   3067 	movdqu	%xmm2,(%rsi)
   3068 	pxor	%xmm12,%xmm4
   3069 	movdqu	%xmm3,16(%rsi)
   3070 	pxor	%xmm3,%xmm3
   3071 	movdqa	%xmm4,%xmm2
   3072 	pxor	%xmm4,%xmm4
   3073 	leaq	32(%rsi),%rsi
   3074 	jmp	L$cbc_dec_tail_collected
   3075 .p2align	4
   3076 L$cbc_dec_four:
   3077 	movaps	%xmm5,%xmm14	/* keep the last input block as the new chaining value */
   3078 	call	_aesni_decrypt4
   3079 	pxor	%xmm10,%xmm2
   3080 	movaps	%xmm14,%xmm10
   3081 	pxor	%xmm11,%xmm3
   3082 	movdqu	%xmm2,(%rsi)
   3083 	pxor	%xmm12,%xmm4
   3084 	movdqu	%xmm3,16(%rsi)
   3085 	pxor	%xmm3,%xmm3
   3086 	pxor	%xmm13,%xmm5
   3087 	movdqu	%xmm4,32(%rsi)
   3088 	pxor	%xmm4,%xmm4
   3089 	movdqa	%xmm5,%xmm2
   3090 	pxor	%xmm5,%xmm5
   3091 	leaq	48(%rsi),%rsi
   3092 	jmp	L$cbc_dec_tail_collected
   3093 
   3094 .p2align	4
        /* Entered from the main loops, which do not wipe xmm3..xmm9 themselves. */
   3095 L$cbc_dec_clear_tail_collected:
   3096 	pxor	%xmm3,%xmm3
   3097 	pxor	%xmm4,%xmm4
   3098 	pxor	%xmm5,%xmm5
   3099 	pxor	%xmm6,%xmm6
   3100 	pxor	%xmm7,%xmm7
   3101 	pxor	%xmm8,%xmm8
   3102 	pxor	%xmm9,%xmm9
        /* Common exit: xmm2 = last result block not yet stored, xmm10 = new IV. */
   3103 L$cbc_dec_tail_collected:
   3104 	movups	%xmm10,(%r8)
   3105 	andq	$15,%rdx
   3106 	jnz	L$cbc_dec_tail_partial
   3107 	movups	%xmm2,(%rsi)
   3108 	pxor	%xmm2,%xmm2
   3109 	jmp	L$cbc_dec_ret
   3110 .p2align	4
        /* Length not a multiple of 16: spill the last block to the aligned stack
           slot and copy only part of it to the output with rep movsb. */
   3111 L$cbc_dec_tail_partial:
   3112 	movaps	%xmm2,(%rsp)
   3113 	pxor	%xmm2,%xmm2
   3114 	movq	$16,%rcx
   3115 	movq	%rsi,%rdi
   3116 	subq	%rdx,%rcx	/* byte count = 16 - (rdx & 15) */
   3117 	leaq	(%rsp),%rsi
   3118 .long	0x9066A4F3	/* rep movsb */
   3119 	movdqa	%xmm2,(%rsp)	/* overwrite the spilled block with zeros */
   3120 
   3121 L$cbc_dec_ret:
   3122 	xorps	%xmm0,%xmm0
   3123 	pxor	%xmm1,%xmm1
   3124 	leaq	(%rbp),%rsp	/* tear down the frame built at L$cbc_decrypt_bulk */
   3125 	popq	%rbp
   3126 L$cbc_ret:
   3127 	.byte	0xf3,0xc3	/* rep ret */
   3128 
   3129 .globl	_aesni_set_decrypt_key
   3130 .private_extern _aesni_set_decrypt_key
        /*
         * _aesni_set_decrypt_key: build a decryption key schedule in place.
         *
         * Calls __aesni_set_encrypt_key with the incoming %rdi/%esi/%rdx
         * untouched, then, if that returned 0 in %eax, converts the schedule at
         * %rdx: the round keys are reversed in order and every key except the
         * first and last is passed through aesimc.
         * The value returned in %eax is whatever __aesni_set_encrypt_key left
         * there (this routine does not modify %eax afterwards).
         *
         * Relies on __aesni_set_encrypt_key leaving its round-loop count in %esi
         * and %rdx unchanged on success.
         *
         * .byte 102,15,56,219,/r = aesimc; .byte 0x48,0x83,0xEC,0x08 =
         * subq $8,%rsp; .byte 0xf3,0xc3 = rep ret.
         */
   3131 
   3132 .p2align	4
   3133 _aesni_set_decrypt_key:
   3134 .byte	0x48,0x83,0xEC,0x08	/* subq $8,%rsp */
   3135 	call	__aesni_set_encrypt_key
   3136 	shll	$4,%esi	/* esi = loop count * 16 */
   3137 	testl	%eax,%eax
   3138 	jnz	L$dec_key_ret	/* propagate the error code unchanged */
   3139 	leaq	16(%rdx,%rsi,1),%rdi	/* rdi = address of the last round key */
   3140 
        /* Swap the first and last round keys without transforming them. */
   3141 	movups	(%rdx),%xmm0
   3142 	movups	(%rdi),%xmm1
   3143 	movups	%xmm0,(%rdi)
   3144 	movups	%xmm1,(%rdx)
   3145 	leaq	16(%rdx),%rdx
   3146 	leaq	-16(%rdi),%rdi
   3147 
        /* Walk inwards from both ends: aesimc each pair and store it swapped. */
   3148 L$dec_key_inverse:
   3149 	movups	(%rdx),%xmm0
   3150 	movups	(%rdi),%xmm1
   3151 .byte	102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
   3152 .byte	102,15,56,219,201	/* aesimc %xmm1,%xmm1 */
   3153 	leaq	16(%rdx),%rdx
   3154 	leaq	-16(%rdi),%rdi
   3155 	movups	%xmm0,16(%rdi)
   3156 	movups	%xmm1,-16(%rdx)
   3157 	cmpq	%rdx,%rdi
   3158 	ja	L$dec_key_inverse
   3159 
        /* The two pointers have met: transform the middle key in place. */
   3160 	movups	(%rdx),%xmm0
   3161 .byte	102,15,56,219,192	/* aesimc %xmm0,%xmm0 */
   3162 	pxor	%xmm1,%xmm1	/* wipe key material from registers */
   3163 	movups	%xmm0,(%rdi)
   3164 	pxor	%xmm0,%xmm0
   3165 L$dec_key_ret:
   3166 	addq	$8,%rsp
   3167 	.byte	0xf3,0xc3	/* rep ret */
   3168 L$SEH_end_set_decrypt_key:
   3169 
   3170 .globl	_aesni_set_encrypt_key
   3171 .private_extern _aesni_set_encrypt_key
        /*
         * _aesni_set_encrypt_key / __aesni_set_encrypt_key: expand a user key
         * into an AES encryption key schedule.
         *
         * Registers as the code below reads them on entry:
         *   %rdi  user key bytes (16, 24 or 32 bytes are read depending on %esi)
         *   %esi  key size in bits: 128, 192 or 256
         *   %rdx  output schedule; round keys are written from offset 0 and a
         *         32-bit loop count (9, 11 or 13) ends up at offset 240
         * Result in %eax/%rax: 0 on success, -1 if %rdi or %rdx is zero,
         * -2 for any other bit size.
         * On success %esi holds the loop count and %rdx is unchanged.
         * xmm0..xmm5 are zeroed before returning.
         *
         * Two implementations exist per key size. The default one uses
         * aeskeygenassist plus the L$key_expansion_* helpers; the "_alt" one
         * uses pshufb + aesenclast with the L$key_rotate* / L$key_rcon*
         * constants. Selection is by (_OPENSSL_ia32cap_P[1] & 0x10000800)
         * == 0x10000000.
         * NOTE(review): presumably "AVX set, XOP clear" in the OpenSSL
         * capability-vector layout -- confirm against OPENSSL_ia32cap docs.
         *
         * Byte-sequence legend:
         *   102,15,58,223,/r,imm = aeskeygenassist $imm,src,dst
         *   102,15,56,0,/r       = pshufb
         *   102,15,56,221,/r     = aesenclast
         *   0x48,0x83,0xEC,0x08  = subq $8,%rsp ;  0xf3,0xc3 = rep ret
         */
   3172 
   3173 .p2align	4
   3174 _aesni_set_encrypt_key:
   3175 __aesni_set_encrypt_key:
   3176 .byte	0x48,0x83,0xEC,0x08	/* subq $8,%rsp */
   3177 	movq	$-1,%rax	/* default result: null-pointer error */
   3178 	testq	%rdi,%rdi
   3179 	jz	L$enc_key_ret
   3180 	testq	%rdx,%rdx
   3181 	jz	L$enc_key_ret
   3182 
   3183 	movl	$268437504,%r10d	/* 0x10000800: capability mask */
   3184 	movups	(%rdi),%xmm0	/* xmm0 = first 16 key bytes */
   3185 	xorps	%xmm4,%xmm4	/* xmm4 = 0: scratch expected by the expansion helpers */
   3186 	andl	_OPENSSL_ia32cap_P+4(%rip),%r10d
   3187 	leaq	16(%rdx),%rax	/* rax = write cursor, starting at round key 1 */
   3188 	cmpl	$256,%esi
   3189 	je	L$14rounds
   3190 	cmpl	$192,%esi
   3191 	je	L$12rounds
   3192 	cmpl	$128,%esi
   3193 	jne	L$bad_keybits
   3194 
        /* ---- 128-bit key ---- */
   3195 L$10rounds:
   3196 	movl	$9,%esi	/* loop count stored in the schedule */
   3197 	cmpl	$268435456,%r10d	/* 0x10000000 */
   3198 	je	L$10rounds_alt
   3199 
   3200 	movups	%xmm0,(%rdx)	/* round key 0 = user key */
        /* Each pair: aeskeygenassist $rcon,%xmm0,%xmm1 then the helper, which
           (except for the _cold entry) stores xmm0 at (%rax) and advances %rax. */
   3201 .byte	102,15,58,223,200,1
   3202 	call	L$key_expansion_128_cold
   3203 .byte	102,15,58,223,200,2
   3204 	call	L$key_expansion_128
   3205 .byte	102,15,58,223,200,4
   3206 	call	L$key_expansion_128
   3207 .byte	102,15,58,223,200,8
   3208 	call	L$key_expansion_128
   3209 .byte	102,15,58,223,200,16
   3210 	call	L$key_expansion_128
   3211 .byte	102,15,58,223,200,32
   3212 	call	L$key_expansion_128
   3213 .byte	102,15,58,223,200,64
   3214 	call	L$key_expansion_128
   3215 .byte	102,15,58,223,200,128
   3216 	call	L$key_expansion_128
   3217 .byte	102,15,58,223,200,27
   3218 	call	L$key_expansion_128
   3219 .byte	102,15,58,223,200,54
   3220 	call	L$key_expansion_128
   3221 	movups	%xmm0,(%rax)	/* final round key */
   3222 	movl	%esi,80(%rax)	/* loop count; lands at schedule offset 240 */
   3223 	xorl	%eax,%eax	/* success */
   3224 	jmp	L$enc_key_ret
   3225 
   3226 .p2align	4
   3227 L$10rounds_alt:
   3228 	movdqa	L$key_rotate(%rip),%xmm5	/* pshufb mask */
   3229 	movl	$8,%r10d	/* eight loop iterations; the last two keys are done after the loop */
   3230 	movdqa	L$key_rcon1(%rip),%xmm4	/* round constant, doubled each iteration */
   3231 	movdqa	%xmm0,%xmm2
   3232 	movdqu	%xmm0,(%rdx)
   3233 	jmp	L$oop_key128
   3234 
   3235 .p2align	4
   3236 L$oop_key128:
   3237 .byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
   3238 .byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
   3239 	pslld	$1,%xmm4
   3240 	leaq	16(%rax),%rax
   3241 
        /* xmm2 = running XOR of the previous key's four dwords. */
   3242 	movdqa	%xmm2,%xmm3
   3243 	pslldq	$4,%xmm2
   3244 	pxor	%xmm2,%xmm3
   3245 	pslldq	$4,%xmm2
   3246 	pxor	%xmm2,%xmm3
   3247 	pslldq	$4,%xmm2
   3248 	pxor	%xmm3,%xmm2
   3249 
   3250 	pxor	%xmm2,%xmm0	/* xmm0 = next round key */
   3251 	movdqu	%xmm0,-16(%rax)
   3252 	movdqa	%xmm0,%xmm2
   3253 
   3254 	decl	%r10d
   3255 	jnz	L$oop_key128
   3256 
   3257 	movdqa	L$key_rcon1b(%rip),%xmm4	/* switch round constant to 0x1b */
   3258 
   3259 .byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
   3260 .byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
   3261 	pslld	$1,%xmm4	/* 0x1b -> 0x36 for the final key */
   3262 
   3263 	movdqa	%xmm2,%xmm3
   3264 	pslldq	$4,%xmm2
   3265 	pxor	%xmm2,%xmm3
   3266 	pslldq	$4,%xmm2
   3267 	pxor	%xmm2,%xmm3
   3268 	pslldq	$4,%xmm2
   3269 	pxor	%xmm3,%xmm2
   3270 
   3271 	pxor	%xmm2,%xmm0
   3272 	movdqu	%xmm0,(%rax)
   3273 
   3274 	movdqa	%xmm0,%xmm2
   3275 .byte	102,15,56,0,197	/* pshufb %xmm5,%xmm0 */
   3276 .byte	102,15,56,221,196	/* aesenclast %xmm4,%xmm0 */
   3277 
   3278 	movdqa	%xmm2,%xmm3
   3279 	pslldq	$4,%xmm2
   3280 	pxor	%xmm2,%xmm3
   3281 	pslldq	$4,%xmm2
   3282 	pxor	%xmm2,%xmm3
   3283 	pslldq	$4,%xmm2
   3284 	pxor	%xmm3,%xmm2
   3285 
   3286 	pxor	%xmm2,%xmm0
   3287 	movdqu	%xmm0,16(%rax)
   3288 
   3289 	movl	%esi,96(%rax)	/* loop count; lands at schedule offset 240 */
   3290 	xorl	%eax,%eax
   3291 	jmp	L$enc_key_ret
   3292 
        /* ---- 192-bit key ---- */
   3293 .p2align	4
   3294 L$12rounds:
   3295 	movq	16(%rdi),%xmm2	/* xmm2 (low 8 bytes) = key bytes 16..23 */
   3296 	movl	$11,%esi
   3297 	cmpl	$268435456,%r10d
   3298 	je	L$12rounds_alt
   3299 
   3300 	movups	%xmm0,(%rdx)
        /* Each pair: aeskeygenassist $rcon,%xmm2,%xmm1 then a 192a/192b helper. */
   3301 .byte	102,15,58,223,202,1
   3302 	call	L$key_expansion_192a_cold
   3303 .byte	102,15,58,223,202,2
   3304 	call	L$key_expansion_192b
   3305 .byte	102,15,58,223,202,4
   3306 	call	L$key_expansion_192a
   3307 .byte	102,15,58,223,202,8
   3308 	call	L$key_expansion_192b
   3309 .byte	102,15,58,223,202,16
   3310 	call	L$key_expansion_192a
   3311 .byte	102,15,58,223,202,32
   3312 	call	L$key_expansion_192b
   3313 .byte	102,15,58,223,202,64
   3314 	call	L$key_expansion_192a
   3315 .byte	102,15,58,223,202,128
   3316 	call	L$key_expansion_192b
   3317 	movups	%xmm0,(%rax)
   3318 	movl	%esi,48(%rax)	/* loop count; lands at schedule offset 240 */
   3319 	xorq	%rax,%rax
   3320 	jmp	L$enc_key_ret
   3321 
   3322 .p2align	4
   3323 L$12rounds_alt:
   3324 	movdqa	L$key_rotate192(%rip),%xmm5
   3325 	movdqa	L$key_rcon1(%rip),%xmm4
   3326 	movl	$8,%r10d
   3327 	movdqu	%xmm0,(%rdx)
   3328 	jmp	L$oop_key192
   3329 
   3330 .p2align	4
        /* Each iteration emits 24 bytes of schedule: 8 from xmm2, 16 from xmm0. */
   3331 L$oop_key192:
   3332 	movq	%xmm2,0(%rax)
   3333 	movdqa	%xmm2,%xmm1
   3334 .byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
   3335 .byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
   3336 	pslld	$1,%xmm4
   3337 	leaq	24(%rax),%rax
   3338 
   3339 	movdqa	%xmm0,%xmm3
   3340 	pslldq	$4,%xmm0
   3341 	pxor	%xmm0,%xmm3
   3342 	pslldq	$4,%xmm0
   3343 	pxor	%xmm0,%xmm3
   3344 	pslldq	$4,%xmm0
   3345 	pxor	%xmm3,%xmm0
   3346 
   3347 	pshufd	$255,%xmm0,%xmm3
   3348 	pxor	%xmm1,%xmm3
   3349 	pslldq	$4,%xmm1
   3350 	pxor	%xmm1,%xmm3
   3351 
   3352 	pxor	%xmm2,%xmm0
   3353 	pxor	%xmm3,%xmm2
   3354 	movdqu	%xmm0,-16(%rax)
   3355 
   3356 	decl	%r10d
   3357 	jnz	L$oop_key192
   3358 
   3359 	movl	%esi,32(%rax)	/* loop count; lands at schedule offset 240 */
   3360 	xorl	%eax,%eax
   3361 	jmp	L$enc_key_ret
   3362 
        /* ---- 256-bit key ---- */
   3363 .p2align	4
   3364 L$14rounds:
   3365 	movups	16(%rdi),%xmm2	/* xmm2 = key bytes 16..31 */
   3366 	movl	$13,%esi
   3367 	leaq	16(%rax),%rax	/* cursor starts at round key 2 */
   3368 	cmpl	$268435456,%r10d
   3369 	je	L$14rounds_alt
   3370 
   3371 	movups	%xmm0,(%rdx)
   3372 	movups	%xmm2,16(%rdx)
        /* Alternating pairs: aeskeygenassist from xmm2 feeds the 256a helper
           (updates xmm0); aeskeygenassist from xmm0 feeds 256b (updates xmm2). */
   3373 .byte	102,15,58,223,202,1
   3374 	call	L$key_expansion_256a_cold
   3375 .byte	102,15,58,223,200,1
   3376 	call	L$key_expansion_256b
   3377 .byte	102,15,58,223,202,2
   3378 	call	L$key_expansion_256a
   3379 .byte	102,15,58,223,200,2
   3380 	call	L$key_expansion_256b
   3381 .byte	102,15,58,223,202,4
   3382 	call	L$key_expansion_256a
   3383 .byte	102,15,58,223,200,4
   3384 	call	L$key_expansion_256b
   3385 .byte	102,15,58,223,202,8
   3386 	call	L$key_expansion_256a
   3387 .byte	102,15,58,223,200,8
   3388 	call	L$key_expansion_256b
   3389 .byte	102,15,58,223,202,16
   3390 	call	L$key_expansion_256a
   3391 .byte	102,15,58,223,200,16
   3392 	call	L$key_expansion_256b
   3393 .byte	102,15,58,223,202,32
   3394 	call	L$key_expansion_256a
   3395 .byte	102,15,58,223,200,32
   3396 	call	L$key_expansion_256b
   3397 .byte	102,15,58,223,202,64
   3398 	call	L$key_expansion_256a
   3399 	movups	%xmm0,(%rax)
   3400 	movl	%esi,16(%rax)	/* loop count; lands at schedule offset 240 */
   3401 	xorq	%rax,%rax
   3402 	jmp	L$enc_key_ret
   3403 
   3404 .p2align	4
   3405 L$14rounds_alt:
   3406 	movdqa	L$key_rotate(%rip),%xmm5
   3407 	movdqa	L$key_rcon1(%rip),%xmm4
   3408 	movl	$7,%r10d
   3409 	movdqu	%xmm0,0(%rdx)
   3410 	movdqa	%xmm2,%xmm1
   3411 	movdqu	%xmm2,16(%rdx)
   3412 	jmp	L$oop_key256
   3413 
   3414 .p2align	4
        /* Each full iteration emits two round keys; the seventh pass exits after
           the first one via L$done_key256. */
   3415 L$oop_key256:
   3416 .byte	102,15,56,0,213	/* pshufb %xmm5,%xmm2 */
   3417 .byte	102,15,56,221,212	/* aesenclast %xmm4,%xmm2 */
   3418 
   3419 	movdqa	%xmm0,%xmm3
   3420 	pslldq	$4,%xmm0
   3421 	pxor	%xmm0,%xmm3
   3422 	pslldq	$4,%xmm0
   3423 	pxor	%xmm0,%xmm3
   3424 	pslldq	$4,%xmm0
   3425 	pxor	%xmm3,%xmm0
   3426 	pslld	$1,%xmm4
   3427 
   3428 	pxor	%xmm2,%xmm0
   3429 	movdqu	%xmm0,(%rax)
   3430 
   3431 	decl	%r10d
   3432 	jz	L$done_key256
   3433 
   3434 	pshufd	$255,%xmm0,%xmm2	/* broadcast the top dword of the new key */
   3435 	pxor	%xmm3,%xmm3
   3436 .byte	102,15,56,221,211	/* aesenclast %xmm3,%xmm2 (zero round key) */
   3437 
   3438 	movdqa	%xmm1,%xmm3
   3439 	pslldq	$4,%xmm1
   3440 	pxor	%xmm1,%xmm3
   3441 	pslldq	$4,%xmm1
   3442 	pxor	%xmm1,%xmm3
   3443 	pslldq	$4,%xmm1
   3444 	pxor	%xmm3,%xmm1
   3445 
   3446 	pxor	%xmm1,%xmm2
   3447 	movdqu	%xmm2,16(%rax)
   3448 	leaq	32(%rax),%rax
   3449 	movdqa	%xmm2,%xmm1
   3450 
   3451 	jmp	L$oop_key256
   3452 
   3453 L$done_key256:
   3454 	movl	%esi,16(%rax)	/* loop count; lands at schedule offset 240 */
   3455 	xorl	%eax,%eax
   3456 	jmp	L$enc_key_ret
   3457 
   3458 .p2align	4
   3459 L$bad_keybits:
   3460 	movq	$-2,%rax	/* unsupported key size */
        /* Common exit: wipe the SSE registers that held key material. */
   3461 L$enc_key_ret:
   3462 	pxor	%xmm0,%xmm0
   3463 	pxor	%xmm1,%xmm1
   3464 	pxor	%xmm2,%xmm2
   3465 	pxor	%xmm3,%xmm3
   3466 	pxor	%xmm4,%xmm4
   3467 	pxor	%xmm5,%xmm5
   3468 	addq	$8,%rsp
   3469 	.byte	0xf3,0xc3	/* rep ret */
   3470 L$SEH_end_set_encrypt_key:
   3471 
   3472 .p2align	4
        /*
         * L$key_expansion_128: one step of the 128-bit key expansion.
         * In:  xmm0 = current round key, xmm1 = aeskeygenassist result,
         *      rax  = write cursor, xmm4 = scratch.
         * The normal entry first stores xmm0 at (%rax) and advances %rax by 16;
         * the _cold entry skips that store.
         * Out: xmm0 = next round key. Clobbers xmm1 and xmm4.
         * NOTE(review): the shufps pairs yield a running XOR of xmm0's dwords
         * only if the low dword of xmm4 is zero on entry; the caller zeroes
         * xmm4 before the first call and this sequence keeps that dword zero.
         */
   3473 L$key_expansion_128:
   3474 	movups	%xmm0,(%rax)
   3475 	leaq	16(%rax),%rax
   3476 L$key_expansion_128_cold:
   3477 	shufps	$16,%xmm0,%xmm4
   3478 	xorps	%xmm4,%xmm0
   3479 	shufps	$140,%xmm0,%xmm4
   3480 	xorps	%xmm4,%xmm0
   3481 	shufps	$255,%xmm1,%xmm1	/* broadcast dword 3 of the keygenassist result */
   3482 	xorps	%xmm1,%xmm0
   3483 	.byte	0xf3,0xc3	/* rep ret */
   3484 
   3485 .p2align	4
        /*
         * L$key_expansion_192a: one step of the 192-bit key expansion.
         * In:  xmm0 = 16 bytes and xmm2 = further bytes of the current key
         *      state, xmm1 = aeskeygenassist result, rax = write cursor,
         *      xmm4 = scratch.
         * Entry points:
         *   L$key_expansion_192a       stores xmm0 at (%rax), rax += 16, then
         *                              falls into the _cold entry
         *   L$key_expansion_192a_cold  copies xmm2 to xmm5 (read back by
         *                              L$key_expansion_192b), then continues
         *   L$key_expansion_192b_warm  the shared update of xmm0 and xmm2;
         *                              L$key_expansion_192b jumps here
         * Out: xmm0 and xmm2 updated. Clobbers xmm1, xmm3, xmm4.
         */
   3486 L$key_expansion_192a:
   3487 	movups	%xmm0,(%rax)
   3488 	leaq	16(%rax),%rax
   3489 L$key_expansion_192a_cold:
   3490 	movaps	%xmm2,%xmm5
   3491 L$key_expansion_192b_warm:
   3492 	shufps	$16,%xmm0,%xmm4
   3493 	movdqa	%xmm2,%xmm3
   3494 	xorps	%xmm4,%xmm0
   3495 	shufps	$140,%xmm0,%xmm4
   3496 	pslldq	$4,%xmm3
   3497 	xorps	%xmm4,%xmm0
   3498 	pshufd	$85,%xmm1,%xmm1	/* broadcast dword 1 of the keygenassist result */
   3499 	pxor	%xmm3,%xmm2
   3500 	pxor	%xmm1,%xmm0
   3501 	pshufd	$255,%xmm0,%xmm3	/* broadcast dword 3 of the new xmm0 */
   3502 	pxor	%xmm3,%xmm2
   3503 	.byte	0xf3,0xc3	/* rep ret */
   3504 
   3505 .p2align	4
        /*
         * L$key_expansion_192b: companion step of the 192-bit key expansion.
         * Assembles two 16-byte round keys from xmm5 (saved by the preceding
         * 192a step), xmm0 and xmm2, stores them at (%rax) and 16(%rax),
         * advances %rax by 32, then tail-jumps to L$key_expansion_192b_warm
         * for the shared state update (which also performs the return).
         * Clobbers xmm3 and xmm5 in addition to what the warm path clobbers.
         */
   3506 L$key_expansion_192b:
   3507 	movaps	%xmm0,%xmm3
   3508 	shufps	$68,%xmm0,%xmm5	/* xmm5 = low half of xmm5 : low half of xmm0 */
   3509 	movups	%xmm5,(%rax)
   3510 	shufps	$78,%xmm2,%xmm3	/* xmm3 = high half of xmm0 : low half of xmm2 */
   3511 	movups	%xmm3,16(%rax)
   3512 	leaq	32(%rax),%rax
   3513 	jmp	L$key_expansion_192b_warm
   3514 
   3515 .p2align	4
        /*
         * L$key_expansion_256a: 256-bit key expansion step that updates xmm0.
         * In:  xmm0 and xmm2 = the two halves of the key state, xmm1 =
         *      aeskeygenassist result, rax = write cursor, xmm4 = scratch.
         * The normal entry first stores xmm2 at (%rax) and advances %rax by 16;
         * the _cold entry skips that store.
         * Out: xmm0 = next round key. Clobbers xmm1 and xmm4.
         * NOTE(review): as in the 128-bit helper, the shufps pairs give a
         * running XOR of xmm0's dwords only while the low dword of xmm4 is 0.
         */
   3516 L$key_expansion_256a:
   3517 	movups	%xmm2,(%rax)
   3518 	leaq	16(%rax),%rax
   3519 L$key_expansion_256a_cold:
   3520 	shufps	$16,%xmm0,%xmm4
   3521 	xorps	%xmm4,%xmm0
   3522 	shufps	$140,%xmm0,%xmm4
   3523 	xorps	%xmm4,%xmm0
   3524 	shufps	$255,%xmm1,%xmm1	/* broadcast dword 3 of the keygenassist result */
   3525 	xorps	%xmm1,%xmm0
   3526 	.byte	0xf3,0xc3	/* rep ret */
   3527 
   3528 .p2align	4
        /*
         * L$key_expansion_256b: 256-bit key expansion step that updates xmm2.
         * Stores xmm0 at (%rax), advances %rax by 16, then applies to xmm2 the
         * same shufps/xorps sequence the 256a step applies to xmm0, finishing
         * with dword 2 (not dword 3) of the aeskeygenassist result in xmm1.
         * Out: xmm2 = next round key. Clobbers xmm1 and xmm4.
         */
   3529 L$key_expansion_256b:
   3530 	movups	%xmm0,(%rax)
   3531 	leaq	16(%rax),%rax
   3532 
   3533 	shufps	$16,%xmm2,%xmm4
   3534 	xorps	%xmm4,%xmm2
   3535 	shufps	$140,%xmm2,%xmm4
   3536 	xorps	%xmm4,%xmm2
   3537 	shufps	$170,%xmm1,%xmm1	/* broadcast dword 2 of the keygenassist result */
   3538 	xorps	%xmm1,%xmm2
   3539 	.byte	0xf3,0xc3	/* rep ret */
   3540 
   3541 
   3542 .p2align	6
        /*
         * Constant tables, 64-byte aligned. Each label is followed by 16 bytes.
         * The L$key_* entries are loaded by the "_alt" paths of
         * _aesni_set_encrypt_key; the other tables are referenced from code
         * outside this part of the file.
         */
   3543 L$bswap_mask:	/* byte indices 15..0, i.e. a full 16-byte reversal pattern */
   3544 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   3545 L$increment32:
   3546 .long	6,6,6,0
   3547 L$increment64:
   3548 .long	1,0,0,0
   3549 L$xts_magic:
   3550 .long	0x87,0,1,0
   3551 L$increment1:
   3552 .byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
   3553 L$key_rotate:	/* pshufb mask used by the 128- and 256-bit _alt key schedules */
   3554 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
   3555 L$key_rotate192:	/* pshufb mask used by the 192-bit _alt key schedule */
   3556 .long	0x04070605,0x04070605,0x04070605,0x04070605
   3557 L$key_rcon1:	/* initial round constant 1 in every dword; shifted left per round */
   3558 .long	1,1,1,1
   3559 L$key_rcon1b:	/* round constant 0x1b, loaded after the eighth 128-bit round */
   3560 .long	0x1b,0x1b,0x1b,0x1b
   3561 
        /* NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>" */
   3562 .byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
   3563 .p2align	6
   3564 #endif
   3565