Home | History | Annotate | Download | only in fipsmodule
      1 #if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
      2 .text
      3 
      4 .globl	_aesni_encrypt
      5 .private_extern _aesni_encrypt
      6 
      7 .p2align	4
        // _aesni_encrypt: encrypt a single 16-byte block with AES-NI.
        // In:    %rdi -> 16-byte input block
        //        %rsi -> 16-byte output block
        //        %rdx -> key schedule; the 32-bit value at offset 240 is used as
        //                the loop count (presumably the round-count field of the
        //                caller's key structure -- confirm against the C header)
        // Out:   16 bytes written to (%rsi)
        // Clobb: %eax, %rdx, %xmm0-%xmm2 (all three are zeroed before return), flags
      8 _aesni_encrypt:
      9 	movups	(%rdi),%xmm2
     10 	movl	240(%rdx),%eax
     11 	movups	(%rdx),%xmm0
     12 	movups	16(%rdx),%xmm1
     13 	leaq	32(%rdx),%rdx
        // Initial whitening: block ^= first 16 bytes of the key schedule.
     14 	xorps	%xmm0,%xmm2
        // One aesenc per iteration, %eax iterations. The bytes below encode
        // aesenc %xmm1,%xmm2. movups/leaq do not touch flags, so jnz tests the
        // result of decl.
     15 L$oop_enc1_1:
     16 .byte	102,15,56,220,209
     17 	decl	%eax
     18 	movups	(%rdx),%xmm1
     19 	leaq	16(%rdx),%rdx
     20 	jnz	L$oop_enc1_1
        // aesenclast %xmm1,%xmm2 with the last 16 bytes loaded above.
     21 .byte	102,15,56,221,209
        // Scrub key material and the result from the xmm registers.
     22 	pxor	%xmm0,%xmm0
     23 	pxor	%xmm1,%xmm1
     24 	movups	%xmm2,(%rsi)
     25 	pxor	%xmm2,%xmm2
        // 0xf3,0xc3 = "rep ret".
     26 	.byte	0xf3,0xc3
     27 
     28 
     29 .globl	_aesni_decrypt
     30 .private_extern _aesni_decrypt
     31 
     32 .p2align	4
        // _aesni_decrypt: decrypt a single 16-byte block with AES-NI.
        // In:    %rdi -> 16-byte input block
        //        %rsi -> 16-byte output block
        //        %rdx -> key schedule; the 32-bit value at offset 240 is used as
        //                the loop count (presumably the round-count field --
        //                confirm against the C header)
        // Out:   16 bytes written to (%rsi)
        // Clobb: %eax, %rdx, %xmm0-%xmm2 (all three are zeroed before return), flags
     33 _aesni_decrypt:
     34 	movups	(%rdi),%xmm2
     35 	movl	240(%rdx),%eax
     36 	movups	(%rdx),%xmm0
     37 	movups	16(%rdx),%xmm1
     38 	leaq	32(%rdx),%rdx
        // Initial whitening: block ^= first 16 bytes of the key schedule.
     39 	xorps	%xmm0,%xmm2
        // The bytes below encode aesdec %xmm1,%xmm2; the loop runs %eax times.
        // movups/leaq leave the flags from decl intact for jnz.
     40 L$oop_dec1_2:
     41 .byte	102,15,56,222,209
     42 	decl	%eax
     43 	movups	(%rdx),%xmm1
     44 	leaq	16(%rdx),%rdx
     45 	jnz	L$oop_dec1_2
        // aesdeclast %xmm1,%xmm2.
     46 .byte	102,15,56,223,209
        // Scrub key material and the result from the xmm registers.
     47 	pxor	%xmm0,%xmm0
     48 	pxor	%xmm1,%xmm1
     49 	movups	%xmm2,(%rsi)
     50 	pxor	%xmm2,%xmm2
        // 0xf3,0xc3 = "rep ret".
     51 	.byte	0xf3,0xc3
     52 
     53 
     54 .p2align	4
        // _aesni_encrypt2: file-local helper (no .globl), encrypts two blocks
        // in parallel. Uses a register contract rather than C argument passing.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2, %xmm3 = the two blocks
        // Out:   %xmm2, %xmm3 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
     55 _aesni_encrypt2:
     56 	movups	(%rcx),%xmm0
     57 	shll	$4,%eax
     58 	movups	16(%rcx),%xmm1
        // Whitening: XOR the first 16 key bytes into both blocks.
     59 	xorps	%xmm0,%xmm2
     60 	xorps	%xmm0,%xmm3
     61 	movups	32(%rcx),%xmm0
        // %rcx is advanced to (start + 32 + 16*count) and %rax becomes a negative
        // byte offset that climbs towards zero; the loop ends when it hits zero.
     62 	leaq	32(%rcx,%rax,1),%rcx
     63 	negq	%rax
     64 	addq	$16,%rax
     65 
        // Two rounds per iteration, alternating %xmm1 and %xmm0 as the round key.
        // 220 = aesenc; final byte 209/217 = %xmm1 into %xmm2/%xmm3,
        // 208/216 = %xmm0 into %xmm2/%xmm3. jnz tests the addq result
        // (aesenc and movups do not modify flags).
     66 L$enc_loop2:
     67 .byte	102,15,56,220,209
     68 .byte	102,15,56,220,217
     69 	movups	(%rcx,%rax,1),%xmm1
     70 	addq	$32,%rax
     71 .byte	102,15,56,220,208
     72 .byte	102,15,56,220,216
     73 	movups	-16(%rcx,%rax,1),%xmm0
     74 	jnz	L$enc_loop2
     75 
        // Tail: one more aesenc with %xmm1, then aesenclast (221) with %xmm0.
     76 .byte	102,15,56,220,209
     77 .byte	102,15,56,220,217
     78 .byte	102,15,56,221,208
     79 .byte	102,15,56,221,216
        // 0xf3,0xc3 = "rep ret".
     80 	.byte	0xf3,0xc3
     81 
     82 
     83 .p2align	4
        // _aesni_decrypt2: file-local helper (no .globl), decrypts two blocks
        // in parallel.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2, %xmm3 = the two blocks
        // Out:   %xmm2, %xmm3 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
     84 _aesni_decrypt2:
     85 	movups	(%rcx),%xmm0
     86 	shll	$4,%eax
     87 	movups	16(%rcx),%xmm1
        // Whitening: XOR the first 16 key bytes into both blocks.
     88 	xorps	%xmm0,%xmm2
     89 	xorps	%xmm0,%xmm3
     90 	movups	32(%rcx),%xmm0
        // %rcx -> (start + 32 + 16*count); %rax = negative offset counting up to 0.
     91 	leaq	32(%rcx,%rax,1),%rcx
     92 	negq	%rax
     93 	addq	$16,%rax
     94 
        // Two rounds per iteration. 222 = aesdec; final byte 209/217 = %xmm1 into
        // %xmm2/%xmm3, 208/216 = %xmm0 into %xmm2/%xmm3. jnz tests the addq result.
     95 L$dec_loop2:
     96 .byte	102,15,56,222,209
     97 .byte	102,15,56,222,217
     98 	movups	(%rcx,%rax,1),%xmm1
     99 	addq	$32,%rax
    100 .byte	102,15,56,222,208
    101 .byte	102,15,56,222,216
    102 	movups	-16(%rcx,%rax,1),%xmm0
    103 	jnz	L$dec_loop2
    104 
        // Tail: one more aesdec with %xmm1, then aesdeclast (223) with %xmm0.
    105 .byte	102,15,56,222,209
    106 .byte	102,15,56,222,217
    107 .byte	102,15,56,223,208
    108 .byte	102,15,56,223,216
        // 0xf3,0xc3 = "rep ret".
    109 	.byte	0xf3,0xc3
    110 
    111 
    112 .p2align	4
        // _aesni_encrypt3: file-local helper, encrypts three blocks in parallel.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2-%xmm4 = the three blocks
        // Out:   %xmm2-%xmm4 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
    113 _aesni_encrypt3:
    114 	movups	(%rcx),%xmm0
    115 	shll	$4,%eax
    116 	movups	16(%rcx),%xmm1
        // Whitening: XOR the first 16 key bytes into every lane.
    117 	xorps	%xmm0,%xmm2
    118 	xorps	%xmm0,%xmm3
    119 	xorps	%xmm0,%xmm4
    120 	movups	32(%rcx),%xmm0
        // %rcx -> (start + 32 + 16*count); %rax = negative offset counting up to 0.
    121 	leaq	32(%rcx,%rax,1),%rcx
    122 	negq	%rax
    123 	addq	$16,%rax
    124 
        // Two rounds per iteration. 220 = aesenc; final bytes 209/217/225 apply
        // %xmm1 to %xmm2/%xmm3/%xmm4, 208/216/224 apply %xmm0 to the same lanes.
        // jnz tests the addq result.
    125 L$enc_loop3:
    126 .byte	102,15,56,220,209
    127 .byte	102,15,56,220,217
    128 .byte	102,15,56,220,225
    129 	movups	(%rcx,%rax,1),%xmm1
    130 	addq	$32,%rax
    131 .byte	102,15,56,220,208
    132 .byte	102,15,56,220,216
    133 .byte	102,15,56,220,224
    134 	movups	-16(%rcx,%rax,1),%xmm0
    135 	jnz	L$enc_loop3
    136 
        // Tail: aesenc with %xmm1 on all lanes, then aesenclast (221) with %xmm0.
    137 .byte	102,15,56,220,209
    138 .byte	102,15,56,220,217
    139 .byte	102,15,56,220,225
    140 .byte	102,15,56,221,208
    141 .byte	102,15,56,221,216
    142 .byte	102,15,56,221,224
        // 0xf3,0xc3 = "rep ret".
    143 	.byte	0xf3,0xc3
    144 
    145 
    146 .p2align	4
        // _aesni_decrypt3: file-local helper, decrypts three blocks in parallel.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2-%xmm4 = the three blocks
        // Out:   %xmm2-%xmm4 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
    147 _aesni_decrypt3:
    148 	movups	(%rcx),%xmm0
    149 	shll	$4,%eax
    150 	movups	16(%rcx),%xmm1
        // Whitening: XOR the first 16 key bytes into every lane.
    151 	xorps	%xmm0,%xmm2
    152 	xorps	%xmm0,%xmm3
    153 	xorps	%xmm0,%xmm4
    154 	movups	32(%rcx),%xmm0
        // %rcx -> (start + 32 + 16*count); %rax = negative offset counting up to 0.
    155 	leaq	32(%rcx,%rax,1),%rcx
    156 	negq	%rax
    157 	addq	$16,%rax
    158 
        // Two rounds per iteration. 222 = aesdec; final bytes 209/217/225 apply
        // %xmm1 to %xmm2/%xmm3/%xmm4, 208/216/224 apply %xmm0 to the same lanes.
        // jnz tests the addq result.
    159 L$dec_loop3:
    160 .byte	102,15,56,222,209
    161 .byte	102,15,56,222,217
    162 .byte	102,15,56,222,225
    163 	movups	(%rcx,%rax,1),%xmm1
    164 	addq	$32,%rax
    165 .byte	102,15,56,222,208
    166 .byte	102,15,56,222,216
    167 .byte	102,15,56,222,224
    168 	movups	-16(%rcx,%rax,1),%xmm0
    169 	jnz	L$dec_loop3
    170 
        // Tail: aesdec with %xmm1 on all lanes, then aesdeclast (223) with %xmm0.
    171 .byte	102,15,56,222,209
    172 .byte	102,15,56,222,217
    173 .byte	102,15,56,222,225
    174 .byte	102,15,56,223,208
    175 .byte	102,15,56,223,216
    176 .byte	102,15,56,223,224
        // 0xf3,0xc3 = "rep ret".
    177 	.byte	0xf3,0xc3
    178 
    179 
    180 .p2align	4
        // _aesni_encrypt4: file-local helper, encrypts four blocks in parallel.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2-%xmm5 = the four blocks
        // Out:   %xmm2-%xmm5 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
    181 _aesni_encrypt4:
    182 	movups	(%rcx),%xmm0
    183 	shll	$4,%eax
    184 	movups	16(%rcx),%xmm1
        // Whitening: XOR the first 16 key bytes into every lane.
    185 	xorps	%xmm0,%xmm2
    186 	xorps	%xmm0,%xmm3
    187 	xorps	%xmm0,%xmm4
    188 	xorps	%xmm0,%xmm5
    189 	movups	32(%rcx),%xmm0
        // %rcx -> (start + 32 + 16*count); %rax = negative offset counting up to 0.
    190 	leaq	32(%rcx,%rax,1),%rcx
    191 	negq	%rax
        // 0x0f,0x1f,0x00 is a 3-byte NOP (nopl (%rax)); presumably padding so the
        // loop label below lands on a convenient boundary.
    192 .byte	0x0f,0x1f,0x00
    193 	addq	$16,%rax
    194 
        // Two rounds per iteration. 220 = aesenc; final bytes 209/217/225/233
        // apply %xmm1 to %xmm2..%xmm5, 208/216/224/232 apply %xmm0.
        // jnz tests the addq result.
    195 L$enc_loop4:
    196 .byte	102,15,56,220,209
    197 .byte	102,15,56,220,217
    198 .byte	102,15,56,220,225
    199 .byte	102,15,56,220,233
    200 	movups	(%rcx,%rax,1),%xmm1
    201 	addq	$32,%rax
    202 .byte	102,15,56,220,208
    203 .byte	102,15,56,220,216
    204 .byte	102,15,56,220,224
    205 .byte	102,15,56,220,232
    206 	movups	-16(%rcx,%rax,1),%xmm0
    207 	jnz	L$enc_loop4
    208 
        // Tail: aesenc with %xmm1 on all lanes, then aesenclast (221) with %xmm0.
    209 .byte	102,15,56,220,209
    210 .byte	102,15,56,220,217
    211 .byte	102,15,56,220,225
    212 .byte	102,15,56,220,233
    213 .byte	102,15,56,221,208
    214 .byte	102,15,56,221,216
    215 .byte	102,15,56,221,224
    216 .byte	102,15,56,221,232
        // 0xf3,0xc3 = "rep ret".
    217 	.byte	0xf3,0xc3
    218 
    219 
    220 .p2align	4
        // _aesni_decrypt4: file-local helper, decrypts four blocks in parallel.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2-%xmm5 = the four blocks
        // Out:   %xmm2-%xmm5 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
    221 _aesni_decrypt4:
    222 	movups	(%rcx),%xmm0
    223 	shll	$4,%eax
    224 	movups	16(%rcx),%xmm1
        // Whitening: XOR the first 16 key bytes into every lane.
    225 	xorps	%xmm0,%xmm2
    226 	xorps	%xmm0,%xmm3
    227 	xorps	%xmm0,%xmm4
    228 	xorps	%xmm0,%xmm5
    229 	movups	32(%rcx),%xmm0
        // %rcx -> (start + 32 + 16*count); %rax = negative offset counting up to 0.
    230 	leaq	32(%rcx,%rax,1),%rcx
    231 	negq	%rax
        // 0x0f,0x1f,0x00 is a 3-byte NOP (nopl (%rax)); presumably padding so the
        // loop label below lands on a convenient boundary.
    232 .byte	0x0f,0x1f,0x00
    233 	addq	$16,%rax
    234 
        // Two rounds per iteration. 222 = aesdec; final bytes 209/217/225/233
        // apply %xmm1 to %xmm2..%xmm5, 208/216/224/232 apply %xmm0.
        // jnz tests the addq result.
    235 L$dec_loop4:
    236 .byte	102,15,56,222,209
    237 .byte	102,15,56,222,217
    238 .byte	102,15,56,222,225
    239 .byte	102,15,56,222,233
    240 	movups	(%rcx,%rax,1),%xmm1
    241 	addq	$32,%rax
    242 .byte	102,15,56,222,208
    243 .byte	102,15,56,222,216
    244 .byte	102,15,56,222,224
    245 .byte	102,15,56,222,232
    246 	movups	-16(%rcx,%rax,1),%xmm0
    247 	jnz	L$dec_loop4
    248 
        // Tail: aesdec with %xmm1 on all lanes, then aesdeclast (223) with %xmm0.
    249 .byte	102,15,56,222,209
    250 .byte	102,15,56,222,217
    251 .byte	102,15,56,222,225
    252 .byte	102,15,56,222,233
    253 .byte	102,15,56,223,208
    254 .byte	102,15,56,223,216
    255 .byte	102,15,56,223,224
    256 .byte	102,15,56,223,232
        // 0xf3,0xc3 = "rep ret".
    257 	.byte	0xf3,0xc3
    258 
    259 
    260 .p2align	4
        // _aesni_encrypt6: file-local helper, encrypts six blocks in parallel.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2-%xmm7 = the six blocks
        // Out:   %xmm2-%xmm7 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
        // The whitening XORs and the first aesenc round are interleaved, and the
        // loop is entered part-way through at L$enc_loop6_enter.
        // L$enc_loop6 is also used as a call target by L$ctr32_loop6 in
        // _aesni_ctr32_encrypt_blocks, so the label and the state it expects
        // (%xmm0/%xmm1 round keys, %rcx/%rax indexing) must not change.
    261 _aesni_encrypt6:
    262 	movups	(%rcx),%xmm0
    263 	shll	$4,%eax
    264 	movups	16(%rcx),%xmm1
    265 	xorps	%xmm0,%xmm2
    266 	pxor	%xmm0,%xmm3
    267 	pxor	%xmm0,%xmm4
        // aesenc %xmm1,%xmm2 -- lane 0 starts its first round while the other
        // lanes are still being whitened.
    268 .byte	102,15,56,220,209
        // %rcx -> (start + 32 + 16*count); %rax = negative offset counting up to 0.
    269 	leaq	32(%rcx,%rax,1),%rcx
    270 	negq	%rax
        // aesenc %xmm1,%xmm3
    271 .byte	102,15,56,220,217
    272 	pxor	%xmm0,%xmm5
    273 	pxor	%xmm0,%xmm6
        // aesenc %xmm1,%xmm4
    274 .byte	102,15,56,220,225
    275 	pxor	%xmm0,%xmm7
        // With %rax = -16*count this loads from (start + 32), the next round key.
    276 	movups	(%rcx,%rax,1),%xmm0
    277 	addq	$16,%rax
        // Lanes 0-2 already had this round applied above; skip their aesenc.
    278 	jmp	L$enc_loop6_enter
    279 .p2align	4
        // Two rounds per iteration: aesenc with %xmm1 on %xmm2..%xmm7
        // (209,217,225,233,241,249), then with %xmm0 (208,...,248).
        // jnz tests the addq $32 result.
    280 L$enc_loop6:
    281 .byte	102,15,56,220,209
    282 .byte	102,15,56,220,217
    283 .byte	102,15,56,220,225
    284 L$enc_loop6_enter:
    285 .byte	102,15,56,220,233
    286 .byte	102,15,56,220,241
    287 .byte	102,15,56,220,249
    288 	movups	(%rcx,%rax,1),%xmm1
    289 	addq	$32,%rax
    290 .byte	102,15,56,220,208
    291 .byte	102,15,56,220,216
    292 .byte	102,15,56,220,224
    293 .byte	102,15,56,220,232
    294 .byte	102,15,56,220,240
    295 .byte	102,15,56,220,248
    296 	movups	-16(%rcx,%rax,1),%xmm0
    297 	jnz	L$enc_loop6
    298 
        // Tail: aesenc with %xmm1 on all six lanes, then aesenclast (221) with
        // %xmm0.
    299 .byte	102,15,56,220,209
    300 .byte	102,15,56,220,217
    301 .byte	102,15,56,220,225
    302 .byte	102,15,56,220,233
    303 .byte	102,15,56,220,241
    304 .byte	102,15,56,220,249
    305 .byte	102,15,56,221,208
    306 .byte	102,15,56,221,216
    307 .byte	102,15,56,221,224
    308 .byte	102,15,56,221,232
    309 .byte	102,15,56,221,240
    310 .byte	102,15,56,221,248
        // 0xf3,0xc3 = "rep ret".
    311 	.byte	0xf3,0xc3
    312 
    313 
    314 .p2align	4
        // _aesni_decrypt6: file-local helper, decrypts six blocks in parallel.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2-%xmm7 = the six blocks
        // Out:   %xmm2-%xmm7 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
        // The whitening XORs and the first aesdec round are interleaved, and the
        // loop is entered part-way through at L$dec_loop6_enter.
    315 _aesni_decrypt6:
    316 	movups	(%rcx),%xmm0
    317 	shll	$4,%eax
    318 	movups	16(%rcx),%xmm1
    319 	xorps	%xmm0,%xmm2
    320 	pxor	%xmm0,%xmm3
    321 	pxor	%xmm0,%xmm4
        // aesdec %xmm1,%xmm2 -- lane 0 starts its first round early.
    322 .byte	102,15,56,222,209
        // %rcx -> (start + 32 + 16*count); %rax = negative offset counting up to 0.
    323 	leaq	32(%rcx,%rax,1),%rcx
    324 	negq	%rax
        // aesdec %xmm1,%xmm3
    325 .byte	102,15,56,222,217
    326 	pxor	%xmm0,%xmm5
    327 	pxor	%xmm0,%xmm6
        // aesdec %xmm1,%xmm4
    328 .byte	102,15,56,222,225
    329 	pxor	%xmm0,%xmm7
        // With %rax = -16*count this loads from (start + 32), the next round key.
    330 	movups	(%rcx,%rax,1),%xmm0
    331 	addq	$16,%rax
        // Lanes 0-2 already had this round applied above; skip their aesdec.
    332 	jmp	L$dec_loop6_enter
    333 .p2align	4
        // Two rounds per iteration: aesdec with %xmm1 on %xmm2..%xmm7
        // (209,217,225,233,241,249), then with %xmm0 (208,...,248).
        // jnz tests the addq $32 result.
    334 L$dec_loop6:
    335 .byte	102,15,56,222,209
    336 .byte	102,15,56,222,217
    337 .byte	102,15,56,222,225
    338 L$dec_loop6_enter:
    339 .byte	102,15,56,222,233
    340 .byte	102,15,56,222,241
    341 .byte	102,15,56,222,249
    342 	movups	(%rcx,%rax,1),%xmm1
    343 	addq	$32,%rax
    344 .byte	102,15,56,222,208
    345 .byte	102,15,56,222,216
    346 .byte	102,15,56,222,224
    347 .byte	102,15,56,222,232
    348 .byte	102,15,56,222,240
    349 .byte	102,15,56,222,248
    350 	movups	-16(%rcx,%rax,1),%xmm0
    351 	jnz	L$dec_loop6
    352 
        // Tail: aesdec with %xmm1 on all six lanes, then aesdeclast (223) with
        // %xmm0.
    353 .byte	102,15,56,222,209
    354 .byte	102,15,56,222,217
    355 .byte	102,15,56,222,225
    356 .byte	102,15,56,222,233
    357 .byte	102,15,56,222,241
    358 .byte	102,15,56,222,249
    359 .byte	102,15,56,223,208
    360 .byte	102,15,56,223,216
    361 .byte	102,15,56,223,224
    362 .byte	102,15,56,223,232
    363 .byte	102,15,56,223,240
    364 .byte	102,15,56,223,248
        // 0xf3,0xc3 = "rep ret".
    365 	.byte	0xf3,0xc3
    366 
    367 
    368 .p2align	4
        // _aesni_encrypt8: file-local helper, encrypts eight blocks in parallel.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2-%xmm9 = the eight blocks
        // Out:   %xmm2-%xmm9 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
        // Byte sequences with the extra 68 (0x44, REX.R) prefix address
        // %xmm8/%xmm9: final byte 193/201 = round key %xmm1, 192/200 = %xmm0.
        // The first round is started on lanes 0-1 during whitening, and the loop
        // is entered at L$enc_loop8_inner.
    369 _aesni_encrypt8:
    370 	movups	(%rcx),%xmm0
    371 	shll	$4,%eax
    372 	movups	16(%rcx),%xmm1
    373 	xorps	%xmm0,%xmm2
    374 	xorps	%xmm0,%xmm3
    375 	pxor	%xmm0,%xmm4
    376 	pxor	%xmm0,%xmm5
    377 	pxor	%xmm0,%xmm6
        // %rcx -> (start + 32 + 16*count); %rax = negative offset counting up to 0.
    378 	leaq	32(%rcx,%rax,1),%rcx
    379 	negq	%rax
        // aesenc %xmm1,%xmm2
    380 .byte	102,15,56,220,209
    381 	pxor	%xmm0,%xmm7
    382 	pxor	%xmm0,%xmm8
        // aesenc %xmm1,%xmm3
    383 .byte	102,15,56,220,217
    384 	pxor	%xmm0,%xmm9
        // With %rax = -16*count this loads from (start + 32), the next round key.
    385 	movups	(%rcx,%rax,1),%xmm0
    386 	addq	$16,%rax
        // Lanes 0-1 already had this round applied above; skip their aesenc.
    387 	jmp	L$enc_loop8_inner
    388 .p2align	4
        // Two rounds per iteration: aesenc with %xmm1 on all eight lanes, then
        // with %xmm0. jnz tests the addq $32 result.
    389 L$enc_loop8:
    390 .byte	102,15,56,220,209
    391 .byte	102,15,56,220,217
    392 L$enc_loop8_inner:
    393 .byte	102,15,56,220,225
    394 .byte	102,15,56,220,233
    395 .byte	102,15,56,220,241
    396 .byte	102,15,56,220,249
    397 .byte	102,68,15,56,220,193
    398 .byte	102,68,15,56,220,201
        // NOTE(review): L$enc_loop8_enter is not referenced in the part of the
        // file visible here; it may be a target for code further down.
    399 L$enc_loop8_enter:
    400 	movups	(%rcx,%rax,1),%xmm1
    401 	addq	$32,%rax
    402 .byte	102,15,56,220,208
    403 .byte	102,15,56,220,216
    404 .byte	102,15,56,220,224
    405 .byte	102,15,56,220,232
    406 .byte	102,15,56,220,240
    407 .byte	102,15,56,220,248
    408 .byte	102,68,15,56,220,192
    409 .byte	102,68,15,56,220,200
    410 	movups	-16(%rcx,%rax,1),%xmm0
    411 	jnz	L$enc_loop8
    412 
        // Tail: aesenc with %xmm1 on all eight lanes, then aesenclast (221) with
        // %xmm0.
    413 .byte	102,15,56,220,209
    414 .byte	102,15,56,220,217
    415 .byte	102,15,56,220,225
    416 .byte	102,15,56,220,233
    417 .byte	102,15,56,220,241
    418 .byte	102,15,56,220,249
    419 .byte	102,68,15,56,220,193
    420 .byte	102,68,15,56,220,201
    421 .byte	102,15,56,221,208
    422 .byte	102,15,56,221,216
    423 .byte	102,15,56,221,224
    424 .byte	102,15,56,221,232
    425 .byte	102,15,56,221,240
    426 .byte	102,15,56,221,248
    427 .byte	102,68,15,56,221,192
    428 .byte	102,68,15,56,221,200
        // 0xf3,0xc3 = "rep ret".
    429 	.byte	0xf3,0xc3
    430 
    431 
    432 .p2align	4
        // _aesni_decrypt8: file-local helper, decrypts eight blocks in parallel.
        // In:    %rcx -> key schedule
        //        %eax  = loop count in 16-byte units (shifted left by 4 below)
        //        %xmm2-%xmm9 = the eight blocks
        // Out:   %xmm2-%xmm9 = processed blocks
        // Clobb: %rax, %rcx, %xmm0, %xmm1, flags
        // Byte sequences with the extra 68 (0x44, REX.R) prefix address
        // %xmm8/%xmm9: final byte 193/201 = round key %xmm1, 192/200 = %xmm0.
        // The first round is started on lanes 0-1 during whitening, and the loop
        // is entered at L$dec_loop8_inner.
    433 _aesni_decrypt8:
    434 	movups	(%rcx),%xmm0
    435 	shll	$4,%eax
    436 	movups	16(%rcx),%xmm1
    437 	xorps	%xmm0,%xmm2
    438 	xorps	%xmm0,%xmm3
    439 	pxor	%xmm0,%xmm4
    440 	pxor	%xmm0,%xmm5
    441 	pxor	%xmm0,%xmm6
        // %rcx -> (start + 32 + 16*count); %rax = negative offset counting up to 0.
    442 	leaq	32(%rcx,%rax,1),%rcx
    443 	negq	%rax
        // aesdec %xmm1,%xmm2
    444 .byte	102,15,56,222,209
    445 	pxor	%xmm0,%xmm7
    446 	pxor	%xmm0,%xmm8
        // aesdec %xmm1,%xmm3
    447 .byte	102,15,56,222,217
    448 	pxor	%xmm0,%xmm9
        // With %rax = -16*count this loads from (start + 32), the next round key.
    449 	movups	(%rcx,%rax,1),%xmm0
    450 	addq	$16,%rax
        // Lanes 0-1 already had this round applied above; skip their aesdec.
    451 	jmp	L$dec_loop8_inner
    452 .p2align	4
        // Two rounds per iteration: aesdec with %xmm1 on all eight lanes, then
        // with %xmm0. jnz tests the addq $32 result.
    453 L$dec_loop8:
    454 .byte	102,15,56,222,209
    455 .byte	102,15,56,222,217
    456 L$dec_loop8_inner:
    457 .byte	102,15,56,222,225
    458 .byte	102,15,56,222,233
    459 .byte	102,15,56,222,241
    460 .byte	102,15,56,222,249
    461 .byte	102,68,15,56,222,193
    462 .byte	102,68,15,56,222,201
        // NOTE(review): L$dec_loop8_enter is not referenced in the part of the
        // file visible here; it may be a target for code further down.
    463 L$dec_loop8_enter:
    464 	movups	(%rcx,%rax,1),%xmm1
    465 	addq	$32,%rax
    466 .byte	102,15,56,222,208
    467 .byte	102,15,56,222,216
    468 .byte	102,15,56,222,224
    469 .byte	102,15,56,222,232
    470 .byte	102,15,56,222,240
    471 .byte	102,15,56,222,248
    472 .byte	102,68,15,56,222,192
    473 .byte	102,68,15,56,222,200
    474 	movups	-16(%rcx,%rax,1),%xmm0
    475 	jnz	L$dec_loop8
    476 
        // Tail: aesdec with %xmm1 on all eight lanes, then aesdeclast (223) with
        // %xmm0.
    477 .byte	102,15,56,222,209
    478 .byte	102,15,56,222,217
    479 .byte	102,15,56,222,225
    480 .byte	102,15,56,222,233
    481 .byte	102,15,56,222,241
    482 .byte	102,15,56,222,249
    483 .byte	102,68,15,56,222,193
    484 .byte	102,68,15,56,222,201
    485 .byte	102,15,56,223,208
    486 .byte	102,15,56,223,216
    487 .byte	102,15,56,223,224
    488 .byte	102,15,56,223,232
    489 .byte	102,15,56,223,240
    490 .byte	102,15,56,223,248
    491 .byte	102,68,15,56,223,192
    492 .byte	102,68,15,56,223,200
        // 0xf3,0xc3 = "rep ret".
    493 	.byte	0xf3,0xc3
    494 
    495 .globl	_aesni_ecb_encrypt
    496 .private_extern _aesni_ecb_encrypt
    497 
    498 .p2align	4
        // _aesni_ecb_encrypt: ECB-mode bulk encryption / decryption.
        // In:    %rdi -> input
        //        %rsi -> output
        //        %rdx  = length in bytes; rounded down to a multiple of 16 here,
        //                and a result of zero returns immediately
        //        %rcx -> key schedule (32-bit loop count read from offset 240)
        //        %r8d  = direction: non-zero takes the encrypt path, zero the
        //                decrypt path
        // Flow:  128-byte (8-block) chunks go through _aesni_encrypt8 /
        //        _aesni_decrypt8 in a loop; the remaining 1..7 blocks are
        //        dispatched to the 1-, 2-, 3-, 4-, 6- or 8-lane code.
        // %r11 / %r10d keep copies of the key pointer and loop count because the
        // multi-block helpers overwrite %rcx and %rax.
        // Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm9, flags
    499 _aesni_ecb_encrypt:
    500 	andq	$-16,%rdx
    501 	jz	L$ecb_ret
    502 
    503 	movl	240(%rcx),%eax
    504 	movups	(%rcx),%xmm0
    505 	movq	%rcx,%r11
    506 	movl	%eax,%r10d
    507 	testl	%r8d,%r8d
    508 	jz	L$ecb_decrypt
    509 
        // ---- encrypt path ----
    510 	cmpq	$0x80,%rdx
    511 	jb	L$ecb_enc_tail
    512 
        // Prime the pipeline: load the first eight blocks.
    513 	movdqu	(%rdi),%xmm2
    514 	movdqu	16(%rdi),%xmm3
    515 	movdqu	32(%rdi),%xmm4
    516 	movdqu	48(%rdi),%xmm5
    517 	movdqu	64(%rdi),%xmm6
    518 	movdqu	80(%rdi),%xmm7
    519 	movdqu	96(%rdi),%xmm8
    520 	movdqu	112(%rdi),%xmm9
    521 	leaq	128(%rdi),%rdi
    522 	subq	$0x80,%rdx
    523 	jmp	L$ecb_enc_loop8_enter
    524 .p2align	4
        // Loop body: store the eight results of the previous call while loading
        // the next eight inputs, and restore %rcx/%eax for the helper.
    525 L$ecb_enc_loop8:
    526 	movups	%xmm2,(%rsi)
    527 	movq	%r11,%rcx
    528 	movdqu	(%rdi),%xmm2
    529 	movl	%r10d,%eax
    530 	movups	%xmm3,16(%rsi)
    531 	movdqu	16(%rdi),%xmm3
    532 	movups	%xmm4,32(%rsi)
    533 	movdqu	32(%rdi),%xmm4
    534 	movups	%xmm5,48(%rsi)
    535 	movdqu	48(%rdi),%xmm5
    536 	movups	%xmm6,64(%rsi)
    537 	movdqu	64(%rdi),%xmm6
    538 	movups	%xmm7,80(%rsi)
    539 	movdqu	80(%rdi),%xmm7
    540 	movups	%xmm8,96(%rsi)
    541 	movdqu	96(%rdi),%xmm8
    542 	movups	%xmm9,112(%rsi)
    543 	leaq	128(%rsi),%rsi
    544 	movdqu	112(%rdi),%xmm9
    545 	leaq	128(%rdi),%rdi
    546 L$ecb_enc_loop8_enter:
    547 
    548 	call	_aesni_encrypt8
    549 
        // No borrow means at least another full 128 bytes remain.
    550 	subq	$0x80,%rdx
    551 	jnc	L$ecb_enc_loop8
    552 
        // Flush the last eight results and restore %rcx/%eax for the tail code.
    553 	movups	%xmm2,(%rsi)
    554 	movq	%r11,%rcx
    555 	movups	%xmm3,16(%rsi)
    556 	movl	%r10d,%eax
    557 	movups	%xmm4,32(%rsi)
    558 	movups	%xmm5,48(%rsi)
    559 	movups	%xmm6,64(%rsi)
    560 	movups	%xmm7,80(%rsi)
    561 	movups	%xmm8,96(%rsi)
    562 	movups	%xmm9,112(%rsi)
    563 	leaq	128(%rsi),%rsi
        // Undo the over-subtraction; zero means nothing is left.
    564 	addq	$0x80,%rdx
    565 	jz	L$ecb_ret
    566 
        // Tail: 1..7 blocks remain. Each cmpq sets the flags once; the movups
        // loads in between leave them intact, so the following jb and je both
        // test the same comparison.
    567 L$ecb_enc_tail:
    568 	movups	(%rdi),%xmm2
    569 	cmpq	$0x20,%rdx
    570 	jb	L$ecb_enc_one
    571 	movups	16(%rdi),%xmm3
    572 	je	L$ecb_enc_two
    573 	movups	32(%rdi),%xmm4
    574 	cmpq	$0x40,%rdx
    575 	jb	L$ecb_enc_three
    576 	movups	48(%rdi),%xmm5
    577 	je	L$ecb_enc_four
    578 	movups	64(%rdi),%xmm6
    579 	cmpq	$0x60,%rdx
    580 	jb	L$ecb_enc_five
    581 	movups	80(%rdi),%xmm7
    582 	je	L$ecb_enc_six
        // Seven blocks: zero the eighth lane, run the 8-lane helper, store seven.
    583 	movdqu	96(%rdi),%xmm8
    584 	xorps	%xmm9,%xmm9
    585 	call	_aesni_encrypt8
    586 	movups	%xmm2,(%rsi)
    587 	movups	%xmm3,16(%rsi)
    588 	movups	%xmm4,32(%rsi)
    589 	movups	%xmm5,48(%rsi)
    590 	movups	%xmm6,64(%rsi)
    591 	movups	%xmm7,80(%rsi)
    592 	movups	%xmm8,96(%rsi)
    593 	jmp	L$ecb_ret
    594 .p2align	4
        // One block: inline single-lane loop (same shape as _aesni_encrypt, but
        // indexed through %rcx). 220,209 = aesenc %xmm1,%xmm2;
        // 221,209 = aesenclast %xmm1,%xmm2.
    595 L$ecb_enc_one:
    596 	movups	(%rcx),%xmm0
    597 	movups	16(%rcx),%xmm1
    598 	leaq	32(%rcx),%rcx
    599 	xorps	%xmm0,%xmm2
    600 L$oop_enc1_3:
    601 .byte	102,15,56,220,209
    602 	decl	%eax
    603 	movups	(%rcx),%xmm1
    604 	leaq	16(%rcx),%rcx
    605 	jnz	L$oop_enc1_3
    606 .byte	102,15,56,221,209
    607 	movups	%xmm2,(%rsi)
    608 	jmp	L$ecb_ret
    609 .p2align	4
    610 L$ecb_enc_two:
    611 	call	_aesni_encrypt2
    612 	movups	%xmm2,(%rsi)
    613 	movups	%xmm3,16(%rsi)
    614 	jmp	L$ecb_ret
    615 .p2align	4
    616 L$ecb_enc_three:
    617 	call	_aesni_encrypt3
    618 	movups	%xmm2,(%rsi)
    619 	movups	%xmm3,16(%rsi)
    620 	movups	%xmm4,32(%rsi)
    621 	jmp	L$ecb_ret
    622 .p2align	4
    623 L$ecb_enc_four:
    624 	call	_aesni_encrypt4
    625 	movups	%xmm2,(%rsi)
    626 	movups	%xmm3,16(%rsi)
    627 	movups	%xmm4,32(%rsi)
    628 	movups	%xmm5,48(%rsi)
    629 	jmp	L$ecb_ret
    630 .p2align	4
        // Five blocks: zero the sixth lane, run the 6-lane helper, store five.
    631 L$ecb_enc_five:
    632 	xorps	%xmm7,%xmm7
    633 	call	_aesni_encrypt6
    634 	movups	%xmm2,(%rsi)
    635 	movups	%xmm3,16(%rsi)
    636 	movups	%xmm4,32(%rsi)
    637 	movups	%xmm5,48(%rsi)
    638 	movups	%xmm6,64(%rsi)
    639 	jmp	L$ecb_ret
    640 .p2align	4
    641 L$ecb_enc_six:
    642 	call	_aesni_encrypt6
    643 	movups	%xmm2,(%rsi)
    644 	movups	%xmm3,16(%rsi)
    645 	movups	%xmm4,32(%rsi)
    646 	movups	%xmm5,48(%rsi)
    647 	movups	%xmm6,64(%rsi)
    648 	movups	%xmm7,80(%rsi)
    649 	jmp	L$ecb_ret
    650 
    651 .p2align	4
        // ---- decrypt path ----
        // Same structure as the encrypt path, except that each data register is
        // zeroed with pxor once its final result has been stored.
    652 L$ecb_decrypt:
    653 	cmpq	$0x80,%rdx
    654 	jb	L$ecb_dec_tail
    655 
        // Prime the pipeline: load the first eight blocks.
    656 	movdqu	(%rdi),%xmm2
    657 	movdqu	16(%rdi),%xmm3
    658 	movdqu	32(%rdi),%xmm4
    659 	movdqu	48(%rdi),%xmm5
    660 	movdqu	64(%rdi),%xmm6
    661 	movdqu	80(%rdi),%xmm7
    662 	movdqu	96(%rdi),%xmm8
    663 	movdqu	112(%rdi),%xmm9
    664 	leaq	128(%rdi),%rdi
    665 	subq	$0x80,%rdx
    666 	jmp	L$ecb_dec_loop8_enter
    667 .p2align	4
        // Loop body: store the previous eight results while loading the next
        // eight inputs, and restore %rcx/%eax for the helper.
    668 L$ecb_dec_loop8:
    669 	movups	%xmm2,(%rsi)
    670 	movq	%r11,%rcx
    671 	movdqu	(%rdi),%xmm2
    672 	movl	%r10d,%eax
    673 	movups	%xmm3,16(%rsi)
    674 	movdqu	16(%rdi),%xmm3
    675 	movups	%xmm4,32(%rsi)
    676 	movdqu	32(%rdi),%xmm4
    677 	movups	%xmm5,48(%rsi)
    678 	movdqu	48(%rdi),%xmm5
    679 	movups	%xmm6,64(%rsi)
    680 	movdqu	64(%rdi),%xmm6
    681 	movups	%xmm7,80(%rsi)
    682 	movdqu	80(%rdi),%xmm7
    683 	movups	%xmm8,96(%rsi)
    684 	movdqu	96(%rdi),%xmm8
    685 	movups	%xmm9,112(%rsi)
    686 	leaq	128(%rsi),%rsi
    687 	movdqu	112(%rdi),%xmm9
    688 	leaq	128(%rdi),%rdi
    689 L$ecb_dec_loop8_enter:
    690 
    691 	call	_aesni_decrypt8
    692 
        // Reloads the first 16 key bytes into %xmm0. NOTE(review): the helper
        // loads %xmm0 from (%rcx) itself on entry, so the purpose of this reload
        // is not evident from the code visible here.
    693 	movups	(%r11),%xmm0
        // No borrow means at least another full 128 bytes remain.
    694 	subq	$0x80,%rdx
    695 	jnc	L$ecb_dec_loop8
    696 
        // Flush the last eight results, zeroing each register after its store.
    697 	movups	%xmm2,(%rsi)
    698 	pxor	%xmm2,%xmm2
    699 	movq	%r11,%rcx
    700 	movups	%xmm3,16(%rsi)
    701 	pxor	%xmm3,%xmm3
    702 	movl	%r10d,%eax
    703 	movups	%xmm4,32(%rsi)
    704 	pxor	%xmm4,%xmm4
    705 	movups	%xmm5,48(%rsi)
    706 	pxor	%xmm5,%xmm5
    707 	movups	%xmm6,64(%rsi)
    708 	pxor	%xmm6,%xmm6
    709 	movups	%xmm7,80(%rsi)
    710 	pxor	%xmm7,%xmm7
    711 	movups	%xmm8,96(%rsi)
    712 	pxor	%xmm8,%xmm8
    713 	movups	%xmm9,112(%rsi)
    714 	pxor	%xmm9,%xmm9
    715 	leaq	128(%rsi),%rsi
        // Undo the over-subtraction; zero means nothing is left.
    716 	addq	$0x80,%rdx
    717 	jz	L$ecb_ret
    718 
        // Tail: 1..7 blocks remain; same cmp / jb / je dispatch as the encrypt
        // tail (the movups loads do not disturb the flags).
    719 L$ecb_dec_tail:
    720 	movups	(%rdi),%xmm2
    721 	cmpq	$0x20,%rdx
    722 	jb	L$ecb_dec_one
    723 	movups	16(%rdi),%xmm3
    724 	je	L$ecb_dec_two
    725 	movups	32(%rdi),%xmm4
    726 	cmpq	$0x40,%rdx
    727 	jb	L$ecb_dec_three
    728 	movups	48(%rdi),%xmm5
    729 	je	L$ecb_dec_four
    730 	movups	64(%rdi),%xmm6
    731 	cmpq	$0x60,%rdx
    732 	jb	L$ecb_dec_five
    733 	movups	80(%rdi),%xmm7
    734 	je	L$ecb_dec_six
        // Seven blocks: zero the eighth lane, run the 8-lane helper, store seven.
    735 	movups	96(%rdi),%xmm8
    736 	movups	(%rcx),%xmm0
    737 	xorps	%xmm9,%xmm9
    738 	call	_aesni_decrypt8
    739 	movups	%xmm2,(%rsi)
    740 	pxor	%xmm2,%xmm2
    741 	movups	%xmm3,16(%rsi)
    742 	pxor	%xmm3,%xmm3
    743 	movups	%xmm4,32(%rsi)
    744 	pxor	%xmm4,%xmm4
    745 	movups	%xmm5,48(%rsi)
    746 	pxor	%xmm5,%xmm5
    747 	movups	%xmm6,64(%rsi)
    748 	pxor	%xmm6,%xmm6
    749 	movups	%xmm7,80(%rsi)
    750 	pxor	%xmm7,%xmm7
    751 	movups	%xmm8,96(%rsi)
    752 	pxor	%xmm8,%xmm8
    753 	pxor	%xmm9,%xmm9
    754 	jmp	L$ecb_ret
    755 .p2align	4
        // One block: inline single-lane loop. 222,209 = aesdec %xmm1,%xmm2;
        // 223,209 = aesdeclast %xmm1,%xmm2.
    756 L$ecb_dec_one:
    757 	movups	(%rcx),%xmm0
    758 	movups	16(%rcx),%xmm1
    759 	leaq	32(%rcx),%rcx
    760 	xorps	%xmm0,%xmm2
    761 L$oop_dec1_4:
    762 .byte	102,15,56,222,209
    763 	decl	%eax
    764 	movups	(%rcx),%xmm1
    765 	leaq	16(%rcx),%rcx
    766 	jnz	L$oop_dec1_4
    767 .byte	102,15,56,223,209
    768 	movups	%xmm2,(%rsi)
    769 	pxor	%xmm2,%xmm2
    770 	jmp	L$ecb_ret
    771 .p2align	4
    772 L$ecb_dec_two:
    773 	call	_aesni_decrypt2
    774 	movups	%xmm2,(%rsi)
    775 	pxor	%xmm2,%xmm2
    776 	movups	%xmm3,16(%rsi)
    777 	pxor	%xmm3,%xmm3
    778 	jmp	L$ecb_ret
    779 .p2align	4
    780 L$ecb_dec_three:
    781 	call	_aesni_decrypt3
    782 	movups	%xmm2,(%rsi)
    783 	pxor	%xmm2,%xmm2
    784 	movups	%xmm3,16(%rsi)
    785 	pxor	%xmm3,%xmm3
    786 	movups	%xmm4,32(%rsi)
    787 	pxor	%xmm4,%xmm4
    788 	jmp	L$ecb_ret
    789 .p2align	4
    790 L$ecb_dec_four:
    791 	call	_aesni_decrypt4
    792 	movups	%xmm2,(%rsi)
    793 	pxor	%xmm2,%xmm2
    794 	movups	%xmm3,16(%rsi)
    795 	pxor	%xmm3,%xmm3
    796 	movups	%xmm4,32(%rsi)
    797 	pxor	%xmm4,%xmm4
    798 	movups	%xmm5,48(%rsi)
    799 	pxor	%xmm5,%xmm5
    800 	jmp	L$ecb_ret
    801 .p2align	4
        // Five blocks: zero the sixth lane, run the 6-lane helper, store five.
    802 L$ecb_dec_five:
    803 	xorps	%xmm7,%xmm7
    804 	call	_aesni_decrypt6
    805 	movups	%xmm2,(%rsi)
    806 	pxor	%xmm2,%xmm2
    807 	movups	%xmm3,16(%rsi)
    808 	pxor	%xmm3,%xmm3
    809 	movups	%xmm4,32(%rsi)
    810 	pxor	%xmm4,%xmm4
    811 	movups	%xmm5,48(%rsi)
    812 	pxor	%xmm5,%xmm5
    813 	movups	%xmm6,64(%rsi)
    814 	pxor	%xmm6,%xmm6
    815 	pxor	%xmm7,%xmm7
    816 	jmp	L$ecb_ret
    817 .p2align	4
        // Six blocks; falls through into L$ecb_ret.
    818 L$ecb_dec_six:
    819 	call	_aesni_decrypt6
    820 	movups	%xmm2,(%rsi)
    821 	pxor	%xmm2,%xmm2
    822 	movups	%xmm3,16(%rsi)
    823 	pxor	%xmm3,%xmm3
    824 	movups	%xmm4,32(%rsi)
    825 	pxor	%xmm4,%xmm4
    826 	movups	%xmm5,48(%rsi)
    827 	pxor	%xmm5,%xmm5
    828 	movups	%xmm6,64(%rsi)
    829 	pxor	%xmm6,%xmm6
    830 	movups	%xmm7,80(%rsi)
    831 	pxor	%xmm7,%xmm7
    832 
        // Common exit: zero the two round-key registers, then "rep ret".
    833 L$ecb_ret:
    834 	xorps	%xmm0,%xmm0
    835 	pxor	%xmm1,%xmm1
    836 	.byte	0xf3,0xc3
    837 
    838 .globl	_aesni_ccm64_encrypt_blocks
    839 .private_extern _aesni_ccm64_encrypt_blocks
    840 
    841 .p2align	4
        // _aesni_ccm64_encrypt_blocks: per block, encrypts a counter value and a
        // running 16-byte accumulator side by side (two AES lanes), XORs the
        // encrypted counter into the input to produce the output, and finally
        // writes the accumulator back.
        // In:    %rdi -> input
        //        %rsi -> output
        //        %rdx  = number of 16-byte blocks (decremented before it is
        //                tested, so a value of zero is not handled here)
        //        %rcx -> key schedule (32-bit loop count read from offset 240)
        //        %r8  -> 16-byte counter block (read only)
        //        %r9  -> 16-byte accumulator, read at entry and rewritten at exit
        //                (presumably the CCM CBC-MAC state -- confirm with caller)
        // Uses L$increment64 and L$bswap_mask, which are defined outside the part
        // of the file visible here; movdqa requires them to be 16-byte aligned.
        // Register roles: %xmm6 = counter (kept in shuffled form), %xmm2 = counter
        // lane, %xmm3 = accumulator lane, %xmm8 = current input block,
        // %xmm9 = increment, %xmm7 = shuffle mask, %r11 = key base,
        // %r10 = initial (negative) round-key offset.
        // Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
        //        %xmm6-%xmm9, flags
    842 _aesni_ccm64_encrypt_blocks:
    843 	movl	240(%rcx),%eax
    844 	movdqu	(%r8),%xmm6
    845 	movdqa	L$increment64(%rip),%xmm9
    846 	movdqa	L$bswap_mask(%rip),%xmm7
    847 
    848 	shll	$4,%eax
    849 	movl	$16,%r10d
    850 	leaq	0(%rcx),%r11
    851 	movdqu	(%r9),%xmm3
        // The first counter lane is the counter block exactly as loaded.
    852 	movdqa	%xmm6,%xmm2
    853 	leaq	32(%rcx,%rax,1),%rcx
        // pshufb %xmm7,%xmm6 -- shuffle the counter so paddq can increment it.
    854 .byte	102,15,56,0,247
        // %r10 = 16 - 16*count: starting offset relative to the advanced %rcx.
    855 	subq	%rax,%r10
        // Jump over any alignment padding emitted by .p2align.
    856 	jmp	L$ccm64_enc_outer
    857 .p2align	4
    858 L$ccm64_enc_outer:
    859 	movups	(%r11),%xmm0
    860 	movq	%r10,%rax
    861 	movups	(%rdi),%xmm8
    862 
        // Counter lane: whiten with the first key block.
        // Accumulator lane: xmm3 ^= (input ^ first key block).
    863 	xorps	%xmm0,%xmm2
    864 	movups	16(%r11),%xmm1
    865 	xorps	%xmm8,%xmm0
    866 	xorps	%xmm0,%xmm3
    867 	movups	32(%r11),%xmm0
    868 
        // Two-lane round loop (same shape as L$enc_loop2): aesenc on %xmm2 and
        // %xmm3, alternating %xmm1 / %xmm0. jnz tests the addq result.
    869 L$ccm64_enc2_loop:
    870 .byte	102,15,56,220,209
    871 .byte	102,15,56,220,217
    872 	movups	(%rcx,%rax,1),%xmm1
    873 	addq	$32,%rax
    874 .byte	102,15,56,220,208
    875 .byte	102,15,56,220,216
    876 	movups	-16(%rcx,%rax,1),%xmm0
    877 	jnz	L$ccm64_enc2_loop
    878 .byte	102,15,56,220,209
    879 .byte	102,15,56,220,217
        // Advance the counter and decrement the block count. None of the
        // instructions between decq and the closing jnz modify flags.
    880 	paddq	%xmm9,%xmm6
    881 	decq	%rdx
        // aesenclast %xmm0 on both lanes.
    882 .byte	102,15,56,221,208
    883 .byte	102,15,56,221,216
    884 
    885 	leaq	16(%rdi),%rdi
        // output = input ^ encrypted counter
    886 	xorps	%xmm2,%xmm8
    887 	movdqa	%xmm6,%xmm2
    888 	movups	%xmm8,(%rsi)
        // pshufb %xmm7,%xmm2 -- next counter lane, shuffled back from %xmm6.
    889 .byte	102,15,56,0,215
    890 	leaq	16(%rsi),%rsi
    891 	jnz	L$ccm64_enc_outer
    892 
        // Write the accumulator back and scrub the working registers.
    893 	pxor	%xmm0,%xmm0
    894 	pxor	%xmm1,%xmm1
    895 	pxor	%xmm2,%xmm2
    896 	movups	%xmm3,(%r9)
    897 	pxor	%xmm3,%xmm3
    898 	pxor	%xmm8,%xmm8
    899 	pxor	%xmm6,%xmm6
        // 0xf3,0xc3 = "rep ret".
    900 	.byte	0xf3,0xc3
    901 
    902 .globl	_aesni_ccm64_decrypt_blocks
    903 .private_extern _aesni_ccm64_decrypt_blocks
    904 
    905 .p2align	4
        // _aesni_ccm64_decrypt_blocks: decrypting counterpart of
        // _aesni_ccm64_encrypt_blocks. The output block is input ^ encrypted
        // counter; the accumulator is updated from the *output* block, so its
        // update for block i runs in the same two-lane pass that encrypts the
        // counter for block i+1. The first counter and the last accumulator
        // update are therefore done with single-lane loops.
        // In:    %rdi -> input
        //        %rsi -> output
        //        %rdx  = number of 16-byte blocks (one is subtracted before the
        //                test, so a value of zero is not handled here)
        //        %rcx -> key schedule (32-bit loop count read from offset 240)
        //        %r8  -> 16-byte counter block (read only)
        //        %r9  -> 16-byte accumulator, read at entry and rewritten at exit
        //                (presumably the CCM CBC-MAC state -- confirm with caller)
        // Uses L$increment64 and L$bswap_mask, which are defined outside the part
        // of the file visible here; movdqa requires them to be 16-byte aligned.
        // Clobb: %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, %xmm0-%xmm3,
        //        %xmm6-%xmm9, flags
    906 _aesni_ccm64_decrypt_blocks:
    907 	movl	240(%rcx),%eax
    908 	movups	(%r8),%xmm6
    909 	movdqu	(%r9),%xmm3
    910 	movdqa	L$increment64(%rip),%xmm9
    911 	movdqa	L$bswap_mask(%rip),%xmm7
    912 
    913 	movaps	%xmm6,%xmm2
    914 	movl	%eax,%r10d
    915 	movq	%rcx,%r11
        // pshufb %xmm7,%xmm6 -- shuffle the counter so paddq can increment it.
    916 .byte	102,15,56,0,247
        // Single-lane encryption of the first counter value in %xmm2
        // (220,209 = aesenc %xmm1,%xmm2; 221,209 = aesenclast %xmm1,%xmm2).
    917 	movups	(%rcx),%xmm0
    918 	movups	16(%rcx),%xmm1
    919 	leaq	32(%rcx),%rcx
    920 	xorps	%xmm0,%xmm2
    921 L$oop_enc1_5:
    922 .byte	102,15,56,220,209
    923 	decl	%eax
    924 	movups	(%rcx),%xmm1
    925 	leaq	16(%rcx),%rcx
    926 	jnz	L$oop_enc1_5
    927 .byte	102,15,56,221,209
        // Set up the two-lane indexing: %rcx = key + 32 + 16*count and
        // %r10 = 16 - 16*count (the starting negative offset).
    928 	shll	$4,%r10d
    929 	movl	$16,%eax
    930 	movups	(%rdi),%xmm8
    931 	paddq	%xmm9,%xmm6
    932 	leaq	16(%rdi),%rdi
    933 	subq	%r10,%rax
    934 	leaq	32(%r11,%r10,1),%rcx
    935 	movq	%rax,%r10
        // Jump over any alignment padding emitted by .p2align.
    936 	jmp	L$ccm64_dec_outer
    937 .p2align	4
    938 L$ccm64_dec_outer:
        // output = input ^ encrypted counter; then prepare the next counter lane.
    939 	xorps	%xmm2,%xmm8
    940 	movdqa	%xmm6,%xmm2
    941 	movups	%xmm8,(%rsi)
    942 	leaq	16(%rsi),%rsi
        // pshufb %xmm7,%xmm2
    943 .byte	102,15,56,0,215
    944 
    945 	subq	$1,%rdx
    946 	jz	L$ccm64_dec_break
    947 
        // Counter lane: whiten with the first key block.
        // Accumulator lane: xmm3 ^= (output block ^ first key block).
    948 	movups	(%r11),%xmm0
    949 	movq	%r10,%rax
    950 	movups	16(%r11),%xmm1
    951 	xorps	%xmm0,%xmm8
    952 	xorps	%xmm0,%xmm2
    953 	xorps	%xmm8,%xmm3
    954 	movups	32(%r11),%xmm0
    955 	jmp	L$ccm64_dec2_loop
    956 .p2align	4
        // Two-lane round loop: aesenc on %xmm2 and %xmm3, alternating
        // %xmm1 / %xmm0. jnz tests the addq result.
    957 L$ccm64_dec2_loop:
    958 .byte	102,15,56,220,209
    959 .byte	102,15,56,220,217
    960 	movups	(%rcx,%rax,1),%xmm1
    961 	addq	$32,%rax
    962 .byte	102,15,56,220,208
    963 .byte	102,15,56,220,216
    964 	movups	-16(%rcx,%rax,1),%xmm0
    965 	jnz	L$ccm64_dec2_loop
        // Fetch the next input block and advance the counter while the last two
        // rounds (aesenc %xmm1, aesenclast %xmm0) complete.
    966 	movups	(%rdi),%xmm8
    967 	paddq	%xmm9,%xmm6
    968 .byte	102,15,56,220,209
    969 .byte	102,15,56,220,217
    970 .byte	102,15,56,221,208
    971 .byte	102,15,56,221,216
    972 	leaq	16(%rdi),%rdi
    973 	jmp	L$ccm64_dec_outer
    974 
    975 .p2align	4
        // Last block done: fold the final output block into the accumulator and
        // encrypt it with a single-lane loop on %xmm3
        // (220,217 = aesenc %xmm1,%xmm3; 221,217 = aesenclast %xmm1,%xmm3).
    976 L$ccm64_dec_break:
    977 
    978 	movl	240(%r11),%eax
    979 	movups	(%r11),%xmm0
    980 	movups	16(%r11),%xmm1
    981 	xorps	%xmm0,%xmm8
    982 	leaq	32(%r11),%r11
    983 	xorps	%xmm8,%xmm3
    984 L$oop_enc1_6:
    985 .byte	102,15,56,220,217
    986 	decl	%eax
    987 	movups	(%r11),%xmm1
    988 	leaq	16(%r11),%r11
    989 	jnz	L$oop_enc1_6
    990 .byte	102,15,56,221,217
        // Write the accumulator back and scrub the working registers.
    991 	pxor	%xmm0,%xmm0
    992 	pxor	%xmm1,%xmm1
    993 	pxor	%xmm2,%xmm2
    994 	movups	%xmm3,(%r9)
    995 	pxor	%xmm3,%xmm3
    996 	pxor	%xmm8,%xmm8
    997 	pxor	%xmm6,%xmm6
        // 0xf3,0xc3 = "rep ret".
    998 	.byte	0xf3,0xc3
    999 
   1000 .globl	_aesni_ctr32_encrypt_blocks
   1001 .private_extern _aesni_ctr32_encrypt_blocks
   1002 
   1003 .p2align	4
   1004 _aesni_ctr32_encrypt_blocks:
   1005 	cmpq	$1,%rdx
   1006 	jne	L$ctr32_bulk
   1007 
   1008 
   1009 
   1010 	movups	(%r8),%xmm2
   1011 	movups	(%rdi),%xmm3
   1012 	movl	240(%rcx),%edx
   1013 	movups	(%rcx),%xmm0
   1014 	movups	16(%rcx),%xmm1
   1015 	leaq	32(%rcx),%rcx
   1016 	xorps	%xmm0,%xmm2
   1017 L$oop_enc1_7:
   1018 .byte	102,15,56,220,209
   1019 	decl	%edx
   1020 	movups	(%rcx),%xmm1
   1021 	leaq	16(%rcx),%rcx
   1022 	jnz	L$oop_enc1_7
   1023 .byte	102,15,56,221,209
   1024 	pxor	%xmm0,%xmm0
   1025 	pxor	%xmm1,%xmm1
   1026 	xorps	%xmm3,%xmm2
   1027 	pxor	%xmm3,%xmm3
   1028 	movups	%xmm2,(%rsi)
   1029 	xorps	%xmm2,%xmm2
   1030 	jmp	L$ctr32_epilogue
   1031 
   1032 .p2align	4
   1033 L$ctr32_bulk:
   1034 	leaq	(%rsp),%r11
   1035 	pushq	%rbp
   1036 	subq	$128,%rsp
   1037 	andq	$-16,%rsp
   1038 
   1039 
   1040 
   1041 
   1042 	movdqu	(%r8),%xmm2
   1043 	movdqu	(%rcx),%xmm0
   1044 	movl	12(%r8),%r8d
   1045 	pxor	%xmm0,%xmm2
   1046 	movl	12(%rcx),%ebp
   1047 	movdqa	%xmm2,0(%rsp)
   1048 	bswapl	%r8d
   1049 	movdqa	%xmm2,%xmm3
   1050 	movdqa	%xmm2,%xmm4
   1051 	movdqa	%xmm2,%xmm5
   1052 	movdqa	%xmm2,64(%rsp)
   1053 	movdqa	%xmm2,80(%rsp)
   1054 	movdqa	%xmm2,96(%rsp)
   1055 	movq	%rdx,%r10
   1056 	movdqa	%xmm2,112(%rsp)
   1057 
   1058 	leaq	1(%r8),%rax
   1059 	leaq	2(%r8),%rdx
   1060 	bswapl	%eax
   1061 	bswapl	%edx
   1062 	xorl	%ebp,%eax
   1063 	xorl	%ebp,%edx
   1064 .byte	102,15,58,34,216,3
   1065 	leaq	3(%r8),%rax
   1066 	movdqa	%xmm3,16(%rsp)
   1067 .byte	102,15,58,34,226,3
   1068 	bswapl	%eax
   1069 	movq	%r10,%rdx
   1070 	leaq	4(%r8),%r10
   1071 	movdqa	%xmm4,32(%rsp)
   1072 	xorl	%ebp,%eax
   1073 	bswapl	%r10d
   1074 .byte	102,15,58,34,232,3
   1075 	xorl	%ebp,%r10d
   1076 	movdqa	%xmm5,48(%rsp)
   1077 	leaq	5(%r8),%r9
   1078 	movl	%r10d,64+12(%rsp)
   1079 	bswapl	%r9d
   1080 	leaq	6(%r8),%r10
   1081 	movl	240(%rcx),%eax
   1082 	xorl	%ebp,%r9d
   1083 	bswapl	%r10d
   1084 	movl	%r9d,80+12(%rsp)
   1085 	xorl	%ebp,%r10d
   1086 	leaq	7(%r8),%r9
   1087 	movl	%r10d,96+12(%rsp)
   1088 	bswapl	%r9d
   1089 	leaq	_OPENSSL_ia32cap_P(%rip),%r10
   1090 	movl	4(%r10),%r10d
   1091 	xorl	%ebp,%r9d
   1092 	andl	$71303168,%r10d
   1093 	movl	%r9d,112+12(%rsp)
   1094 
   1095 	movups	16(%rcx),%xmm1
   1096 
   1097 	movdqa	64(%rsp),%xmm6
   1098 	movdqa	80(%rsp),%xmm7
   1099 
   1100 	cmpq	$8,%rdx
   1101 	jb	L$ctr32_tail
   1102 
   1103 	subq	$6,%rdx
   1104 	cmpl	$4194304,%r10d
   1105 	je	L$ctr32_6x
   1106 
   1107 	leaq	128(%rcx),%rcx
   1108 	subq	$2,%rdx
   1109 	jmp	L$ctr32_loop8
   1110 
   1111 .p2align	4
   1112 L$ctr32_6x:
   1113 	shll	$4,%eax
   1114 	movl	$48,%r10d
   1115 	bswapl	%ebp
   1116 	leaq	32(%rcx,%rax,1),%rcx
   1117 	subq	%rax,%r10
   1118 	jmp	L$ctr32_loop6
   1119 
   1120 .p2align	4
   1121 L$ctr32_loop6:
   1122 	addl	$6,%r8d
   1123 	movups	-48(%rcx,%r10,1),%xmm0
   1124 .byte	102,15,56,220,209
   1125 	movl	%r8d,%eax
   1126 	xorl	%ebp,%eax
   1127 .byte	102,15,56,220,217
   1128 .byte	0x0f,0x38,0xf1,0x44,0x24,12
   1129 	leal	1(%r8),%eax
   1130 .byte	102,15,56,220,225
   1131 	xorl	%ebp,%eax
   1132 .byte	0x0f,0x38,0xf1,0x44,0x24,28
   1133 .byte	102,15,56,220,233
   1134 	leal	2(%r8),%eax
   1135 	xorl	%ebp,%eax
   1136 .byte	102,15,56,220,241
   1137 .byte	0x0f,0x38,0xf1,0x44,0x24,44
   1138 	leal	3(%r8),%eax
   1139 .byte	102,15,56,220,249
   1140 	movups	-32(%rcx,%r10,1),%xmm1
   1141 	xorl	%ebp,%eax
   1142 
   1143 .byte	102,15,56,220,208
   1144 .byte	0x0f,0x38,0xf1,0x44,0x24,60
   1145 	leal	4(%r8),%eax
   1146 .byte	102,15,56,220,216
   1147 	xorl	%ebp,%eax
   1148 .byte	0x0f,0x38,0xf1,0x44,0x24,76
   1149 .byte	102,15,56,220,224
   1150 	leal	5(%r8),%eax
   1151 	xorl	%ebp,%eax
   1152 .byte	102,15,56,220,232
   1153 .byte	0x0f,0x38,0xf1,0x44,0x24,92
   1154 	movq	%r10,%rax
   1155 .byte	102,15,56,220,240
   1156 .byte	102,15,56,220,248
   1157 	movups	-16(%rcx,%r10,1),%xmm0
   1158 
   1159 	call	L$enc_loop6
   1160 
   1161 	movdqu	(%rdi),%xmm8
   1162 	movdqu	16(%rdi),%xmm9
   1163 	movdqu	32(%rdi),%xmm10
   1164 	movdqu	48(%rdi),%xmm11
   1165 	movdqu	64(%rdi),%xmm12
   1166 	movdqu	80(%rdi),%xmm13
   1167 	leaq	96(%rdi),%rdi
   1168 	movups	-64(%rcx,%r10,1),%xmm1
   1169 	pxor	%xmm2,%xmm8
   1170 	movaps	0(%rsp),%xmm2
   1171 	pxor	%xmm3,%xmm9
   1172 	movaps	16(%rsp),%xmm3
   1173 	pxor	%xmm4,%xmm10
   1174 	movaps	32(%rsp),%xmm4
   1175 	pxor	%xmm5,%xmm11
   1176 	movaps	48(%rsp),%xmm5
   1177 	pxor	%xmm6,%xmm12
   1178 	movaps	64(%rsp),%xmm6
   1179 	pxor	%xmm7,%xmm13
   1180 	movaps	80(%rsp),%xmm7
   1181 	movdqu	%xmm8,(%rsi)
   1182 	movdqu	%xmm9,16(%rsi)
   1183 	movdqu	%xmm10,32(%rsi)
   1184 	movdqu	%xmm11,48(%rsi)
   1185 	movdqu	%xmm12,64(%rsi)
   1186 	movdqu	%xmm13,80(%rsi)
   1187 	leaq	96(%rsi),%rsi
   1188 
   1189 	subq	$6,%rdx
   1190 	jnc	L$ctr32_loop6
   1191 
   1192 	addq	$6,%rdx
   1193 	jz	L$ctr32_done
   1194 
   1195 	leal	-48(%r10),%eax
   1196 	leaq	-80(%rcx,%r10,1),%rcx
   1197 	negl	%eax
   1198 	shrl	$4,%eax
   1199 	jmp	L$ctr32_tail
   1200 
   1201 .p2align	5
   1202 L$ctr32_loop8:
   1203 	addl	$8,%r8d
   1204 	movdqa	96(%rsp),%xmm8
   1205 .byte	102,15,56,220,209
   1206 	movl	%r8d,%r9d
   1207 	movdqa	112(%rsp),%xmm9
   1208 .byte	102,15,56,220,217
   1209 	bswapl	%r9d
   1210 	movups	32-128(%rcx),%xmm0
   1211 .byte	102,15,56,220,225
   1212 	xorl	%ebp,%r9d
   1213 	nop
   1214 .byte	102,15,56,220,233
   1215 	movl	%r9d,0+12(%rsp)
   1216 	leaq	1(%r8),%r9
   1217 .byte	102,15,56,220,241
   1218 .byte	102,15,56,220,249
   1219 .byte	102,68,15,56,220,193
   1220 .byte	102,68,15,56,220,201
   1221 	movups	48-128(%rcx),%xmm1
   1222 	bswapl	%r9d
   1223 .byte	102,15,56,220,208
   1224 .byte	102,15,56,220,216
   1225 	xorl	%ebp,%r9d
   1226 .byte	0x66,0x90
   1227 .byte	102,15,56,220,224
   1228 .byte	102,15,56,220,232
   1229 	movl	%r9d,16+12(%rsp)
   1230 	leaq	2(%r8),%r9
   1231 .byte	102,15,56,220,240
   1232 .byte	102,15,56,220,248
   1233 .byte	102,68,15,56,220,192
   1234 .byte	102,68,15,56,220,200
   1235 	movups	64-128(%rcx),%xmm0
   1236 	bswapl	%r9d
   1237 .byte	102,15,56,220,209
   1238 .byte	102,15,56,220,217
   1239 	xorl	%ebp,%r9d
   1240 .byte	0x66,0x90
   1241 .byte	102,15,56,220,225
   1242 .byte	102,15,56,220,233
   1243 	movl	%r9d,32+12(%rsp)
   1244 	leaq	3(%r8),%r9
   1245 .byte	102,15,56,220,241
   1246 .byte	102,15,56,220,249
   1247 .byte	102,68,15,56,220,193
   1248 .byte	102,68,15,56,220,201
   1249 	movups	80-128(%rcx),%xmm1
   1250 	bswapl	%r9d
   1251 .byte	102,15,56,220,208
   1252 .byte	102,15,56,220,216
   1253 	xorl	%ebp,%r9d
   1254 .byte	0x66,0x90
   1255 .byte	102,15,56,220,224
   1256 .byte	102,15,56,220,232
   1257 	movl	%r9d,48+12(%rsp)
   1258 	leaq	4(%r8),%r9
   1259 .byte	102,15,56,220,240
   1260 .byte	102,15,56,220,248
   1261 .byte	102,68,15,56,220,192
   1262 .byte	102,68,15,56,220,200
   1263 	movups	96-128(%rcx),%xmm0
   1264 	bswapl	%r9d
   1265 .byte	102,15,56,220,209
   1266 .byte	102,15,56,220,217
   1267 	xorl	%ebp,%r9d
   1268 .byte	0x66,0x90
   1269 .byte	102,15,56,220,225
   1270 .byte	102,15,56,220,233
   1271 	movl	%r9d,64+12(%rsp)
   1272 	leaq	5(%r8),%r9
   1273 .byte	102,15,56,220,241
   1274 .byte	102,15,56,220,249
   1275 .byte	102,68,15,56,220,193
   1276 .byte	102,68,15,56,220,201
   1277 	movups	112-128(%rcx),%xmm1
   1278 	bswapl	%r9d
   1279 .byte	102,15,56,220,208
   1280 .byte	102,15,56,220,216
   1281 	xorl	%ebp,%r9d
   1282 .byte	0x66,0x90
   1283 .byte	102,15,56,220,224
   1284 .byte	102,15,56,220,232
   1285 	movl	%r9d,80+12(%rsp)
   1286 	leaq	6(%r8),%r9
   1287 .byte	102,15,56,220,240
   1288 .byte	102,15,56,220,248
   1289 .byte	102,68,15,56,220,192
   1290 .byte	102,68,15,56,220,200
   1291 	movups	128-128(%rcx),%xmm0
   1292 	bswapl	%r9d
   1293 .byte	102,15,56,220,209
   1294 .byte	102,15,56,220,217
   1295 	xorl	%ebp,%r9d
   1296 .byte	0x66,0x90
   1297 .byte	102,15,56,220,225
   1298 .byte	102,15,56,220,233
   1299 	movl	%r9d,96+12(%rsp)
   1300 	leaq	7(%r8),%r9
   1301 .byte	102,15,56,220,241
   1302 .byte	102,15,56,220,249
   1303 .byte	102,68,15,56,220,193
   1304 .byte	102,68,15,56,220,201
   1305 	movups	144-128(%rcx),%xmm1
   1306 	bswapl	%r9d
   1307 .byte	102,15,56,220,208
   1308 .byte	102,15,56,220,216
   1309 .byte	102,15,56,220,224
   1310 	xorl	%ebp,%r9d
   1311 	movdqu	0(%rdi),%xmm10
   1312 .byte	102,15,56,220,232
   1313 	movl	%r9d,112+12(%rsp)
   1314 	cmpl	$11,%eax
   1315 .byte	102,15,56,220,240
   1316 .byte	102,15,56,220,248
   1317 .byte	102,68,15,56,220,192
   1318 .byte	102,68,15,56,220,200
   1319 	movups	160-128(%rcx),%xmm0
   1320 
   1321 	jb	L$ctr32_enc_done
   1322 
   1323 .byte	102,15,56,220,209
   1324 .byte	102,15,56,220,217
   1325 .byte	102,15,56,220,225
   1326 .byte	102,15,56,220,233
   1327 .byte	102,15,56,220,241
   1328 .byte	102,15,56,220,249
   1329 .byte	102,68,15,56,220,193
   1330 .byte	102,68,15,56,220,201
   1331 	movups	176-128(%rcx),%xmm1
   1332 
   1333 .byte	102,15,56,220,208
   1334 .byte	102,15,56,220,216
   1335 .byte	102,15,56,220,224
   1336 .byte	102,15,56,220,232
   1337 .byte	102,15,56,220,240
   1338 .byte	102,15,56,220,248
   1339 .byte	102,68,15,56,220,192
   1340 .byte	102,68,15,56,220,200
   1341 	movups	192-128(%rcx),%xmm0
   1342 	je	L$ctr32_enc_done
   1343 
   1344 .byte	102,15,56,220,209
   1345 .byte	102,15,56,220,217
   1346 .byte	102,15,56,220,225
   1347 .byte	102,15,56,220,233
   1348 .byte	102,15,56,220,241
   1349 .byte	102,15,56,220,249
   1350 .byte	102,68,15,56,220,193
   1351 .byte	102,68,15,56,220,201
   1352 	movups	208-128(%rcx),%xmm1
   1353 
   1354 .byte	102,15,56,220,208
   1355 .byte	102,15,56,220,216
   1356 .byte	102,15,56,220,224
   1357 .byte	102,15,56,220,232
   1358 .byte	102,15,56,220,240
   1359 .byte	102,15,56,220,248
   1360 .byte	102,68,15,56,220,192
   1361 .byte	102,68,15,56,220,200
   1362 	movups	224-128(%rcx),%xmm0
   1363 	jmp	L$ctr32_enc_done
   1364 
   1365 .p2align	4
   1366 L$ctr32_enc_done:
   1367 	movdqu	16(%rdi),%xmm11
   1368 	pxor	%xmm0,%xmm10
   1369 	movdqu	32(%rdi),%xmm12
   1370 	pxor	%xmm0,%xmm11
   1371 	movdqu	48(%rdi),%xmm13
   1372 	pxor	%xmm0,%xmm12
   1373 	movdqu	64(%rdi),%xmm14
   1374 	pxor	%xmm0,%xmm13
   1375 	movdqu	80(%rdi),%xmm15
   1376 	pxor	%xmm0,%xmm14
   1377 	pxor	%xmm0,%xmm15
   1378 .byte	102,15,56,220,209
   1379 .byte	102,15,56,220,217
   1380 .byte	102,15,56,220,225
   1381 .byte	102,15,56,220,233
   1382 .byte	102,15,56,220,241
   1383 .byte	102,15,56,220,249
   1384 .byte	102,68,15,56,220,193
   1385 .byte	102,68,15,56,220,201
   1386 	movdqu	96(%rdi),%xmm1
   1387 	leaq	128(%rdi),%rdi
   1388 
   1389 .byte	102,65,15,56,221,210
   1390 	pxor	%xmm0,%xmm1
   1391 	movdqu	112-128(%rdi),%xmm10
   1392 .byte	102,65,15,56,221,219
   1393 	pxor	%xmm0,%xmm10
   1394 	movdqa	0(%rsp),%xmm11
   1395 .byte	102,65,15,56,221,228
   1396 .byte	102,65,15,56,221,237
   1397 	movdqa	16(%rsp),%xmm12
   1398 	movdqa	32(%rsp),%xmm13
   1399 .byte	102,65,15,56,221,246
   1400 .byte	102,65,15,56,221,255
   1401 	movdqa	48(%rsp),%xmm14
   1402 	movdqa	64(%rsp),%xmm15
   1403 .byte	102,68,15,56,221,193
   1404 	movdqa	80(%rsp),%xmm0
   1405 	movups	16-128(%rcx),%xmm1
   1406 .byte	102,69,15,56,221,202
   1407 
   1408 	movups	%xmm2,(%rsi)
   1409 	movdqa	%xmm11,%xmm2
   1410 	movups	%xmm3,16(%rsi)
   1411 	movdqa	%xmm12,%xmm3
   1412 	movups	%xmm4,32(%rsi)
   1413 	movdqa	%xmm13,%xmm4
   1414 	movups	%xmm5,48(%rsi)
   1415 	movdqa	%xmm14,%xmm5
   1416 	movups	%xmm6,64(%rsi)
   1417 	movdqa	%xmm15,%xmm6
   1418 	movups	%xmm7,80(%rsi)
   1419 	movdqa	%xmm0,%xmm7
   1420 	movups	%xmm8,96(%rsi)
   1421 	movups	%xmm9,112(%rsi)
   1422 	leaq	128(%rsi),%rsi
   1423 
   1424 	subq	$8,%rdx
   1425 	jnc	L$ctr32_loop8
   1426 
   1427 	addq	$8,%rdx
   1428 	jz	L$ctr32_done
   1429 	leaq	-128(%rcx),%rcx
   1430 
   1431 L$ctr32_tail:
   1432 
   1433 
   1434 	leaq	16(%rcx),%rcx
   1435 	cmpq	$4,%rdx
   1436 	jb	L$ctr32_loop3
   1437 	je	L$ctr32_loop4
   1438 
   1439 
   1440 	shll	$4,%eax
   1441 	movdqa	96(%rsp),%xmm8
   1442 	pxor	%xmm9,%xmm9
   1443 
   1444 	movups	16(%rcx),%xmm0
   1445 .byte	102,15,56,220,209
   1446 .byte	102,15,56,220,217
   1447 	leaq	32-16(%rcx,%rax,1),%rcx
   1448 	negq	%rax
   1449 .byte	102,15,56,220,225
   1450 	addq	$16,%rax
   1451 	movups	(%rdi),%xmm10
   1452 .byte	102,15,56,220,233
   1453 .byte	102,15,56,220,241
   1454 	movups	16(%rdi),%xmm11
   1455 	movups	32(%rdi),%xmm12
   1456 .byte	102,15,56,220,249
   1457 .byte	102,68,15,56,220,193
   1458 
   1459 	call	L$enc_loop8_enter
   1460 
   1461 	movdqu	48(%rdi),%xmm13
   1462 	pxor	%xmm10,%xmm2
   1463 	movdqu	64(%rdi),%xmm10
   1464 	pxor	%xmm11,%xmm3
   1465 	movdqu	%xmm2,(%rsi)
   1466 	pxor	%xmm12,%xmm4
   1467 	movdqu	%xmm3,16(%rsi)
   1468 	pxor	%xmm13,%xmm5
   1469 	movdqu	%xmm4,32(%rsi)
   1470 	pxor	%xmm10,%xmm6
   1471 	movdqu	%xmm5,48(%rsi)
   1472 	movdqu	%xmm6,64(%rsi)
   1473 	cmpq	$6,%rdx
   1474 	jb	L$ctr32_done
   1475 
   1476 	movups	80(%rdi),%xmm11
   1477 	xorps	%xmm11,%xmm7
   1478 	movups	%xmm7,80(%rsi)
   1479 	je	L$ctr32_done
   1480 
   1481 	movups	96(%rdi),%xmm12
   1482 	xorps	%xmm12,%xmm8
   1483 	movups	%xmm8,96(%rsi)
   1484 	jmp	L$ctr32_done
   1485 
   1486 .p2align	5
   1487 L$ctr32_loop4:
   1488 .byte	102,15,56,220,209
   1489 	leaq	16(%rcx),%rcx
   1490 	decl	%eax
   1491 .byte	102,15,56,220,217
   1492 .byte	102,15,56,220,225
   1493 .byte	102,15,56,220,233
   1494 	movups	(%rcx),%xmm1
   1495 	jnz	L$ctr32_loop4
   1496 .byte	102,15,56,221,209
   1497 .byte	102,15,56,221,217
   1498 	movups	(%rdi),%xmm10
   1499 	movups	16(%rdi),%xmm11
   1500 .byte	102,15,56,221,225
   1501 .byte	102,15,56,221,233
   1502 	movups	32(%rdi),%xmm12
   1503 	movups	48(%rdi),%xmm13
   1504 
   1505 	xorps	%xmm10,%xmm2
   1506 	movups	%xmm2,(%rsi)
   1507 	xorps	%xmm11,%xmm3
   1508 	movups	%xmm3,16(%rsi)
   1509 	pxor	%xmm12,%xmm4
   1510 	movdqu	%xmm4,32(%rsi)
   1511 	pxor	%xmm13,%xmm5
   1512 	movdqu	%xmm5,48(%rsi)
   1513 	jmp	L$ctr32_done
   1514 
   1515 .p2align	5
   1516 L$ctr32_loop3:
   1517 .byte	102,15,56,220,209
   1518 	leaq	16(%rcx),%rcx
   1519 	decl	%eax
   1520 .byte	102,15,56,220,217
   1521 .byte	102,15,56,220,225
   1522 	movups	(%rcx),%xmm1
   1523 	jnz	L$ctr32_loop3
   1524 .byte	102,15,56,221,209
   1525 .byte	102,15,56,221,217
   1526 .byte	102,15,56,221,225
   1527 
   1528 	movups	(%rdi),%xmm10
   1529 	xorps	%xmm10,%xmm2
   1530 	movups	%xmm2,(%rsi)
   1531 	cmpq	$2,%rdx
   1532 	jb	L$ctr32_done
   1533 
   1534 	movups	16(%rdi),%xmm11
   1535 	xorps	%xmm11,%xmm3
   1536 	movups	%xmm3,16(%rsi)
   1537 	je	L$ctr32_done
   1538 
   1539 	movups	32(%rdi),%xmm12
   1540 	xorps	%xmm12,%xmm4
   1541 	movups	%xmm4,32(%rsi)
   1542 
   1543 L$ctr32_done:
   1544 	xorps	%xmm0,%xmm0
   1545 	xorl	%ebp,%ebp
   1546 	pxor	%xmm1,%xmm1
   1547 	pxor	%xmm2,%xmm2
   1548 	pxor	%xmm3,%xmm3
   1549 	pxor	%xmm4,%xmm4
   1550 	pxor	%xmm5,%xmm5
   1551 	pxor	%xmm6,%xmm6
   1552 	pxor	%xmm7,%xmm7
   1553 	movaps	%xmm0,0(%rsp)
   1554 	pxor	%xmm8,%xmm8
   1555 	movaps	%xmm0,16(%rsp)
   1556 	pxor	%xmm9,%xmm9
   1557 	movaps	%xmm0,32(%rsp)
   1558 	pxor	%xmm10,%xmm10
   1559 	movaps	%xmm0,48(%rsp)
   1560 	pxor	%xmm11,%xmm11
   1561 	movaps	%xmm0,64(%rsp)
   1562 	pxor	%xmm12,%xmm12
   1563 	movaps	%xmm0,80(%rsp)
   1564 	pxor	%xmm13,%xmm13
   1565 	movaps	%xmm0,96(%rsp)
   1566 	pxor	%xmm14,%xmm14
   1567 	movaps	%xmm0,112(%rsp)
   1568 	pxor	%xmm15,%xmm15
   1569 	movq	-8(%r11),%rbp
   1570 	leaq	(%r11),%rsp
   1571 L$ctr32_epilogue:
   1572 	.byte	0xf3,0xc3
   1573 
   1574 .globl	_aesni_xts_encrypt
   1575 .private_extern _aesni_xts_encrypt
   1576 
        // _aesni_xts_encrypt: AES-NI encryption in XTS style (per the symbol
        // name), six blocks at a time, with ciphertext stealing when the
        // length is not a multiple of 16.
        //
        // Register roles on entry, as used by the code below:
        //   %rdi  input pointer (read in 16-byte blocks, advanced)
        //   %rsi  output pointer (written in 16-byte blocks, advanced)
        //   %rdx  length in bytes; the low four bits select the stealing path
        //   %rcx  key schedule for the data blocks, loop count at offset 240
        //   %r8   key schedule used once, to encrypt the 16 bytes at (%r9)
        //   %r9   pointer to the 16-byte value the first tweak is derived from
        // NOTE(review): this matches System V argument order; the C prototype
        // is not visible in this chunk - confirm against the header.
        //
        // Stack frame (112 bytes, 16-byte aligned):
        //   0..80(%rsp)  six slots of (tweak ^ last round key) for the 6-way loop
        //   96(%rsp)     round key 0 ^ last round key
        // %r11 keeps the caller %rsp; %rbp is pushed just below it and is
        // reused as the base of the data key schedule.
        //
        // The .byte sequences are hand-encoded AES-NI instructions:
        //   102,15,56,220,modrm   aesenc      (209..249: %xmm1 into %xmm2..%xmm7,
        //                                      208..248: %xmm0 into %xmm2..%xmm7)
        //   102,15,56,221,modrm   aesenclast
        //   0xf3,0xc3             rep ret
        // On exit %xmm0..%xmm15 and the whole stack frame are zeroed.
   1577 .p2align	4
   1578 _aesni_xts_encrypt:
        // Remember the caller stack pointer, save %rbp, and carve out an
        // aligned 112-byte scratch frame.
   1579 	leaq	(%rsp),%r11
   1580 	pushq	%rbp
   1581 	subq	$112,%rsp
   1582 	andq	$-16,%rsp
        // Encrypt the block at (%r9) with the %r8 key schedule; the result in
        // %xmm2 is the initial tweak. %eax = loop count of that key,
        // %r10d = loop count of the data key.
   1583 	movups	(%r9),%xmm2
   1584 	movl	240(%r8),%eax
   1585 	movl	240(%rcx),%r10d
   1586 	movups	(%r8),%xmm0
   1587 	movups	16(%r8),%xmm1
   1588 	leaq	32(%r8),%r8
   1589 	xorps	%xmm0,%xmm2
        // Single-block round loop: aesenc %xmm1,%xmm2, then fetch the next
        // round key; movups and leaq leave the flags from decl intact.
   1590 L$oop_enc1_8:
   1591 .byte	102,15,56,220,209
   1592 	decl	%eax
   1593 	movups	(%r8),%xmm1
   1594 	leaq	16(%r8),%r8
   1595 	jnz	L$oop_enc1_8
        // aesenclast %xmm1,%xmm2
   1596 .byte	102,15,56,221,209
        // %xmm0 = round key 0 of the data key, %rbp = key base,
        // %eax = loop count, %r10 = loop count * 16,
        // %r9 = original length, %rdx = length rounded down to whole blocks.
   1597 	movups	(%rcx),%xmm0
   1598 	movq	%rcx,%rbp
   1599 	movl	%r10d,%eax
   1600 	shll	$4,%r10d
   1601 	movq	%rdx,%r9
   1602 	andq	$-16,%rdx
   1603 
        // %xmm1 = round key at offset 16 + 16*count, i.e. the one the
        // single-block loops feed to aesenclast (the last round key).
   1604 	movups	16(%rcx,%r10,1),%xmm1
   1605 
        // Build the first six tweaks.
        // %xmm15 is the running tweak. Each step doubles it as two 64-bit
        // lanes (paddq) and XORs in a correction taken from L$xts_magic.
        // %xmm9 = pshufd 0x5f of the tweak: dword 3 copied to dwords 0-1 and
        // dword 1 to dwords 2-3, so psrad $31 turns the top bit of each lane
        // into an all-ones mask sitting in the other lane; paddd shifts
        // %xmm9 left so the next step sees the next bit.
        // %xmm10..%xmm14 receive tweaks 0..4 XORed with round key 0, which
        // folds the initial AddRoundKey into the tweak XOR; %xmm15 ends up as
        // tweak 5 without that whitening.
        // NOTE(review): L$xts_magic is defined outside this chunk; presumably
        // the GF(2^128) reduction constant - confirm.
   1606 	movdqa	L$xts_magic(%rip),%xmm8
   1607 	movdqa	%xmm2,%xmm15
   1608 	pshufd	$0x5f,%xmm2,%xmm9
        // %xmm1 = last round key ^ round key 0 (stored at 96(%rsp) below).
   1609 	pxor	%xmm0,%xmm1
   1610 	movdqa	%xmm9,%xmm14
   1611 	paddd	%xmm9,%xmm9
   1612 	movdqa	%xmm15,%xmm10
   1613 	psrad	$31,%xmm14
   1614 	paddq	%xmm15,%xmm15
   1615 	pand	%xmm8,%xmm14
   1616 	pxor	%xmm0,%xmm10
   1617 	pxor	%xmm14,%xmm15
   1618 	movdqa	%xmm9,%xmm14
   1619 	paddd	%xmm9,%xmm9
   1620 	movdqa	%xmm15,%xmm11
   1621 	psrad	$31,%xmm14
   1622 	paddq	%xmm15,%xmm15
   1623 	pand	%xmm8,%xmm14
   1624 	pxor	%xmm0,%xmm11
   1625 	pxor	%xmm14,%xmm15
   1626 	movdqa	%xmm9,%xmm14
   1627 	paddd	%xmm9,%xmm9
   1628 	movdqa	%xmm15,%xmm12
   1629 	psrad	$31,%xmm14
   1630 	paddq	%xmm15,%xmm15
   1631 	pand	%xmm8,%xmm14
   1632 	pxor	%xmm0,%xmm12
   1633 	pxor	%xmm14,%xmm15
   1634 	movdqa	%xmm9,%xmm14
   1635 	paddd	%xmm9,%xmm9
   1636 	movdqa	%xmm15,%xmm13
   1637 	psrad	$31,%xmm14
   1638 	paddq	%xmm15,%xmm15
   1639 	pand	%xmm8,%xmm14
   1640 	pxor	%xmm0,%xmm13
   1641 	pxor	%xmm14,%xmm15
   1642 	movdqa	%xmm15,%xmm14
   1643 	psrad	$31,%xmm9
   1644 	paddq	%xmm15,%xmm15
   1645 	pand	%xmm8,%xmm9
   1646 	pxor	%xmm0,%xmm14
   1647 	pxor	%xmm9,%xmm15
   1648 	movaps	%xmm1,96(%rsp)
   1649 
        // Fewer than six whole blocks (96 bytes)? Take the short path.
   1650 	subq	$96,%rdx
   1651 	jc	L$xts_enc_short
   1652 
        // Set up the 6-way loop:
        //   %rcx = key base + 32 + 16*count
        //   %rax = %r10 = 112 - 16*count; loop6 adds 32 until it hits zero
        //   %xmm1 = round key 1, %r8 = address of L$xts_magic
   1653 	movl	$16+96,%eax
   1654 	leaq	32(%rbp,%r10,1),%rcx
   1655 	subq	%r10,%rax
   1656 	movups	16(%rbp),%xmm1
   1657 	movq	%rax,%r10
   1658 	leaq	L$xts_magic(%rip),%r8
   1659 	jmp	L$xts_enc_grandloop
   1660 
   1661 .p2align	5
        // Main loop, six blocks per iteration.
        // Load the blocks into %xmm2..%xmm7 and XOR each with its whitened
        // tweak (%xmm10..%xmm14; %xmm8 = round key 0 ^ tweak 5 for the sixth).
        // The first two rounds (keys at 16 and 32 off %rbp) are started
        // between the loads.
   1662 L$xts_enc_grandloop:
   1663 	movdqu	0(%rdi),%xmm2
   1664 	movdqa	%xmm0,%xmm8
   1665 	movdqu	16(%rdi),%xmm3
   1666 	pxor	%xmm10,%xmm2
   1667 	movdqu	32(%rdi),%xmm4
   1668 	pxor	%xmm11,%xmm3
   1669 .byte	102,15,56,220,209
   1670 	movdqu	48(%rdi),%xmm5
   1671 	pxor	%xmm12,%xmm4
   1672 .byte	102,15,56,220,217
   1673 	movdqu	64(%rdi),%xmm6
   1674 	pxor	%xmm13,%xmm5
   1675 .byte	102,15,56,220,225
   1676 	movdqu	80(%rdi),%xmm7
   1677 	pxor	%xmm15,%xmm8
        // %xmm9 = round key 0 ^ last round key.
   1678 	movdqa	96(%rsp),%xmm9
   1679 	pxor	%xmm14,%xmm6
   1680 .byte	102,15,56,220,233
   1681 	movups	32(%rbp),%xmm0
   1682 	leaq	96(%rdi),%rdi
   1683 	pxor	%xmm8,%xmm7
   1684 
        // XOR with %xmm9 turns each (tweak ^ round key 0) into
        // (tweak ^ last round key); these go to the stack slots that the
        // final aesenclast reads. The slot for %xmm13, 48(%rsp), is written
        // later, after loop6.
   1685 	pxor	%xmm9,%xmm10
   1686 .byte	102,15,56,220,241
   1687 	pxor	%xmm9,%xmm11
   1688 	movdqa	%xmm10,0(%rsp)
   1689 .byte	102,15,56,220,249
   1690 	movups	48(%rbp),%xmm1
   1691 	pxor	%xmm9,%xmm12
   1692 
   1693 .byte	102,15,56,220,208
   1694 	pxor	%xmm9,%xmm13
   1695 	movdqa	%xmm11,16(%rsp)
   1696 .byte	102,15,56,220,216
   1697 	pxor	%xmm9,%xmm14
   1698 	movdqa	%xmm12,32(%rsp)
   1699 .byte	102,15,56,220,224
   1700 .byte	102,15,56,220,232
   1701 	pxor	%xmm9,%xmm8
   1702 	movdqa	%xmm14,64(%rsp)
   1703 .byte	102,15,56,220,240
   1704 .byte	102,15,56,220,248
   1705 	movups	64(%rbp),%xmm0
   1706 	movdqa	%xmm8,80(%rsp)
        // Prepare the carry-mask source for the next batch of tweaks.
   1707 	pshufd	$0x5f,%xmm15,%xmm9
   1708 	jmp	L$xts_enc_loop6
   1709 .p2align	5
        // Middle rounds: two round keys per iteration applied to all six
        // blocks. %rax steps by 32 toward zero; the jnz tests the flags of
        // the addq, which the aesenc and movups instructions do not modify.
   1710 L$xts_enc_loop6:
   1711 .byte	102,15,56,220,209
   1712 .byte	102,15,56,220,217
   1713 .byte	102,15,56,220,225
   1714 .byte	102,15,56,220,233
   1715 .byte	102,15,56,220,241
   1716 .byte	102,15,56,220,249
   1717 	movups	-64(%rcx,%rax,1),%xmm1
   1718 	addq	$32,%rax
   1719 
   1720 .byte	102,15,56,220,208
   1721 .byte	102,15,56,220,216
   1722 .byte	102,15,56,220,224
   1723 .byte	102,15,56,220,232
   1724 .byte	102,15,56,220,240
   1725 .byte	102,15,56,220,248
   1726 	movups	-80(%rcx,%rax,1),%xmm0
   1727 	jnz	L$xts_enc_loop6
   1728 
        // Remaining rounds, interleaved with generating the next six tweaks.
        // %xmm8 is reloaded with L$xts_magic through %r8. %xmm10..%xmm14 are
        // rebuilt as (round key 0 ^ next tweak): round key 0 is loaded into
        // %xmm10 and copied down the chain before each XOR with %xmm15.
   1729 	movdqa	(%r8),%xmm8
   1730 	movdqa	%xmm9,%xmm14
   1731 	paddd	%xmm9,%xmm9
   1732 .byte	102,15,56,220,209
   1733 	paddq	%xmm15,%xmm15
   1734 	psrad	$31,%xmm14
   1735 .byte	102,15,56,220,217
   1736 	pand	%xmm8,%xmm14
   1737 	movups	(%rbp),%xmm10
   1738 .byte	102,15,56,220,225
   1739 .byte	102,15,56,220,233
   1740 .byte	102,15,56,220,241
   1741 	pxor	%xmm14,%xmm15
   1742 	movaps	%xmm10,%xmm11
   1743 .byte	102,15,56,220,249
   1744 	movups	-64(%rcx),%xmm1
   1745 
   1746 	movdqa	%xmm9,%xmm14
   1747 .byte	102,15,56,220,208
   1748 	paddd	%xmm9,%xmm9
   1749 	pxor	%xmm15,%xmm10
   1750 .byte	102,15,56,220,216
   1751 	psrad	$31,%xmm14
   1752 	paddq	%xmm15,%xmm15
   1753 .byte	102,15,56,220,224
   1754 .byte	102,15,56,220,232
   1755 	pand	%xmm8,%xmm14
   1756 	movaps	%xmm11,%xmm12
   1757 .byte	102,15,56,220,240
   1758 	pxor	%xmm14,%xmm15
   1759 	movdqa	%xmm9,%xmm14
   1760 .byte	102,15,56,220,248
   1761 	movups	-48(%rcx),%xmm0
   1762 
   1763 	paddd	%xmm9,%xmm9
   1764 .byte	102,15,56,220,209
   1765 	pxor	%xmm15,%xmm11
   1766 	psrad	$31,%xmm14
   1767 .byte	102,15,56,220,217
   1768 	paddq	%xmm15,%xmm15
   1769 	pand	%xmm8,%xmm14
   1770 .byte	102,15,56,220,225
   1771 .byte	102,15,56,220,233
        // Deferred store of the fourth (tweak ^ last round key) slot; %xmm13
        // is overwritten with the new whitening value right after.
   1772 	movdqa	%xmm13,48(%rsp)
   1773 	pxor	%xmm14,%xmm15
   1774 .byte	102,15,56,220,241
   1775 	movaps	%xmm12,%xmm13
   1776 	movdqa	%xmm9,%xmm14
   1777 .byte	102,15,56,220,249
   1778 	movups	-32(%rcx),%xmm1
   1779 
   1780 	paddd	%xmm9,%xmm9
   1781 .byte	102,15,56,220,208
   1782 	pxor	%xmm15,%xmm12
   1783 	psrad	$31,%xmm14
   1784 .byte	102,15,56,220,216
   1785 	paddq	%xmm15,%xmm15
   1786 	pand	%xmm8,%xmm14
   1787 .byte	102,15,56,220,224
   1788 .byte	102,15,56,220,232
   1789 .byte	102,15,56,220,240
   1790 	pxor	%xmm14,%xmm15
   1791 	movaps	%xmm13,%xmm14
   1792 .byte	102,15,56,220,248
   1793 
        // %xmm0 has served its last aesenc for this batch, so it doubles as
        // the carry-mask temporary until round key 0 is reloaded into it.
   1794 	movdqa	%xmm9,%xmm0
   1795 	paddd	%xmm9,%xmm9
   1796 .byte	102,15,56,220,209
   1797 	pxor	%xmm15,%xmm13
   1798 	psrad	$31,%xmm0
   1799 .byte	102,15,56,220,217
   1800 	paddq	%xmm15,%xmm15
   1801 	pand	%xmm8,%xmm0
   1802 .byte	102,15,56,220,225
   1803 .byte	102,15,56,220,233
   1804 	pxor	%xmm0,%xmm15
   1805 	movups	(%rbp),%xmm0
   1806 .byte	102,15,56,220,241
   1807 .byte	102,15,56,220,249
   1808 	movups	16(%rbp),%xmm1
   1809 
        // Final round: aesenclast N(%rsp),%xmmK for K = 2..7, N = 0..80.
        // Each stack operand is (tweak ^ last round key), so the last
        // AddRoundKey and the output tweak XOR happen in one instruction.
        // %rax is reset from %r10 for the next pass through loop6.
   1810 	pxor	%xmm15,%xmm14
   1811 .byte	102,15,56,221,84,36,0
   1812 	psrad	$31,%xmm9
   1813 	paddq	%xmm15,%xmm15
   1814 .byte	102,15,56,221,92,36,16
   1815 .byte	102,15,56,221,100,36,32
   1816 	pand	%xmm8,%xmm9
   1817 	movq	%r10,%rax
   1818 .byte	102,15,56,221,108,36,48
   1819 .byte	102,15,56,221,116,36,64
   1820 .byte	102,15,56,221,124,36,80
   1821 	pxor	%xmm9,%xmm15
   1822 
        // Store the six output blocks and loop while at least 96 bytes remain.
   1823 	leaq	96(%rsi),%rsi
   1824 	movups	%xmm2,-96(%rsi)
   1825 	movups	%xmm3,-80(%rsi)
   1826 	movups	%xmm4,-64(%rsi)
   1827 	movups	%xmm5,-48(%rsi)
   1828 	movups	%xmm6,-32(%rsi)
   1829 	movups	%xmm7,-16(%rsi)
   1830 	subq	$96,%rdx
   1831 	jnc	L$xts_enc_grandloop
   1832 
        // Leaving the loop: %r10 = 112 - 16*count, so 112 - %r10d shifted
        // right by 4 recovers the loop count in %eax; %rcx = key base again.
   1833 	movl	$16+96,%eax
   1834 	subl	%r10d,%eax
   1835 	movq	%rbp,%rcx
   1836 	shrl	$4,%eax
   1837 
        // Short path: 0 to 5 whole blocks remain.
        // %r10d keeps the loop count for later, because the helper routines
        // consume %eax (see _aesni_encrypt2 near the top of the file).
        // Here %xmm0 = round key 0, so each pxor %xmm0 below removes the
        // whitening from one tweak just before that tweak may be needed.
        // The pxor instructions do not touch the flags, so each je still
        // tests the cmpq above it.
   1838 L$xts_enc_short:
   1839 
   1840 	movl	%eax,%r10d
   1841 	pxor	%xmm0,%xmm10
        // Undo the earlier subq: %rdx = remaining whole-block bytes (0..80).
   1842 	addq	$96,%rdx
   1843 	jz	L$xts_enc_done
   1844 
   1845 	pxor	%xmm0,%xmm11
   1846 	cmpq	$0x20,%rdx
   1847 	jb	L$xts_enc_one
   1848 	pxor	%xmm0,%xmm12
   1849 	je	L$xts_enc_two
   1850 
   1851 	pxor	%xmm0,%xmm13
   1852 	cmpq	$0x40,%rdx
   1853 	jb	L$xts_enc_three
   1854 	pxor	%xmm0,%xmm14
   1855 	je	L$xts_enc_four
   1856 
        // Five blocks: XOR with tweaks 0..4 and zero the unused sixth lane
        // before calling the 6-block helper.
        // NOTE(review): _aesni_encrypt6 is defined outside this chunk;
        // assumed to take the key in %rcx, the count in %eax and the blocks
        // in %xmm2..%xmm7, like _aesni_encrypt2 - confirm.
   1857 	movdqu	(%rdi),%xmm2
   1858 	movdqu	16(%rdi),%xmm3
   1859 	movdqu	32(%rdi),%xmm4
   1860 	pxor	%xmm10,%xmm2
   1861 	movdqu	48(%rdi),%xmm5
   1862 	pxor	%xmm11,%xmm3
   1863 	movdqu	64(%rdi),%xmm6
   1864 	leaq	80(%rdi),%rdi
   1865 	pxor	%xmm12,%xmm4
   1866 	pxor	%xmm13,%xmm5
   1867 	pxor	%xmm14,%xmm6
   1868 	pxor	%xmm7,%xmm7
   1869 
   1870 	call	_aesni_encrypt6
   1871 
        // XOR the outputs with the same tweaks and store them. %xmm10 is set
        // to the next unused tweak (%xmm15) for the stealing step.
   1872 	xorps	%xmm10,%xmm2
   1873 	movdqa	%xmm15,%xmm10
   1874 	xorps	%xmm11,%xmm3
   1875 	xorps	%xmm12,%xmm4
   1876 	movdqu	%xmm2,(%rsi)
   1877 	xorps	%xmm13,%xmm5
   1878 	movdqu	%xmm3,16(%rsi)
   1879 	xorps	%xmm14,%xmm6
   1880 	movdqu	%xmm4,32(%rsi)
   1881 	movdqu	%xmm5,48(%rsi)
   1882 	movdqu	%xmm6,64(%rsi)
   1883 	leaq	80(%rsi),%rsi
   1884 	jmp	L$xts_enc_done
   1885 
   1886 .p2align	4
        // One block: tweak XOR, inline single-block encryption with the key
        // at %rcx, tweak XOR again. %xmm10 becomes the next tweak (%xmm11).
   1887 L$xts_enc_one:
   1888 	movups	(%rdi),%xmm2
   1889 	leaq	16(%rdi),%rdi
   1890 	xorps	%xmm10,%xmm2
   1891 	movups	(%rcx),%xmm0
   1892 	movups	16(%rcx),%xmm1
   1893 	leaq	32(%rcx),%rcx
   1894 	xorps	%xmm0,%xmm2
   1895 L$oop_enc1_9:
   1896 .byte	102,15,56,220,209
   1897 	decl	%eax
   1898 	movups	(%rcx),%xmm1
   1899 	leaq	16(%rcx),%rcx
   1900 	jnz	L$oop_enc1_9
   1901 .byte	102,15,56,221,209
   1902 	xorps	%xmm10,%xmm2
   1903 	movdqa	%xmm11,%xmm10
   1904 	movups	%xmm2,(%rsi)
   1905 	leaq	16(%rsi),%rsi
   1906 	jmp	L$xts_enc_done
   1907 
   1908 .p2align	4
        // Two blocks via _aesni_encrypt2; %xmm10 becomes the next tweak
        // (%xmm12).
   1909 L$xts_enc_two:
   1910 	movups	(%rdi),%xmm2
   1911 	movups	16(%rdi),%xmm3
   1912 	leaq	32(%rdi),%rdi
   1913 	xorps	%xmm10,%xmm2
   1914 	xorps	%xmm11,%xmm3
   1915 
   1916 	call	_aesni_encrypt2
   1917 
   1918 	xorps	%xmm10,%xmm2
   1919 	movdqa	%xmm12,%xmm10
   1920 	xorps	%xmm11,%xmm3
   1921 	movups	%xmm2,(%rsi)
   1922 	movups	%xmm3,16(%rsi)
   1923 	leaq	32(%rsi),%rsi
   1924 	jmp	L$xts_enc_done
   1925 
   1926 .p2align	4
        // Three blocks via _aesni_encrypt3; %xmm10 becomes the next tweak
        // (%xmm13).
   1927 L$xts_enc_three:
   1928 	movups	(%rdi),%xmm2
   1929 	movups	16(%rdi),%xmm3
   1930 	movups	32(%rdi),%xmm4
   1931 	leaq	48(%rdi),%rdi
   1932 	xorps	%xmm10,%xmm2
   1933 	xorps	%xmm11,%xmm3
   1934 	xorps	%xmm12,%xmm4
   1935 
   1936 	call	_aesni_encrypt3
   1937 
   1938 	xorps	%xmm10,%xmm2
   1939 	movdqa	%xmm13,%xmm10
   1940 	xorps	%xmm11,%xmm3
   1941 	xorps	%xmm12,%xmm4
   1942 	movups	%xmm2,(%rsi)
   1943 	movups	%xmm3,16(%rsi)
   1944 	movups	%xmm4,32(%rsi)
   1945 	leaq	48(%rsi),%rsi
   1946 	jmp	L$xts_enc_done
   1947 
   1948 .p2align	4
        // Four blocks via _aesni_encrypt4; %xmm10 becomes the next tweak
        // (%xmm14).
   1949 L$xts_enc_four:
   1950 	movups	(%rdi),%xmm2
   1951 	movups	16(%rdi),%xmm3
   1952 	movups	32(%rdi),%xmm4
   1953 	xorps	%xmm10,%xmm2
   1954 	movups	48(%rdi),%xmm5
   1955 	leaq	64(%rdi),%rdi
   1956 	xorps	%xmm11,%xmm3
   1957 	xorps	%xmm12,%xmm4
   1958 	xorps	%xmm13,%xmm5
   1959 
   1960 	call	_aesni_encrypt4
   1961 
   1962 	pxor	%xmm10,%xmm2
   1963 	movdqa	%xmm14,%xmm10
   1964 	pxor	%xmm11,%xmm3
   1965 	pxor	%xmm12,%xmm4
   1966 	movdqu	%xmm2,(%rsi)
   1967 	pxor	%xmm13,%xmm5
   1968 	movdqu	%xmm3,16(%rsi)
   1969 	movdqu	%xmm4,32(%rsi)
   1970 	movdqu	%xmm5,48(%rsi)
   1971 	leaq	64(%rsi),%rsi
   1972 	jmp	L$xts_enc_done
   1973 
   1974 .p2align	4
        // All whole blocks are written. %r9 = original length & 15 is the
        // size of the trailing partial block; zero means nothing is left.
   1975 L$xts_enc_done:
   1976 	andq	$15,%r9
   1977 	jz	L$xts_enc_ret
   1978 	movq	%r9,%rdx
   1979 
        // Ciphertext stealing, one byte per iteration: the byte of the last
        // written block at -16(%rsi) moves out to the partial output at
        // 0(%rsi), and the trailing input byte takes its place.
   1980 L$xts_enc_steal:
   1981 	movzbl	(%rdi),%eax
   1982 	movzbl	-16(%rsi),%ecx
   1983 	leaq	1(%rdi),%rdi
   1984 	movb	%al,-16(%rsi)
   1985 	movb	%cl,0(%rsi)
   1986 	leaq	1(%rsi),%rsi
   1987 	subq	$1,%rdx
   1988 	jnz	L$xts_enc_steal
   1989 
        // Rewind %rsi to the end of the last full block, restore the key base
        // and loop count, then encrypt the patched block in place with the
        // tweak held in %xmm10.
   1990 	subq	%r9,%rsi
   1991 	movq	%rbp,%rcx
   1992 	movl	%r10d,%eax
   1993 
   1994 	movups	-16(%rsi),%xmm2
   1995 	xorps	%xmm10,%xmm2
   1996 	movups	(%rcx),%xmm0
   1997 	movups	16(%rcx),%xmm1
   1998 	leaq	32(%rcx),%rcx
   1999 	xorps	%xmm0,%xmm2
   2000 L$oop_enc1_10:
   2001 .byte	102,15,56,220,209
   2002 	decl	%eax
   2003 	movups	(%rcx),%xmm1
   2004 	leaq	16(%rcx),%rcx
   2005 	jnz	L$oop_enc1_10
   2006 .byte	102,15,56,221,209
   2007 	xorps	%xmm10,%xmm2
   2008 	movups	%xmm2,-16(%rsi)
   2009 
        // Exit: clear %xmm0..%xmm15 and all seven 16-byte stack slots, then
        // restore %rbp (saved at -8 from the caller %rsp) and %rsp from %r11.
   2010 L$xts_enc_ret:
   2011 	xorps	%xmm0,%xmm0
   2012 	pxor	%xmm1,%xmm1
   2013 	pxor	%xmm2,%xmm2
   2014 	pxor	%xmm3,%xmm3
   2015 	pxor	%xmm4,%xmm4
   2016 	pxor	%xmm5,%xmm5
   2017 	pxor	%xmm6,%xmm6
   2018 	pxor	%xmm7,%xmm7
   2019 	movaps	%xmm0,0(%rsp)
   2020 	pxor	%xmm8,%xmm8
   2021 	movaps	%xmm0,16(%rsp)
   2022 	pxor	%xmm9,%xmm9
   2023 	movaps	%xmm0,32(%rsp)
   2024 	pxor	%xmm10,%xmm10
   2025 	movaps	%xmm0,48(%rsp)
   2026 	pxor	%xmm11,%xmm11
   2027 	movaps	%xmm0,64(%rsp)
   2028 	pxor	%xmm12,%xmm12
   2029 	movaps	%xmm0,80(%rsp)
   2030 	pxor	%xmm13,%xmm13
   2031 	movaps	%xmm0,96(%rsp)
   2032 	pxor	%xmm14,%xmm14
   2033 	pxor	%xmm15,%xmm15
   2034 	movq	-8(%r11),%rbp
   2035 	leaq	(%r11),%rsp
   2036 L$xts_enc_epilogue:
        // rep ret
   2037 	.byte	0xf3,0xc3
   2038 
   2039 .globl	_aesni_xts_decrypt
   2040 .private_extern _aesni_xts_decrypt
   2041 
   2042 .p2align	4
   2043 _aesni_xts_decrypt:
   2044 	leaq	(%rsp),%r11
   2045 	pushq	%rbp
   2046 	subq	$112,%rsp
   2047 	andq	$-16,%rsp
   2048 	movups	(%r9),%xmm2
   2049 	movl	240(%r8),%eax
   2050 	movl	240(%rcx),%r10d
   2051 	movups	(%r8),%xmm0
   2052 	movups	16(%r8),%xmm1
   2053 	leaq	32(%r8),%r8
   2054 	xorps	%xmm0,%xmm2
   2055 L$oop_enc1_11:
   2056 .byte	102,15,56,220,209
   2057 	decl	%eax
   2058 	movups	(%r8),%xmm1
   2059 	leaq	16(%r8),%r8
   2060 	jnz	L$oop_enc1_11
   2061 .byte	102,15,56,221,209
   2062 	xorl	%eax,%eax
   2063 	testq	$15,%rdx
   2064 	setnz	%al
   2065 	shlq	$4,%rax
   2066 	subq	%rax,%rdx
   2067 
   2068 	movups	(%rcx),%xmm0
   2069 	movq	%rcx,%rbp
   2070 	movl	%r10d,%eax
   2071 	shll	$4,%r10d
   2072 	movq	%rdx,%r9
   2073 	andq	$-16,%rdx
   2074 
   2075 	movups	16(%rcx,%r10,1),%xmm1
   2076 
   2077 	movdqa	L$xts_magic(%rip),%xmm8
   2078 	movdqa	%xmm2,%xmm15
   2079 	pshufd	$0x5f,%xmm2,%xmm9
   2080 	pxor	%xmm0,%xmm1
   2081 	movdqa	%xmm9,%xmm14
   2082 	paddd	%xmm9,%xmm9
   2083 	movdqa	%xmm15,%xmm10
   2084 	psrad	$31,%xmm14
   2085 	paddq	%xmm15,%xmm15
   2086 	pand	%xmm8,%xmm14
   2087 	pxor	%xmm0,%xmm10
   2088 	pxor	%xmm14,%xmm15
   2089 	movdqa	%xmm9,%xmm14
   2090 	paddd	%xmm9,%xmm9
   2091 	movdqa	%xmm15,%xmm11
   2092 	psrad	$31,%xmm14
   2093 	paddq	%xmm15,%xmm15
   2094 	pand	%xmm8,%xmm14
   2095 	pxor	%xmm0,%xmm11
   2096 	pxor	%xmm14,%xmm15
   2097 	movdqa	%xmm9,%xmm14
   2098 	paddd	%xmm9,%xmm9
   2099 	movdqa	%xmm15,%xmm12
   2100 	psrad	$31,%xmm14
   2101 	paddq	%xmm15,%xmm15
   2102 	pand	%xmm8,%xmm14
   2103 	pxor	%xmm0,%xmm12
   2104 	pxor	%xmm14,%xmm15
   2105 	movdqa	%xmm9,%xmm14
   2106 	paddd	%xmm9,%xmm9
   2107 	movdqa	%xmm15,%xmm13
   2108 	psrad	$31,%xmm14
   2109 	paddq	%xmm15,%xmm15
   2110 	pand	%xmm8,%xmm14
   2111 	pxor	%xmm0,%xmm13
   2112 	pxor	%xmm14,%xmm15
   2113 	movdqa	%xmm15,%xmm14
   2114 	psrad	$31,%xmm9
   2115 	paddq	%xmm15,%xmm15
   2116 	pand	%xmm8,%xmm9
   2117 	pxor	%xmm0,%xmm14
   2118 	pxor	%xmm9,%xmm15
   2119 	movaps	%xmm1,96(%rsp)
   2120 
   2121 	subq	$96,%rdx
   2122 	jc	L$xts_dec_short
   2123 
   2124 	movl	$16+96,%eax
   2125 	leaq	32(%rbp,%r10,1),%rcx
   2126 	subq	%r10,%rax
   2127 	movups	16(%rbp),%xmm1
   2128 	movq	%rax,%r10
   2129 	leaq	L$xts_magic(%rip),%r8
   2130 	jmp	L$xts_dec_grandloop
   2131 
   2132 .p2align	5
   2133 L$xts_dec_grandloop:
   2134 	movdqu	0(%rdi),%xmm2
   2135 	movdqa	%xmm0,%xmm8
   2136 	movdqu	16(%rdi),%xmm3
   2137 	pxor	%xmm10,%xmm2
   2138 	movdqu	32(%rdi),%xmm4
   2139 	pxor	%xmm11,%xmm3
   2140 .byte	102,15,56,222,209
   2141 	movdqu	48(%rdi),%xmm5
   2142 	pxor	%xmm12,%xmm4
   2143 .byte	102,15,56,222,217
   2144 	movdqu	64(%rdi),%xmm6
   2145 	pxor	%xmm13,%xmm5
   2146 .byte	102,15,56,222,225
   2147 	movdqu	80(%rdi),%xmm7
   2148 	pxor	%xmm15,%xmm8
   2149 	movdqa	96(%rsp),%xmm9
   2150 	pxor	%xmm14,%xmm6
   2151 .byte	102,15,56,222,233
   2152 	movups	32(%rbp),%xmm0
   2153 	leaq	96(%rdi),%rdi
   2154 	pxor	%xmm8,%xmm7
   2155 
   2156 	pxor	%xmm9,%xmm10
   2157 .byte	102,15,56,222,241
   2158 	pxor	%xmm9,%xmm11
   2159 	movdqa	%xmm10,0(%rsp)
   2160 .byte	102,15,56,222,249
   2161 	movups	48(%rbp),%xmm1
   2162 	pxor	%xmm9,%xmm12
   2163 
   2164 .byte	102,15,56,222,208
   2165 	pxor	%xmm9,%xmm13
   2166 	movdqa	%xmm11,16(%rsp)
   2167 .byte	102,15,56,222,216
   2168 	pxor	%xmm9,%xmm14
   2169 	movdqa	%xmm12,32(%rsp)
   2170 .byte	102,15,56,222,224
   2171 .byte	102,15,56,222,232
   2172 	pxor	%xmm9,%xmm8
   2173 	movdqa	%xmm14,64(%rsp)
   2174 .byte	102,15,56,222,240
   2175 .byte	102,15,56,222,248
   2176 	movups	64(%rbp),%xmm0
   2177 	movdqa	%xmm8,80(%rsp)
   2178 	pshufd	$0x5f,%xmm15,%xmm9
   2179 	jmp	L$xts_dec_loop6
   2180 .p2align	5
   2181 L$xts_dec_loop6:
   2182 .byte	102,15,56,222,209
   2183 .byte	102,15,56,222,217
   2184 .byte	102,15,56,222,225
   2185 .byte	102,15,56,222,233
   2186 .byte	102,15,56,222,241
   2187 .byte	102,15,56,222,249
   2188 	movups	-64(%rcx,%rax,1),%xmm1
   2189 	addq	$32,%rax
   2190 
   2191 .byte	102,15,56,222,208
   2192 .byte	102,15,56,222,216
   2193 .byte	102,15,56,222,224
   2194 .byte	102,15,56,222,232
   2195 .byte	102,15,56,222,240
   2196 .byte	102,15,56,222,248
   2197 	movups	-80(%rcx,%rax,1),%xmm0
   2198 	jnz	L$xts_dec_loop6
   2199 
   2200 	movdqa	(%r8),%xmm8
   2201 	movdqa	%xmm9,%xmm14
   2202 	paddd	%xmm9,%xmm9
   2203 .byte	102,15,56,222,209
   2204 	paddq	%xmm15,%xmm15
   2205 	psrad	$31,%xmm14
   2206 .byte	102,15,56,222,217
   2207 	pand	%xmm8,%xmm14
   2208 	movups	(%rbp),%xmm10
   2209 .byte	102,15,56,222,225
   2210 .byte	102,15,56,222,233
   2211 .byte	102,15,56,222,241
   2212 	pxor	%xmm14,%xmm15
   2213 	movaps	%xmm10,%xmm11
   2214 .byte	102,15,56,222,249
   2215 	movups	-64(%rcx),%xmm1
   2216 
   2217 	movdqa	%xmm9,%xmm14
   2218 .byte	102,15,56,222,208
   2219 	paddd	%xmm9,%xmm9
   2220 	pxor	%xmm15,%xmm10
   2221 .byte	102,15,56,222,216
   2222 	psrad	$31,%xmm14
   2223 	paddq	%xmm15,%xmm15
   2224 .byte	102,15,56,222,224
   2225 .byte	102,15,56,222,232
   2226 	pand	%xmm8,%xmm14
   2227 	movaps	%xmm11,%xmm12
   2228 .byte	102,15,56,222,240
   2229 	pxor	%xmm14,%xmm15
   2230 	movdqa	%xmm9,%xmm14
   2231 .byte	102,15,56,222,248
   2232 	movups	-48(%rcx),%xmm0
   2233 
   2234 	paddd	%xmm9,%xmm9
   2235 .byte	102,15,56,222,209
   2236 	pxor	%xmm15,%xmm11
   2237 	psrad	$31,%xmm14
   2238 .byte	102,15,56,222,217
   2239 	paddq	%xmm15,%xmm15
   2240 	pand	%xmm8,%xmm14
   2241 .byte	102,15,56,222,225
   2242 .byte	102,15,56,222,233
   2243 	movdqa	%xmm13,48(%rsp)
   2244 	pxor	%xmm14,%xmm15
   2245 .byte	102,15,56,222,241
   2246 	movaps	%xmm12,%xmm13
   2247 	movdqa	%xmm9,%xmm14
   2248 .byte	102,15,56,222,249
   2249 	movups	-32(%rcx),%xmm1
   2250 
   2251 	paddd	%xmm9,%xmm9
   2252 .byte	102,15,56,222,208
   2253 	pxor	%xmm15,%xmm12
   2254 	psrad	$31,%xmm14
   2255 .byte	102,15,56,222,216
   2256 	paddq	%xmm15,%xmm15
   2257 	pand	%xmm8,%xmm14
   2258 .byte	102,15,56,222,224
   2259 .byte	102,15,56,222,232
   2260 .byte	102,15,56,222,240
   2261 	pxor	%xmm14,%xmm15
   2262 	movaps	%xmm13,%xmm14
   2263 .byte	102,15,56,222,248
   2264 
   2265 	movdqa	%xmm9,%xmm0
   2266 	paddd	%xmm9,%xmm9
   2267 .byte	102,15,56,222,209
   2268 	pxor	%xmm15,%xmm13
   2269 	psrad	$31,%xmm0
   2270 .byte	102,15,56,222,217
   2271 	paddq	%xmm15,%xmm15
   2272 	pand	%xmm8,%xmm0
   2273 .byte	102,15,56,222,225
   2274 .byte	102,15,56,222,233
   2275 	pxor	%xmm0,%xmm15
   2276 	movups	(%rbp),%xmm0
   2277 .byte	102,15,56,222,241
   2278 .byte	102,15,56,222,249
   2279 	movups	16(%rbp),%xmm1
   2280 
   2281 	pxor	%xmm15,%xmm14
   2282 .byte	102,15,56,223,84,36,0
   2283 	psrad	$31,%xmm9
   2284 	paddq	%xmm15,%xmm15
   2285 .byte	102,15,56,223,92,36,16
   2286 .byte	102,15,56,223,100,36,32
   2287 	pand	%xmm8,%xmm9
   2288 	movq	%r10,%rax
   2289 .byte	102,15,56,223,108,36,48
   2290 .byte	102,15,56,223,116,36,64
   2291 .byte	102,15,56,223,124,36,80
   2292 	pxor	%xmm9,%xmm15
   2293 
   2294 	leaq	96(%rsi),%rsi
   2295 	movups	%xmm2,-96(%rsi)
   2296 	movups	%xmm3,-80(%rsi)
   2297 	movups	%xmm4,-64(%rsi)
   2298 	movups	%xmm5,-48(%rsi)
   2299 	movups	%xmm6,-32(%rsi)
   2300 	movups	%xmm7,-16(%rsi)
   2301 	subq	$96,%rdx
   2302 	jnc	L$xts_dec_grandloop
   2303 
   2304 	movl	$16+96,%eax
   2305 	subl	%r10d,%eax
   2306 	movq	%rbp,%rcx
   2307 	shrl	$4,%eax
   2308 
   2309 L$xts_dec_short:
   2310 
   2311 	movl	%eax,%r10d
   2312 	pxor	%xmm0,%xmm10
   2313 	pxor	%xmm0,%xmm11
   2314 	addq	$96,%rdx
   2315 	jz	L$xts_dec_done
   2316 
   2317 	pxor	%xmm0,%xmm12
   2318 	cmpq	$0x20,%rdx
   2319 	jb	L$xts_dec_one
   2320 	pxor	%xmm0,%xmm13
   2321 	je	L$xts_dec_two
   2322 
   2323 	pxor	%xmm0,%xmm14
   2324 	cmpq	$0x40,%rdx
   2325 	jb	L$xts_dec_three
   2326 	je	L$xts_dec_four
   2327 
   2328 	movdqu	(%rdi),%xmm2
   2329 	movdqu	16(%rdi),%xmm3
   2330 	movdqu	32(%rdi),%xmm4
   2331 	pxor	%xmm10,%xmm2
   2332 	movdqu	48(%rdi),%xmm5
   2333 	pxor	%xmm11,%xmm3
   2334 	movdqu	64(%rdi),%xmm6
   2335 	leaq	80(%rdi),%rdi
   2336 	pxor	%xmm12,%xmm4
   2337 	pxor	%xmm13,%xmm5
   2338 	pxor	%xmm14,%xmm6
   2339 
   2340 	call	_aesni_decrypt6
   2341 
   2342 	xorps	%xmm10,%xmm2
   2343 	xorps	%xmm11,%xmm3
   2344 	xorps	%xmm12,%xmm4
   2345 	movdqu	%xmm2,(%rsi)
   2346 	xorps	%xmm13,%xmm5
   2347 	movdqu	%xmm3,16(%rsi)
   2348 	xorps	%xmm14,%xmm6
   2349 	movdqu	%xmm4,32(%rsi)
   2350 	pxor	%xmm14,%xmm14
   2351 	movdqu	%xmm5,48(%rsi)
   2352 	pcmpgtd	%xmm15,%xmm14
   2353 	movdqu	%xmm6,64(%rsi)
   2354 	leaq	80(%rsi),%rsi
   2355 	pshufd	$0x13,%xmm14,%xmm11
   2356 	andq	$15,%r9
   2357 	jz	L$xts_dec_ret
   2358 
   2359 	movdqa	%xmm15,%xmm10
   2360 	paddq	%xmm15,%xmm15
   2361 	pand	%xmm8,%xmm11
   2362 	pxor	%xmm15,%xmm11
   2363 	jmp	L$xts_dec_done2
   2364 
   2365 .p2align	4
   2366 L$xts_dec_one:
   2367 	movups	(%rdi),%xmm2
   2368 	leaq	16(%rdi),%rdi
   2369 	xorps	%xmm10,%xmm2
   2370 	movups	(%rcx),%xmm0
   2371 	movups	16(%rcx),%xmm1
   2372 	leaq	32(%rcx),%rcx
   2373 	xorps	%xmm0,%xmm2
   2374 L$oop_dec1_12:
   2375 .byte	102,15,56,222,209
   2376 	decl	%eax
   2377 	movups	(%rcx),%xmm1
   2378 	leaq	16(%rcx),%rcx
   2379 	jnz	L$oop_dec1_12
   2380 .byte	102,15,56,223,209
   2381 	xorps	%xmm10,%xmm2
   2382 	movdqa	%xmm11,%xmm10
   2383 	movups	%xmm2,(%rsi)
   2384 	movdqa	%xmm12,%xmm11
   2385 	leaq	16(%rsi),%rsi
   2386 	jmp	L$xts_dec_done
   2387 
   2388 .p2align	4
   2389 L$xts_dec_two:
   2390 	movups	(%rdi),%xmm2
   2391 	movups	16(%rdi),%xmm3
   2392 	leaq	32(%rdi),%rdi
   2393 	xorps	%xmm10,%xmm2
   2394 	xorps	%xmm11,%xmm3
   2395 
   2396 	call	_aesni_decrypt2
   2397 
   2398 	xorps	%xmm10,%xmm2
   2399 	movdqa	%xmm12,%xmm10
   2400 	xorps	%xmm11,%xmm3
   2401 	movdqa	%xmm13,%xmm11
   2402 	movups	%xmm2,(%rsi)
   2403 	movups	%xmm3,16(%rsi)
   2404 	leaq	32(%rsi),%rsi
   2405 	jmp	L$xts_dec_done
   2406 
   2407 .p2align	4
   2408 L$xts_dec_three:
   2409 	movups	(%rdi),%xmm2
   2410 	movups	16(%rdi),%xmm3
   2411 	movups	32(%rdi),%xmm4
   2412 	leaq	48(%rdi),%rdi
   2413 	xorps	%xmm10,%xmm2
   2414 	xorps	%xmm11,%xmm3
   2415 	xorps	%xmm12,%xmm4
   2416 
   2417 	call	_aesni_decrypt3
   2418 
   2419 	xorps	%xmm10,%xmm2
   2420 	movdqa	%xmm13,%xmm10
   2421 	xorps	%xmm11,%xmm3
   2422 	movdqa	%xmm14,%xmm11
   2423 	xorps	%xmm12,%xmm4
   2424 	movups	%xmm2,(%rsi)
   2425 	movups	%xmm3,16(%rsi)
   2426 	movups	%xmm4,32(%rsi)
   2427 	leaq	48(%rsi),%rsi
   2428 	jmp	L$xts_dec_done
   2429 
   2430 .p2align	4
   2431 L$xts_dec_four:
   2432 	movups	(%rdi),%xmm2
   2433 	movups	16(%rdi),%xmm3
   2434 	movups	32(%rdi),%xmm4
   2435 	xorps	%xmm10,%xmm2
   2436 	movups	48(%rdi),%xmm5
   2437 	leaq	64(%rdi),%rdi
   2438 	xorps	%xmm11,%xmm3
   2439 	xorps	%xmm12,%xmm4
   2440 	xorps	%xmm13,%xmm5
   2441 
   2442 	call	_aesni_decrypt4
   2443 
   2444 	pxor	%xmm10,%xmm2
   2445 	movdqa	%xmm14,%xmm10
   2446 	pxor	%xmm11,%xmm3
   2447 	movdqa	%xmm15,%xmm11
   2448 	pxor	%xmm12,%xmm4
   2449 	movdqu	%xmm2,(%rsi)
   2450 	pxor	%xmm13,%xmm5
   2451 	movdqu	%xmm3,16(%rsi)
   2452 	movdqu	%xmm4,32(%rsi)
   2453 	movdqu	%xmm5,48(%rsi)
   2454 	leaq	64(%rsi),%rsi
   2455 	jmp	L$xts_dec_done
   2456 
   2457 .p2align	4
   2458 L$xts_dec_done:
   2459 	andq	$15,%r9
   2460 	jz	L$xts_dec_ret
   2461 L$xts_dec_done2:
   2462 	movq	%r9,%rdx
   2463 	movq	%rbp,%rcx
   2464 	movl	%r10d,%eax
   2465 
   2466 	movups	(%rdi),%xmm2
   2467 	xorps	%xmm11,%xmm2
   2468 	movups	(%rcx),%xmm0
   2469 	movups	16(%rcx),%xmm1
   2470 	leaq	32(%rcx),%rcx
   2471 	xorps	%xmm0,%xmm2
   2472 L$oop_dec1_13:
   2473 .byte	102,15,56,222,209
   2474 	decl	%eax
   2475 	movups	(%rcx),%xmm1
   2476 	leaq	16(%rcx),%rcx
   2477 	jnz	L$oop_dec1_13
   2478 .byte	102,15,56,223,209
   2479 	xorps	%xmm11,%xmm2
   2480 	movups	%xmm2,(%rsi)
   2481 
   2482 L$xts_dec_steal:
   2483 	movzbl	16(%rdi),%eax
   2484 	movzbl	(%rsi),%ecx
   2485 	leaq	1(%rdi),%rdi
   2486 	movb	%al,(%rsi)
   2487 	movb	%cl,16(%rsi)
   2488 	leaq	1(%rsi),%rsi
   2489 	subq	$1,%rdx
   2490 	jnz	L$xts_dec_steal
   2491 
   2492 	subq	%r9,%rsi
   2493 	movq	%rbp,%rcx
   2494 	movl	%r10d,%eax
   2495 
   2496 	movups	(%rsi),%xmm2
   2497 	xorps	%xmm10,%xmm2
   2498 	movups	(%rcx),%xmm0
   2499 	movups	16(%rcx),%xmm1
   2500 	leaq	32(%rcx),%rcx
   2501 	xorps	%xmm0,%xmm2
   2502 L$oop_dec1_14:
   2503 .byte	102,15,56,222,209
   2504 	decl	%eax
   2505 	movups	(%rcx),%xmm1
   2506 	leaq	16(%rcx),%rcx
   2507 	jnz	L$oop_dec1_14
   2508 .byte	102,15,56,223,209
   2509 	xorps	%xmm10,%xmm2
   2510 	movups	%xmm2,(%rsi)
   2511 
   2512 L$xts_dec_ret:
   2513 	xorps	%xmm0,%xmm0
   2514 	pxor	%xmm1,%xmm1
   2515 	pxor	%xmm2,%xmm2
   2516 	pxor	%xmm3,%xmm3
   2517 	pxor	%xmm4,%xmm4
   2518 	pxor	%xmm5,%xmm5
   2519 	pxor	%xmm6,%xmm6
   2520 	pxor	%xmm7,%xmm7
   2521 	movaps	%xmm0,0(%rsp)
   2522 	pxor	%xmm8,%xmm8
   2523 	movaps	%xmm0,16(%rsp)
   2524 	pxor	%xmm9,%xmm9
   2525 	movaps	%xmm0,32(%rsp)
   2526 	pxor	%xmm10,%xmm10
   2527 	movaps	%xmm0,48(%rsp)
   2528 	pxor	%xmm11,%xmm11
   2529 	movaps	%xmm0,64(%rsp)
   2530 	pxor	%xmm12,%xmm12
   2531 	movaps	%xmm0,80(%rsp)
   2532 	pxor	%xmm13,%xmm13
   2533 	movaps	%xmm0,96(%rsp)
   2534 	pxor	%xmm14,%xmm14
   2535 	pxor	%xmm15,%xmm15
   2536 	movq	-8(%r11),%rbp
   2537 	leaq	(%r11),%rsp
   2538 L$xts_dec_epilogue:
   2539 	.byte	0xf3,0xc3
   2540 
   2541 .globl	_aesni_ocb_encrypt
   2542 .private_extern _aesni_ocb_encrypt
   2543 
        # _aesni_ocb_encrypt: OCB-mode bulk encryption of whole 16-byte blocks
        # with AES-NI. Arguments arrive in the System V AMD64 register order.
        #
        # Inputs, as used by the code below:
        #   %rdi      source pointer, read 16 bytes per block
        #   %rsi      destination pointer, written 16 bytes per block
        #   %rdx      number of 16-byte blocks
        #             NOTE(review): the code appears to assume %rdx >= 1; with
        #             %rdx == 0 and an even %r8 one block is still processed.
        #             Confirm that callers guard against a zero count.
        #   %rcx      AES key schedule; the round count is read at offset 240
        #   %r8       number of the first block; its lowest set bit (bsf)
        #             selects the L_ table entry for a block
        #   %r9       16-byte running offset, loaded on entry, stored on exit
        #   8(%rsp)   on entry: base of a table of 16-byte entries, called L_
        #             in these comments, indexed by 16*bsf(block number)
        #   16(%rsp)  on entry: pointer to a 16-byte checksum, loaded on entry
        #             and stored on exit
        #
        # Register roles in the body:
        #   %rbx = L_ table, %rbp = checksum pointer, %r11 = key schedule,
        #   %r10 = saved initial round-key counter, %r12-%r14 = byte offsets
        #   into L_, %xmm8 = checksum, %xmm9 = round key 0 xor last round key,
        #   %xmm10 = L_[0], %xmm15 = running offset xor last round key.
        # All sixteen xmm registers are zeroed before returning.
   2544 .p2align	5
   2545 _aesni_ocb_encrypt:
        # Prologue: remember the entry %rsp in %rax, save the callee-saved
        # registers used here, then fetch the two stack-passed arguments.
   2546 	leaq	(%rsp),%rax
   2547 	pushq	%rbx
   2548 	pushq	%rbp
   2549 	pushq	%r12
   2550 	pushq	%r13
   2551 	pushq	%r14
   2552 	movq	8(%rax),%rbx
   2553 	movq	8+8(%rax),%rbp
   2554 
        # %r10 = 16*rounds. %xmm9 = round key 0 and %xmm1 = the round key at
        # offset 16+16*rounds, which the helpers use as the last round key.
   2555 	movl	240(%rcx),%r10d
   2556 	movq	%rcx,%r11
   2557 	shll	$4,%r10d
   2558 	movups	(%rcx),%xmm9
   2559 	movups	16(%rcx,%r10,1),%xmm1
   2560 
        # Pre-mask: %xmm9 = key0 xor keylast, %xmm15 = offset xor keylast.
        # The helpers rely on this so that xoring an offset into a block also
        # applies a round key.
   2561 	movdqu	(%r9),%xmm15
   2562 	pxor	%xmm1,%xmm9
   2563 	pxor	%xmm1,%xmm15
   2564 
        # Round-key cursor: %rcx = key + 32 + 16*rounds and
        # %rax = 48 - 16*rounds, a negative index that the helper loops step
        # by 32 until it reaches zero. %r10 keeps the initial %rax so helpers
        # can restore it. %xmm1 is reloaded with round key 1.
   2565 	movl	$16+32,%eax
   2566 	leaq	32(%r11,%r10,1),%rcx
   2567 	movups	16(%r11),%xmm1
   2568 	subq	%r10,%rax
   2569 	movq	%rax,%r10
   2570 
        # %xmm10 = L_[0], %xmm8 = running checksum.
   2571 	movdqu	(%rbx),%xmm10
   2572 	movdqu	(%rbp),%xmm8
   2573 
        # An even starting block number is handled as a single block first, so
        # that the main path always begins on an odd block number.
   2574 	testq	$1,%r8
   2575 	jnz	L$ocb_enc_odd
   2576 
        # %xmm7 = L_[bsf(block number)], the entry for this single block.
   2577 	bsfq	%r8,%r12
   2578 	addq	$1,%r8
   2579 	shlq	$4,%r12
   2580 	movdqu	(%rbx,%r12,1),%xmm7
   2581 	movdqu	(%rdi),%xmm2
   2582 	leaq	16(%rdi),%rdi
   2583 
   2584 	call	__ocb_encrypt1
   2585 
        # The helper leaves the updated (masked) offset in %xmm7.
   2586 	movdqa	%xmm7,%xmm15
   2587 	movups	%xmm2,(%rsi)
   2588 	leaq	16(%rsi),%rsi
   2589 	subq	$1,%rdx
   2590 	jz	L$ocb_enc_done
   2591 
   2592 L$ocb_enc_odd:
        # With %r8 odd, blocks %r8, %r8+2 and %r8+4 use L_[0]. The entries for
        # blocks %r8+1, %r8+3 and %r8+5 are located via bsf and scaled by 16
        # into %r12, %r13 and %r14. %r8 is advanced past the group.
   2593 	leaq	1(%r8),%r12
   2594 	leaq	3(%r8),%r13
   2595 	leaq	5(%r8),%r14
   2596 	leaq	6(%r8),%r8
   2597 	bsfq	%r12,%r12
   2598 	bsfq	%r13,%r13
   2599 	bsfq	%r14,%r14
   2600 	shlq	$4,%r12
   2601 	shlq	$4,%r13
   2602 	shlq	$4,%r14
   2603 
        # Fewer than six blocks left: go straight to the tail handling.
   2604 	subq	$6,%rdx
   2605 	jc	L$ocb_enc_short
   2606 	jmp	L$ocb_enc_grandloop
   2607 
   2608 .p2align	5
   2609 L$ocb_enc_grandloop:
        # Main loop: six blocks per iteration. __ocb_encrypt6 also updates
        # %r8, %r12-%r14, the checksum and the running offset in %xmm15.
   2610 	movdqu	0(%rdi),%xmm2
   2611 	movdqu	16(%rdi),%xmm3
   2612 	movdqu	32(%rdi),%xmm4
   2613 	movdqu	48(%rdi),%xmm5
   2614 	movdqu	64(%rdi),%xmm6
   2615 	movdqu	80(%rdi),%xmm7
   2616 	leaq	96(%rdi),%rdi
   2617 
   2618 	call	__ocb_encrypt6
   2619 
   2620 	movups	%xmm2,0(%rsi)
   2621 	movups	%xmm3,16(%rsi)
   2622 	movups	%xmm4,32(%rsi)
   2623 	movups	%xmm5,48(%rsi)
   2624 	movups	%xmm6,64(%rsi)
   2625 	movups	%xmm7,80(%rsi)
   2626 	leaq	96(%rsi),%rsi
   2627 	subq	$6,%rdx
   2628 	jnc	L$ocb_enc_grandloop
   2629 
   2630 L$ocb_enc_short:
        # Undo the speculative subtraction: %rdx = remaining blocks (0..5).
        # The loads are interleaved with the compares; movdqu leaves the
        # flags untouched, so each je still tests the preceding cmpq.
   2631 	addq	$6,%rdx
   2632 	jz	L$ocb_enc_done
   2633 
   2634 	movdqu	0(%rdi),%xmm2
   2635 	cmpq	$2,%rdx
   2636 	jb	L$ocb_enc_one
   2637 	movdqu	16(%rdi),%xmm3
   2638 	je	L$ocb_enc_two
   2639 
   2640 	movdqu	32(%rdi),%xmm4
   2641 	cmpq	$4,%rdx
   2642 	jb	L$ocb_enc_three
   2643 	movdqu	48(%rdi),%xmm5
   2644 	je	L$ocb_enc_four
   2645 
        # Five blocks: run the six-block helper with a zero sixth block, which
        # leaves the checksum unchanged, and ignore the sixth result.
   2646 	movdqu	64(%rdi),%xmm6
   2647 	pxor	%xmm7,%xmm7
   2648 
   2649 	call	__ocb_encrypt6
   2650 
        # Running offset = offset of the fifth block (%xmm14).
   2651 	movdqa	%xmm14,%xmm15
   2652 	movups	%xmm2,0(%rsi)
   2653 	movups	%xmm3,16(%rsi)
   2654 	movups	%xmm4,32(%rsi)
   2655 	movups	%xmm5,48(%rsi)
   2656 	movups	%xmm6,64(%rsi)
   2657 
   2658 	jmp	L$ocb_enc_done
   2659 
   2660 .p2align	4
   2661 L$ocb_enc_one:
        # One block, on an odd block number, so its table entry is L_[0].
   2662 	movdqa	%xmm10,%xmm7
   2663 
   2664 	call	__ocb_encrypt1
   2665 
   2666 	movdqa	%xmm7,%xmm15
   2667 	movups	%xmm2,0(%rsi)
   2668 	jmp	L$ocb_enc_done
   2669 
   2670 .p2align	4
   2671 L$ocb_enc_two:
        # Two blocks: pad the four-block helper with zero blocks. The running
        # offset is then the offset of the second block (%xmm11).
   2672 	pxor	%xmm4,%xmm4
   2673 	pxor	%xmm5,%xmm5
   2674 
   2675 	call	__ocb_encrypt4
   2676 
   2677 	movdqa	%xmm11,%xmm15
   2678 	movups	%xmm2,0(%rsi)
   2679 	movups	%xmm3,16(%rsi)
   2680 
   2681 	jmp	L$ocb_enc_done
   2682 
   2683 .p2align	4
   2684 L$ocb_enc_three:
        # Three blocks: one zero pad block; offset of the third is in %xmm12.
   2685 	pxor	%xmm5,%xmm5
   2686 
   2687 	call	__ocb_encrypt4
   2688 
   2689 	movdqa	%xmm12,%xmm15
   2690 	movups	%xmm2,0(%rsi)
   2691 	movups	%xmm3,16(%rsi)
   2692 	movups	%xmm4,32(%rsi)
   2693 
   2694 	jmp	L$ocb_enc_done
   2695 
   2696 .p2align	4
   2697 L$ocb_enc_four:
        # Four blocks: offset of the fourth block is in %xmm13.
   2698 	call	__ocb_encrypt4
   2699 
   2700 	movdqa	%xmm13,%xmm15
   2701 	movups	%xmm2,0(%rsi)
   2702 	movups	%xmm3,16(%rsi)
   2703 	movups	%xmm4,32(%rsi)
   2704 	movups	%xmm5,48(%rsi)
   2705 
   2706 L$ocb_enc_done:
        # Strip the last-round-key mask from the running offset and write the
        # checksum and offset back. %xmm0 holds the last round key here as
        # left by the helpers, whose final load is -16(%rcx,%rax) at %rax == 0.
        # NOTE(review): this relies on a helper having run at least once.
   2707 	pxor	%xmm0,%xmm15
   2708 	movdqu	%xmm8,(%rbp)
   2709 	movdqu	%xmm15,(%r9)
   2710 
        # Clear every xmm register so that no key, offset or data material is
        # left behind in them.
   2711 	xorps	%xmm0,%xmm0
   2712 	pxor	%xmm1,%xmm1
   2713 	pxor	%xmm2,%xmm2
   2714 	pxor	%xmm3,%xmm3
   2715 	pxor	%xmm4,%xmm4
   2716 	pxor	%xmm5,%xmm5
   2717 	pxor	%xmm6,%xmm6
   2718 	pxor	%xmm7,%xmm7
   2719 	pxor	%xmm8,%xmm8
   2720 	pxor	%xmm9,%xmm9
   2721 	pxor	%xmm10,%xmm10
   2722 	pxor	%xmm11,%xmm11
   2723 	pxor	%xmm12,%xmm12
   2724 	pxor	%xmm13,%xmm13
   2725 	pxor	%xmm14,%xmm14
   2726 	pxor	%xmm15,%xmm15
        # Epilogue: %rax = %rsp + 40 is the entry %rsp (five 8-byte pushes).
        # Restore the saved registers relative to it and reset %rsp.
   2727 	leaq	40(%rsp),%rax
   2728 	movq	-40(%rax),%r14
   2729 	movq	-32(%rax),%r13
   2730 	movq	-24(%rax),%r12
   2731 	movq	-16(%rax),%rbp
   2732 	movq	-8(%rax),%rbx
   2733 	leaq	(%rax),%rsp
   2734 L$ocb_enc_epilogue:
        # The bytes 0xf3,0xc3 encode a two-byte return (rep ret).
   2735 	.byte	0xf3,0xc3
   2736 
   2737 
   2738 
   2739 .p2align	5
        # __ocb_encrypt6: internal helper, encrypts the six blocks held in
        # %xmm2-%xmm7 in parallel and folds them into the checksum.
        #
        # Expected on entry (established by _aesni_ocb_encrypt or by a
        # previous call of this helper):
        #   %xmm1  = round key 1 (16(%r11))
        #   %xmm9  = round key 0 xor last round key
        #   %xmm10 = L_[0]
        #   %xmm15 = running offset xor last round key
        #   %xmm8  = checksum
        #   %rbx   = L_ table; %r12, %r13, %r14 = byte offsets of the L_
        #            entries for the second, fourth and sixth block
        #   %r11   = key schedule; %rcx with negative index %rax = round-key
        #            cursor; %r10 = initial value of %rax; %r8 = block counter
        # On return:
        #   %xmm2-%xmm7   = encrypted blocks
        #   %xmm11-%xmm15 = offsets of blocks two to six, xor last round key
        #   %xmm10 = L_[0], %xmm1 = round key 1, %xmm0 = last round key
        #   %rax = %r10; %r8 advanced by 6; %r12-%r14 hold the byte offsets
        #   for the next group of six.
        #
        # Encoding key for the .byte lines (hand-assembled AES-NI):
        #   102,15,56,220,M     aesenc; M = 209,217,225,233,241,249 uses
        #                       %xmm1 on %xmm2..%xmm7, M = 208,216,...,248
        #                       uses %xmm0 on %xmm2..%xmm7
        #   102,65,15,56,221,M  aesenclast of %xmm10..%xmm15 into
        #                       %xmm2..%xmm7 (M = 210,219,228,237,246,255)
   2740 __ocb_encrypt6:
        # Switch the mask on the running offset from the last round key to
        # round key 0, so that xoring an offset into a block also applies
        # round key 0.
   2741 	pxor	%xmm9,%xmm15
        # Build the six offsets as a chain: each one is the previous offset
        # xor an L_ entry (L_[0] for the first, third and fifth block).
        # Interleaved with that, %xmm8 absorbs each input block before the
        # block is xored with its offset.
   2742 	movdqu	(%rbx,%r12,1),%xmm11
   2743 	movdqa	%xmm10,%xmm12
   2744 	movdqu	(%rbx,%r13,1),%xmm13
   2745 	movdqa	%xmm10,%xmm14
   2746 	pxor	%xmm15,%xmm10
   2747 	movdqu	(%rbx,%r14,1),%xmm15
   2748 	pxor	%xmm10,%xmm11
   2749 	pxor	%xmm2,%xmm8
   2750 	pxor	%xmm10,%xmm2
   2751 	pxor	%xmm11,%xmm12
   2752 	pxor	%xmm3,%xmm8
   2753 	pxor	%xmm11,%xmm3
   2754 	pxor	%xmm12,%xmm13
   2755 	pxor	%xmm4,%xmm8
   2756 	pxor	%xmm12,%xmm4
   2757 	pxor	%xmm13,%xmm14
   2758 	pxor	%xmm5,%xmm8
   2759 	pxor	%xmm13,%xmm5
   2760 	pxor	%xmm14,%xmm15
   2761 	pxor	%xmm6,%xmm8
   2762 	pxor	%xmm14,%xmm6
   2763 	pxor	%xmm7,%xmm8
   2764 	pxor	%xmm15,%xmm7
   2765 	movups	32(%r11),%xmm0
   2766 
        # Prepare the L_ indices for the next group of six while the AES
        # rounds run; %r8 moves on by six blocks.
   2767 	leaq	1(%r8),%r12
   2768 	leaq	3(%r8),%r13
   2769 	leaq	5(%r8),%r14
   2770 	addq	$6,%r8
        # From here on the offsets are re-masked with the last round key
        # (xor %xmm9), so the closing aesenclast with an offset as its key
        # applies the last round key and the offset together.
   2771 	pxor	%xmm9,%xmm10
   2772 	bsfq	%r12,%r12
   2773 	bsfq	%r13,%r13
   2774 	bsfq	%r14,%r14
   2775 
        # Round with round key 1 (%xmm1).
   2776 .byte	102,15,56,220,209
   2777 .byte	102,15,56,220,217
   2778 .byte	102,15,56,220,225
   2779 .byte	102,15,56,220,233
   2780 	pxor	%xmm9,%xmm11
   2781 	pxor	%xmm9,%xmm12
   2782 .byte	102,15,56,220,241
   2783 	pxor	%xmm9,%xmm13
   2784 	pxor	%xmm9,%xmm14
   2785 .byte	102,15,56,220,249
   2786 	movups	48(%r11),%xmm1
   2787 	pxor	%xmm9,%xmm15
   2788 
        # Round with the key from 32(%r11) (%xmm0).
   2789 .byte	102,15,56,220,208
   2790 .byte	102,15,56,220,216
   2791 .byte	102,15,56,220,224
   2792 .byte	102,15,56,220,232
   2793 .byte	102,15,56,220,240
   2794 .byte	102,15,56,220,248
   2795 	movups	64(%r11),%xmm0
   2796 	shlq	$4,%r12
   2797 	shlq	$4,%r13
   2798 	jmp	L$ocb_enc_loop6
   2799 
   2800 .p2align	5
   2801 L$ocb_enc_loop6:
        # Two rounds per iteration, alternating %xmm1 and %xmm0 as the round
        # key registers. %rax climbs by 32 towards zero; the jnz tests the
        # flags of the addq because the aesenc and movups in between leave
        # them alone.
   2802 .byte	102,15,56,220,209
   2803 .byte	102,15,56,220,217
   2804 .byte	102,15,56,220,225
   2805 .byte	102,15,56,220,233
   2806 .byte	102,15,56,220,241
   2807 .byte	102,15,56,220,249
   2808 	movups	(%rcx,%rax,1),%xmm1
   2809 	addq	$32,%rax
   2810 
   2811 .byte	102,15,56,220,208
   2812 .byte	102,15,56,220,216
   2813 .byte	102,15,56,220,224
   2814 .byte	102,15,56,220,232
   2815 .byte	102,15,56,220,240
   2816 .byte	102,15,56,220,248
   2817 	movups	-16(%rcx,%rax,1),%xmm0
   2818 	jnz	L$ocb_enc_loop6
   2819 
        # Last full round, then restore %xmm1 to round key 1 for the next
        # call and finish scaling %r14.
   2820 .byte	102,15,56,220,209
   2821 .byte	102,15,56,220,217
   2822 .byte	102,15,56,220,225
   2823 .byte	102,15,56,220,233
   2824 .byte	102,15,56,220,241
   2825 .byte	102,15,56,220,249
   2826 	movups	16(%r11),%xmm1
   2827 	shlq	$4,%r14
   2828 
        # Final round keyed by the masked offsets. %xmm10 is reloaded with
        # L_[0] right after its use and %rax is reset from %r10.
   2829 .byte	102,65,15,56,221,210
   2830 	movdqu	(%rbx),%xmm10
   2831 	movq	%r10,%rax
   2832 .byte	102,65,15,56,221,219
   2833 .byte	102,65,15,56,221,228
   2834 .byte	102,65,15,56,221,237
   2835 .byte	102,65,15,56,221,246
   2836 .byte	102,65,15,56,221,255
        # Two-byte return (rep ret).
   2837 	.byte	0xf3,0xc3
   2838 
   2839 
   2840 
   2841 .p2align	5
        # __ocb_encrypt4: internal helper, encrypts the four blocks held in
        # %xmm2-%xmm5 in parallel and folds them into the checksum %xmm8.
        # In this file it is only called for tails of two to four blocks,
        # with unused slots zeroed by the caller.
        #
        # Expected on entry: %xmm1 = round key 1, %xmm9 = round key 0 xor
        # last round key, %xmm10 = L_[0], %xmm15 = running offset xor last
        # round key, %rbx = L_ table, %r12 and %r13 = byte offsets of the L_
        # entries for the second and fourth block, %r11 = key schedule,
        # %rcx with negative index %rax = round-key cursor, %r10 = initial
        # %rax.
        # On return: %xmm2-%xmm5 = encrypted blocks, %xmm10-%xmm13 = the four
        # block offsets xor last round key, %xmm0 = last round key,
        # %xmm1 = round key 1, %rax = %r10. Unlike the six-block helper it
        # does not touch %r8 or %r12-%r14 and does not reload L_[0].
        #
        # The .byte lines are hand-encoded AES-NI: 102,15,56,220,M is aesenc
        # with %xmm1 (M = 209,217,225,233) or %xmm0 (M = 208,216,224,232) on
        # %xmm2..%xmm5; 102,65,15,56,221,M is aesenclast of %xmm10..%xmm13
        # into %xmm2..%xmm5.
   2842 __ocb_encrypt4:
        # Mask the running offset with round key 0 instead of the last round
        # key, then chain the four offsets and xor them into the blocks while
        # %xmm8 absorbs the unmasked input blocks.
   2843 	pxor	%xmm9,%xmm15
   2844 	movdqu	(%rbx,%r12,1),%xmm11
   2845 	movdqa	%xmm10,%xmm12
   2846 	movdqu	(%rbx,%r13,1),%xmm13
   2847 	pxor	%xmm15,%xmm10
   2848 	pxor	%xmm10,%xmm11
   2849 	pxor	%xmm2,%xmm8
   2850 	pxor	%xmm10,%xmm2
   2851 	pxor	%xmm11,%xmm12
   2852 	pxor	%xmm3,%xmm8
   2853 	pxor	%xmm11,%xmm3
   2854 	pxor	%xmm12,%xmm13
   2855 	pxor	%xmm4,%xmm8
   2856 	pxor	%xmm12,%xmm4
   2857 	pxor	%xmm5,%xmm8
   2858 	pxor	%xmm13,%xmm5
   2859 	movups	32(%r11),%xmm0
   2860 
        # Re-mask the offsets with the last round key so they can serve as
        # the key operand of the closing aesenclast.
   2861 	pxor	%xmm9,%xmm10
   2862 	pxor	%xmm9,%xmm11
   2863 	pxor	%xmm9,%xmm12
   2864 	pxor	%xmm9,%xmm13
   2865 
        # Round with round key 1, then with the key from 32(%r11).
   2866 .byte	102,15,56,220,209
   2867 .byte	102,15,56,220,217
   2868 .byte	102,15,56,220,225
   2869 .byte	102,15,56,220,233
   2870 	movups	48(%r11),%xmm1
   2871 
   2872 .byte	102,15,56,220,208
   2873 .byte	102,15,56,220,216
   2874 .byte	102,15,56,220,224
   2875 .byte	102,15,56,220,232
   2876 	movups	64(%r11),%xmm0
   2877 	jmp	L$ocb_enc_loop4
   2878 
   2879 .p2align	5
   2880 L$ocb_enc_loop4:
        # Two rounds per iteration; %rax climbs by 32 until it reaches zero.
   2881 .byte	102,15,56,220,209
   2882 .byte	102,15,56,220,217
   2883 .byte	102,15,56,220,225
   2884 .byte	102,15,56,220,233
   2885 	movups	(%rcx,%rax,1),%xmm1
   2886 	addq	$32,%rax
   2887 
   2888 .byte	102,15,56,220,208
   2889 .byte	102,15,56,220,216
   2890 .byte	102,15,56,220,224
   2891 .byte	102,15,56,220,232
   2892 	movups	-16(%rcx,%rax,1),%xmm0
   2893 	jnz	L$ocb_enc_loop4
   2894 
        # Last full round; restore %xmm1 and %rax for the caller.
   2895 .byte	102,15,56,220,209
   2896 .byte	102,15,56,220,217
   2897 .byte	102,15,56,220,225
   2898 .byte	102,15,56,220,233
   2899 	movups	16(%r11),%xmm1
   2900 	movq	%r10,%rax
   2901 
        # Final round keyed by the masked offsets %xmm10..%xmm13.
   2902 .byte	102,65,15,56,221,210
   2903 .byte	102,65,15,56,221,219
   2904 .byte	102,65,15,56,221,228
   2905 .byte	102,65,15,56,221,237
        # Two-byte return (rep ret).
   2906 	.byte	0xf3,0xc3
   2907 
   2908 
   2909 
   2910 .p2align	5
        # __ocb_encrypt1: internal helper, encrypts the single block in %xmm2
        # and folds it into the checksum %xmm8.
        #
        # Expected on entry: %xmm7 = L_ entry for this block, %xmm15 = running
        # offset xor last round key, %xmm9 = round key 0 xor last round key,
        # %xmm1 = round key 1, %r11 = key schedule, %rcx with negative index
        # %rax = round-key cursor, %r10 = initial %rax.
        # On return: %xmm2 = encrypted block, %xmm7 = new offset xor last
        # round key (the caller copies it to %xmm15), %xmm0 = last round key,
        # %xmm1 = round key 1, %rax = %r10.
        #
        # The .byte lines are hand-encoded AES-NI: 102,15,56,220,209 and
        # 102,15,56,220,208 are aesenc of %xmm1 and %xmm0 into %xmm2;
        # 102,15,56,221,215 is aesenclast of %xmm7 into %xmm2.
   2911 __ocb_encrypt1:
        # New offset = running offset xor L_ entry. The extra xor with %xmm9
        # swaps its mask to round key 0, so xoring it into the block also
        # applies round key 0.
   2912 	pxor	%xmm15,%xmm7
   2913 	pxor	%xmm9,%xmm7
   2914 	pxor	%xmm2,%xmm8
   2915 	pxor	%xmm7,%xmm2
   2916 	movups	32(%r11),%xmm0
   2917 
   2918 .byte	102,15,56,220,209
   2919 	movups	48(%r11),%xmm1
        # Swap the mask back to the last round key for the final aesenclast.
   2920 	pxor	%xmm9,%xmm7
   2921 
   2922 .byte	102,15,56,220,208
   2923 	movups	64(%r11),%xmm0
   2924 	jmp	L$ocb_enc_loop1
   2925 
   2926 .p2align	5
   2927 L$ocb_enc_loop1:
        # Two rounds per iteration; %rax climbs by 32 until it reaches zero.
   2928 .byte	102,15,56,220,209
   2929 	movups	(%rcx,%rax,1),%xmm1
   2930 	addq	$32,%rax
   2931 
   2932 .byte	102,15,56,220,208
   2933 	movups	-16(%rcx,%rax,1),%xmm0
   2934 	jnz	L$ocb_enc_loop1
   2935 
        # Last full round; restore %xmm1 and %rax for the caller.
   2936 .byte	102,15,56,220,209
   2937 	movups	16(%r11),%xmm1
   2938 	movq	%r10,%rax
   2939 
        # Final round keyed by the masked offset, then a two-byte return.
   2940 .byte	102,15,56,221,215
   2941 	.byte	0xf3,0xc3
   2942 
   2943 
   2944 .globl	_aesni_ocb_decrypt
   2945 .private_extern _aesni_ocb_decrypt
   2946 
        # _aesni_ocb_decrypt: OCB-mode bulk decryption of whole 16-byte blocks
        # with AES-NI. Arguments arrive in the System V AMD64 register order.
        #
        # Inputs, as used by the code below:
        #   %rdi      source pointer, read 16 bytes per block
        #   %rsi      destination pointer, written 16 bytes per block
        #   %rdx      number of 16-byte blocks
        #             NOTE(review): the code appears to assume %rdx >= 1; with
        #             %rdx == 0 and an even %r8 one block is still processed.
        #             Confirm that callers guard against a zero count.
        #   %rcx      AES key schedule; the round count is read at offset 240.
        #             Presumably this is a decryption schedule, since it is
        #             consumed by aesdec instructions - verify against caller.
        #   %r8       number of the first block; its lowest set bit (bsf)
        #             selects the L_ table entry for a block
        #   %r9       16-byte running offset, loaded on entry, stored on exit
        #   8(%rsp)   on entry: base of a table of 16-byte entries, called L_
        #             in these comments, indexed by 16*bsf(block number)
        #   16(%rsp)  on entry: pointer to a 16-byte checksum, loaded on entry
        #             and stored on exit
        #
        # Register roles match the encrypt routine: %rbx = L_ table,
        # %rbp = checksum pointer, %r11 = key schedule, %r10 = saved initial
        # round-key counter, %r12-%r14 = byte offsets into L_,
        # %xmm8 = checksum, %xmm9 = round key 0 xor last round key,
        # %xmm10 = L_[0], %xmm15 = running offset xor last round key.
        # The checksum is accumulated here, from the helper outputs, and not
        # inside the helpers.
   2947 .p2align	5
   2948 _aesni_ocb_decrypt:
        # Prologue: remember the entry %rsp in %rax, save the callee-saved
        # registers used here, then fetch the two stack-passed arguments.
   2949 	leaq	(%rsp),%rax
   2950 	pushq	%rbx
   2951 	pushq	%rbp
   2952 	pushq	%r12
   2953 	pushq	%r13
   2954 	pushq	%r14
   2955 	movq	8(%rax),%rbx
   2956 	movq	8+8(%rax),%rbp
   2957 
        # %r10 = 16*rounds. %xmm9 = round key 0 and %xmm1 = the round key at
        # offset 16+16*rounds, which the helpers use as the last round key.
   2958 	movl	240(%rcx),%r10d
   2959 	movq	%rcx,%r11
   2960 	shll	$4,%r10d
   2961 	movups	(%rcx),%xmm9
   2962 	movups	16(%rcx,%r10,1),%xmm1
   2963 
        # Pre-mask: %xmm9 = key0 xor keylast, %xmm15 = offset xor keylast.
   2964 	movdqu	(%r9),%xmm15
   2965 	pxor	%xmm1,%xmm9
   2966 	pxor	%xmm1,%xmm15
   2967 
        # Round-key cursor: %rcx = key + 32 + 16*rounds and
        # %rax = 48 - 16*rounds, a negative index that the helper loops step
        # by 32 until it reaches zero. %r10 keeps the initial %rax so helpers
        # can restore it. %xmm1 is reloaded with round key 1.
   2968 	movl	$16+32,%eax
   2969 	leaq	32(%r11,%r10,1),%rcx
   2970 	movups	16(%r11),%xmm1
   2971 	subq	%r10,%rax
   2972 	movq	%rax,%r10
   2973 
        # %xmm10 = L_[0], %xmm8 = running checksum.
   2974 	movdqu	(%rbx),%xmm10
   2975 	movdqu	(%rbp),%xmm8
   2976 
        # An even starting block number is handled as a single block first, so
        # that the main path always begins on an odd block number.
   2977 	testq	$1,%r8
   2978 	jnz	L$ocb_dec_odd
   2979 
        # %xmm7 = L_[bsf(block number)], the entry for this single block.
   2980 	bsfq	%r8,%r12
   2981 	addq	$1,%r8
   2982 	shlq	$4,%r12
   2983 	movdqu	(%rbx,%r12,1),%xmm7
   2984 	movdqu	(%rdi),%xmm2
   2985 	leaq	16(%rdi),%rdi
   2986 
   2987 	call	__ocb_decrypt1
   2988 
        # Take over the updated offset, store the result and fold it into the
        # checksum.
   2989 	movdqa	%xmm7,%xmm15
   2990 	movups	%xmm2,(%rsi)
   2991 	xorps	%xmm2,%xmm8
   2992 	leaq	16(%rsi),%rsi
   2993 	subq	$1,%rdx
   2994 	jz	L$ocb_dec_done
   2995 
   2996 L$ocb_dec_odd:
        # With %r8 odd, blocks %r8, %r8+2 and %r8+4 use L_[0]. The entries for
        # blocks %r8+1, %r8+3 and %r8+5 are located via bsf and scaled by 16
        # into %r12, %r13 and %r14. %r8 is advanced past the group.
   2997 	leaq	1(%r8),%r12
   2998 	leaq	3(%r8),%r13
   2999 	leaq	5(%r8),%r14
   3000 	leaq	6(%r8),%r8
   3001 	bsfq	%r12,%r12
   3002 	bsfq	%r13,%r13
   3003 	bsfq	%r14,%r14
   3004 	shlq	$4,%r12
   3005 	shlq	$4,%r13
   3006 	shlq	$4,%r14
   3007 
        # Fewer than six blocks left: go straight to the tail handling.
   3008 	subq	$6,%rdx
   3009 	jc	L$ocb_dec_short
   3010 	jmp	L$ocb_dec_grandloop
   3011 
   3012 .p2align	5
   3013 L$ocb_dec_grandloop:
        # Main loop: six blocks per iteration. __ocb_decrypt6 also updates
        # %r8, %r12-%r14 and the running offset in %xmm15.
   3014 	movdqu	0(%rdi),%xmm2
   3015 	movdqu	16(%rdi),%xmm3
   3016 	movdqu	32(%rdi),%xmm4
   3017 	movdqu	48(%rdi),%xmm5
   3018 	movdqu	64(%rdi),%xmm6
   3019 	movdqu	80(%rdi),%xmm7
   3020 	leaq	96(%rdi),%rdi
   3021 
   3022 	call	__ocb_decrypt6
   3023 
        # Store each result and fold it into the checksum.
   3024 	movups	%xmm2,0(%rsi)
   3025 	pxor	%xmm2,%xmm8
   3026 	movups	%xmm3,16(%rsi)
   3027 	pxor	%xmm3,%xmm8
   3028 	movups	%xmm4,32(%rsi)
   3029 	pxor	%xmm4,%xmm8
   3030 	movups	%xmm5,48(%rsi)
   3031 	pxor	%xmm5,%xmm8
   3032 	movups	%xmm6,64(%rsi)
   3033 	pxor	%xmm6,%xmm8
   3034 	movups	%xmm7,80(%rsi)
   3035 	pxor	%xmm7,%xmm8
   3036 	leaq	96(%rsi),%rsi
   3037 	subq	$6,%rdx
   3038 	jnc	L$ocb_dec_grandloop
   3039 
   3040 L$ocb_dec_short:
        # Undo the speculative subtraction: %rdx = remaining blocks (0..5).
        # The loads are interleaved with the compares; movdqu leaves the
        # flags untouched, so each je still tests the preceding cmpq.
   3041 	addq	$6,%rdx
   3042 	jz	L$ocb_dec_done
   3043 
   3044 	movdqu	0(%rdi),%xmm2
   3045 	cmpq	$2,%rdx
   3046 	jb	L$ocb_dec_one
   3047 	movdqu	16(%rdi),%xmm3
   3048 	je	L$ocb_dec_two
   3049 
   3050 	movdqu	32(%rdi),%xmm4
   3051 	cmpq	$4,%rdx
   3052 	jb	L$ocb_dec_three
   3053 	movdqu	48(%rdi),%xmm5
   3054 	je	L$ocb_dec_four
   3055 
        # Five blocks: run the six-block helper with a zero sixth block. Its
        # result in %xmm7 is neither stored nor added to the checksum.
   3056 	movdqu	64(%rdi),%xmm6
   3057 	pxor	%xmm7,%xmm7
   3058 
   3059 	call	__ocb_decrypt6
   3060 
        # Running offset = offset of the fifth block (%xmm14).
   3061 	movdqa	%xmm14,%xmm15
   3062 	movups	%xmm2,0(%rsi)
   3063 	pxor	%xmm2,%xmm8
   3064 	movups	%xmm3,16(%rsi)
   3065 	pxor	%xmm3,%xmm8
   3066 	movups	%xmm4,32(%rsi)
   3067 	pxor	%xmm4,%xmm8
   3068 	movups	%xmm5,48(%rsi)
   3069 	pxor	%xmm5,%xmm8
   3070 	movups	%xmm6,64(%rsi)
   3071 	pxor	%xmm6,%xmm8
   3072 
   3073 	jmp	L$ocb_dec_done
   3074 
   3075 .p2align	4
   3076 L$ocb_dec_one:
        # One block, on an odd block number, so its table entry is L_[0].
   3077 	movdqa	%xmm10,%xmm7
   3078 
   3079 	call	__ocb_decrypt1
   3080 
   3081 	movdqa	%xmm7,%xmm15
   3082 	movups	%xmm2,0(%rsi)
   3083 	xorps	%xmm2,%xmm8
   3084 	jmp	L$ocb_dec_done
   3085 
   3086 .p2align	4
   3087 L$ocb_dec_two:
        # Two blocks: pad the four-block helper with zero blocks whose results
        # are ignored. The running offset is that of the second block.
   3088 	pxor	%xmm4,%xmm4
   3089 	pxor	%xmm5,%xmm5
   3090 
   3091 	call	__ocb_decrypt4
   3092 
   3093 	movdqa	%xmm11,%xmm15
   3094 	movups	%xmm2,0(%rsi)
   3095 	xorps	%xmm2,%xmm8
   3096 	movups	%xmm3,16(%rsi)
   3097 	xorps	%xmm3,%xmm8
   3098 
   3099 	jmp	L$ocb_dec_done
   3100 
   3101 .p2align	4
   3102 L$ocb_dec_three:
        # Three blocks: one zero pad block; offset of the third is in %xmm12.
   3103 	pxor	%xmm5,%xmm5
   3104 
   3105 	call	__ocb_decrypt4
   3106 
   3107 	movdqa	%xmm12,%xmm15
   3108 	movups	%xmm2,0(%rsi)
   3109 	xorps	%xmm2,%xmm8
   3110 	movups	%xmm3,16(%rsi)
   3111 	xorps	%xmm3,%xmm8
   3112 	movups	%xmm4,32(%rsi)
   3113 	xorps	%xmm4,%xmm8
   3114 
   3115 	jmp	L$ocb_dec_done
   3116 
   3117 .p2align	4
   3118 L$ocb_dec_four:
        # Four blocks: offset of the fourth block is in %xmm13.
   3119 	call	__ocb_decrypt4
   3120 
   3121 	movdqa	%xmm13,%xmm15
   3122 	movups	%xmm2,0(%rsi)
   3123 	pxor	%xmm2,%xmm8
   3124 	movups	%xmm3,16(%rsi)
   3125 	pxor	%xmm3,%xmm8
   3126 	movups	%xmm4,32(%rsi)
   3127 	pxor	%xmm4,%xmm8
   3128 	movups	%xmm5,48(%rsi)
   3129 	pxor	%xmm5,%xmm8
   3130 
   3131 L$ocb_dec_done:
        # Strip the last-round-key mask from the running offset and write the
        # checksum and offset back. %xmm0 holds the last round key here as
        # left by the helpers, whose final load is -16(%rcx,%rax) at %rax == 0.
        # NOTE(review): this relies on a helper having run at least once.
   3132 	pxor	%xmm0,%xmm15
   3133 	movdqu	%xmm8,(%rbp)
   3134 	movdqu	%xmm15,(%r9)
   3135 
        # Clear every xmm register so that no key, offset or data material is
        # left behind in them.
   3136 	xorps	%xmm0,%xmm0
   3137 	pxor	%xmm1,%xmm1
   3138 	pxor	%xmm2,%xmm2
   3139 	pxor	%xmm3,%xmm3
   3140 	pxor	%xmm4,%xmm4
   3141 	pxor	%xmm5,%xmm5
   3142 	pxor	%xmm6,%xmm6
   3143 	pxor	%xmm7,%xmm7
   3144 	pxor	%xmm8,%xmm8
   3145 	pxor	%xmm9,%xmm9
   3146 	pxor	%xmm10,%xmm10
   3147 	pxor	%xmm11,%xmm11
   3148 	pxor	%xmm12,%xmm12
   3149 	pxor	%xmm13,%xmm13
   3150 	pxor	%xmm14,%xmm14
   3151 	pxor	%xmm15,%xmm15
        # Epilogue: %rax = %rsp + 40 is the entry %rsp (five 8-byte pushes).
        # Restore the saved registers relative to it and reset %rsp.
   3152 	leaq	40(%rsp),%rax
   3153 	movq	-40(%rax),%r14
   3154 	movq	-32(%rax),%r13
   3155 	movq	-24(%rax),%r12
   3156 	movq	-16(%rax),%rbp
   3157 	movq	-8(%rax),%rbx
   3158 	leaq	(%rax),%rsp
   3159 L$ocb_dec_epilogue:
        # The bytes 0xf3,0xc3 encode a two-byte return (rep ret).
   3160 	.byte	0xf3,0xc3
   3161 
   3162 
   3163 
   3164 .p2align	5
        # __ocb_decrypt6: internal helper, decrypts the six blocks held in
        # %xmm2-%xmm7 in parallel. Unlike the encrypt helper it leaves the
        # checksum %xmm8 alone; the caller folds in the results afterwards.
        #
        # Expected on entry (established by _aesni_ocb_decrypt or by a
        # previous call of this helper):
        #   %xmm1  = round key 1 (16(%r11))
        #   %xmm9  = round key 0 xor last round key
        #   %xmm10 = L_[0]
        #   %xmm15 = running offset xor last round key
        #   %rbx   = L_ table; %r12, %r13, %r14 = byte offsets of the L_
        #            entries for the second, fourth and sixth block
        #   %r11   = key schedule; %rcx with negative index %rax = round-key
        #            cursor; %r10 = initial value of %rax; %r8 = block counter
        # On return:
        #   %xmm2-%xmm7   = decrypted blocks
        #   %xmm11-%xmm15 = offsets of blocks two to six, xor last round key
        #   %xmm10 = L_[0], %xmm1 = round key 1, %xmm0 = last round key
        #   %rax = %r10; %r8 advanced by 6; %r12-%r14 hold the byte offsets
        #   for the next group of six.
        #
        # Encoding key for the .byte lines (hand-assembled AES-NI):
        #   102,15,56,222,M     aesdec; M = 209,217,225,233,241,249 uses
        #                       %xmm1 on %xmm2..%xmm7, M = 208,216,...,248
        #                       uses %xmm0 on %xmm2..%xmm7
        #   102,65,15,56,223,M  aesdeclast of %xmm10..%xmm15 into
        #                       %xmm2..%xmm7 (M = 210,219,228,237,246,255)
   3165 __ocb_decrypt6:
        # Switch the mask on the running offset from the last round key to
        # round key 0, then build the six offsets as a chain (previous offset
        # xor an L_ entry) and xor each into its block.
   3166 	pxor	%xmm9,%xmm15
   3167 	movdqu	(%rbx,%r12,1),%xmm11
   3168 	movdqa	%xmm10,%xmm12
   3169 	movdqu	(%rbx,%r13,1),%xmm13
   3170 	movdqa	%xmm10,%xmm14
   3171 	pxor	%xmm15,%xmm10
   3172 	movdqu	(%rbx,%r14,1),%xmm15
   3173 	pxor	%xmm10,%xmm11
   3174 	pxor	%xmm10,%xmm2
   3175 	pxor	%xmm11,%xmm12
   3176 	pxor	%xmm11,%xmm3
   3177 	pxor	%xmm12,%xmm13
   3178 	pxor	%xmm12,%xmm4
   3179 	pxor	%xmm13,%xmm14
   3180 	pxor	%xmm13,%xmm5
   3181 	pxor	%xmm14,%xmm15
   3182 	pxor	%xmm14,%xmm6
   3183 	pxor	%xmm15,%xmm7
   3184 	movups	32(%r11),%xmm0
   3185 
        # Prepare the L_ indices for the next group of six while the AES
        # rounds run; %r8 moves on by six blocks.
   3186 	leaq	1(%r8),%r12
   3187 	leaq	3(%r8),%r13
   3188 	leaq	5(%r8),%r14
   3189 	addq	$6,%r8
        # From here on the offsets are re-masked with the last round key
        # (xor %xmm9), so the closing aesdeclast with an offset as its key
        # applies the last round key and the offset together.
   3190 	pxor	%xmm9,%xmm10
   3191 	bsfq	%r12,%r12
   3192 	bsfq	%r13,%r13
   3193 	bsfq	%r14,%r14
   3194 
        # Round with round key 1 (%xmm1).
   3195 .byte	102,15,56,222,209
   3196 .byte	102,15,56,222,217
   3197 .byte	102,15,56,222,225
   3198 .byte	102,15,56,222,233
   3199 	pxor	%xmm9,%xmm11
   3200 	pxor	%xmm9,%xmm12
   3201 .byte	102,15,56,222,241
   3202 	pxor	%xmm9,%xmm13
   3203 	pxor	%xmm9,%xmm14
   3204 .byte	102,15,56,222,249
   3205 	movups	48(%r11),%xmm1
   3206 	pxor	%xmm9,%xmm15
   3207 
        # Round with the key from 32(%r11) (%xmm0).
   3208 .byte	102,15,56,222,208
   3209 .byte	102,15,56,222,216
   3210 .byte	102,15,56,222,224
   3211 .byte	102,15,56,222,232
   3212 .byte	102,15,56,222,240
   3213 .byte	102,15,56,222,248
   3214 	movups	64(%r11),%xmm0
   3215 	shlq	$4,%r12
   3216 	shlq	$4,%r13
   3217 	jmp	L$ocb_dec_loop6
   3218 
   3219 .p2align	5
   3220 L$ocb_dec_loop6:
        # Two rounds per iteration, alternating %xmm1 and %xmm0 as the round
        # key registers. %rax climbs by 32 towards zero; the jnz tests the
        # flags of the addq because the aesdec and movups in between leave
        # them alone.
   3221 .byte	102,15,56,222,209
   3222 .byte	102,15,56,222,217
   3223 .byte	102,15,56,222,225
   3224 .byte	102,15,56,222,233
   3225 .byte	102,15,56,222,241
   3226 .byte	102,15,56,222,249
   3227 	movups	(%rcx,%rax,1),%xmm1
   3228 	addq	$32,%rax
   3229 
   3230 .byte	102,15,56,222,208
   3231 .byte	102,15,56,222,216
   3232 .byte	102,15,56,222,224
   3233 .byte	102,15,56,222,232
   3234 .byte	102,15,56,222,240
   3235 .byte	102,15,56,222,248
   3236 	movups	-16(%rcx,%rax,1),%xmm0
   3237 	jnz	L$ocb_dec_loop6
   3238 
        # Last full round, then restore %xmm1 to round key 1 for the next
        # call and finish scaling %r14.
   3239 .byte	102,15,56,222,209
   3240 .byte	102,15,56,222,217
   3241 .byte	102,15,56,222,225
   3242 .byte	102,15,56,222,233
   3243 .byte	102,15,56,222,241
   3244 .byte	102,15,56,222,249
   3245 	movups	16(%r11),%xmm1
   3246 	shlq	$4,%r14
   3247 
        # Final round keyed by the masked offsets. %xmm10 is reloaded with
        # L_[0] right after its use and %rax is reset from %r10.
   3248 .byte	102,65,15,56,223,210
   3249 	movdqu	(%rbx),%xmm10
   3250 	movq	%r10,%rax
   3251 .byte	102,65,15,56,223,219
   3252 .byte	102,65,15,56,223,228
   3253 .byte	102,65,15,56,223,237
   3254 .byte	102,65,15,56,223,246
   3255 .byte	102,65,15,56,223,255
        # Two-byte return (rep ret).
   3256 	.byte	0xf3,0xc3
   3257 
   3258 
   3259 
   3260 .p2align	5
        # __ocb_decrypt4: internal helper, decrypts the four blocks held in
        # %xmm2-%xmm5 in parallel. It leaves the checksum %xmm8 alone; the
        # caller folds in the results it keeps. In this file it is only
        # called for tails of two to four blocks, with unused slots zeroed.
        #
        # Expected on entry: %xmm1 = round key 1, %xmm9 = round key 0 xor
        # last round key, %xmm10 = L_[0], %xmm15 = running offset xor last
        # round key, %rbx = L_ table, %r12 and %r13 = byte offsets of the L_
        # entries for the second and fourth block, %r11 = key schedule,
        # %rcx with negative index %rax = round-key cursor, %r10 = initial
        # %rax.
        # On return: %xmm2-%xmm5 = decrypted blocks, %xmm10-%xmm13 = the four
        # block offsets xor last round key, %xmm0 = last round key,
        # %xmm1 = round key 1, %rax = %r10. It does not touch %r8 or
        # %r12-%r14 and does not reload L_[0].
        #
        # The .byte lines are hand-encoded AES-NI: 102,15,56,222,M is aesdec
        # with %xmm1 (M = 209,217,225,233) or %xmm0 (M = 208,216,224,232) on
        # %xmm2..%xmm5; 102,65,15,56,223,M is aesdeclast of %xmm10..%xmm13
        # into %xmm2..%xmm5.
   3261 __ocb_decrypt4:
        # Mask the running offset with round key 0 instead of the last round
        # key, then chain the four offsets and xor them into the blocks.
   3262 	pxor	%xmm9,%xmm15
   3263 	movdqu	(%rbx,%r12,1),%xmm11
   3264 	movdqa	%xmm10,%xmm12
   3265 	movdqu	(%rbx,%r13,1),%xmm13
   3266 	pxor	%xmm15,%xmm10
   3267 	pxor	%xmm10,%xmm11
   3268 	pxor	%xmm10,%xmm2
   3269 	pxor	%xmm11,%xmm12
   3270 	pxor	%xmm11,%xmm3
   3271 	pxor	%xmm12,%xmm13
   3272 	pxor	%xmm12,%xmm4
   3273 	pxor	%xmm13,%xmm5
   3274 	movups	32(%r11),%xmm0
   3275 
        # Re-mask the offsets with the last round key so they can serve as
        # the key operand of the closing aesdeclast.
   3276 	pxor	%xmm9,%xmm10
   3277 	pxor	%xmm9,%xmm11
   3278 	pxor	%xmm9,%xmm12
   3279 	pxor	%xmm9,%xmm13
   3280 
        # Round with round key 1, then with the key from 32(%r11).
   3281 .byte	102,15,56,222,209
   3282 .byte	102,15,56,222,217
   3283 .byte	102,15,56,222,225
   3284 .byte	102,15,56,222,233
   3285 	movups	48(%r11),%xmm1
   3286 
   3287 .byte	102,15,56,222,208
   3288 .byte	102,15,56,222,216
   3289 .byte	102,15,56,222,224
   3290 .byte	102,15,56,222,232
   3291 	movups	64(%r11),%xmm0
   3292 	jmp	L$ocb_dec_loop4
   3293 
   3294 .p2align	5
   3295 L$ocb_dec_loop4:
        # Two rounds per iteration; %rax climbs by 32 until it reaches zero.
   3296 .byte	102,15,56,222,209
   3297 .byte	102,15,56,222,217
   3298 .byte	102,15,56,222,225
   3299 .byte	102,15,56,222,233
   3300 	movups	(%rcx,%rax,1),%xmm1
   3301 	addq	$32,%rax
   3302 
   3303 .byte	102,15,56,222,208
   3304 .byte	102,15,56,222,216
   3305 .byte	102,15,56,222,224
   3306 .byte	102,15,56,222,232
   3307 	movups	-16(%rcx,%rax,1),%xmm0
   3308 	jnz	L$ocb_dec_loop4
   3309 
        # Last full round; restore %xmm1 and %rax for the caller.
   3310 .byte	102,15,56,222,209
   3311 .byte	102,15,56,222,217
   3312 .byte	102,15,56,222,225
   3313 .byte	102,15,56,222,233
   3314 	movups	16(%r11),%xmm1
   3315 	movq	%r10,%rax
   3316 
        # Final round keyed by the masked offsets %xmm10..%xmm13.
   3317 .byte	102,65,15,56,223,210
   3318 .byte	102,65,15,56,223,219
   3319 .byte	102,65,15,56,223,228
   3320 .byte	102,65,15,56,223,237
        # Two-byte return (rep ret).
   3321 	.byte	0xf3,0xc3
   3322 
   3323 
   3324 
   3325 .p2align	5
        # __ocb_decrypt1: internal helper, decrypts the single block in %xmm2.
        # It leaves the checksum %xmm8 alone; the caller folds in the result.
        #
        # Expected on entry: %xmm7 = L_ entry for this block, %xmm15 = running
        # offset xor last round key, %xmm9 = round key 0 xor last round key,
        # %xmm1 = round key 1, %r11 = key schedule, %rcx with negative index
        # %rax = round-key cursor, %r10 = initial %rax.
        # On return: %xmm2 = decrypted block, %xmm7 = new offset xor last
        # round key (the caller copies it to %xmm15), %xmm0 = last round key,
        # %xmm1 = round key 1, %rax = %r10.
        #
        # The .byte lines are hand-encoded AES-NI: 102,15,56,222,209 and
        # 102,15,56,222,208 are aesdec of %xmm1 and %xmm0 into %xmm2;
        # 102,15,56,223,215 is aesdeclast of %xmm7 into %xmm2.
   3326 __ocb_decrypt1:
        # New offset = running offset xor L_ entry. The extra xor with %xmm9
        # swaps its mask to round key 0, so xoring it into the block also
        # applies round key 0.
   3327 	pxor	%xmm15,%xmm7
   3328 	pxor	%xmm9,%xmm7
   3329 	pxor	%xmm7,%xmm2
   3330 	movups	32(%r11),%xmm0
   3331 
   3332 .byte	102,15,56,222,209
   3333 	movups	48(%r11),%xmm1
        # Swap the mask back to the last round key for the final aesdeclast.
   3334 	pxor	%xmm9,%xmm7
   3335 
   3336 .byte	102,15,56,222,208
   3337 	movups	64(%r11),%xmm0
   3338 	jmp	L$ocb_dec_loop1
   3339 
   3340 .p2align	5
   3341 L$ocb_dec_loop1:
        # Two rounds per iteration; %rax climbs by 32 until it reaches zero.
   3342 .byte	102,15,56,222,209
   3343 	movups	(%rcx,%rax,1),%xmm1
   3344 	addq	$32,%rax
   3345 
   3346 .byte	102,15,56,222,208
   3347 	movups	-16(%rcx,%rax,1),%xmm0
   3348 	jnz	L$ocb_dec_loop1
   3349 
        # Last full round; restore %xmm1 and %rax for the caller.
   3350 .byte	102,15,56,222,209
   3351 	movups	16(%r11),%xmm1
   3352 	movq	%r10,%rax
   3353 
        # Final round keyed by the masked offset, then a two-byte return.
   3354 .byte	102,15,56,223,215
   3355 	.byte	0xf3,0xc3
   3356 
   3357 .globl	_aesni_cbc_encrypt
   3358 .private_extern _aesni_cbc_encrypt
   3359 
   3360 .p2align	4
   3361 _aesni_cbc_encrypt:
   3362 	testq	%rdx,%rdx
   3363 	jz	L$cbc_ret
   3364 
   3365 	movl	240(%rcx),%r10d
   3366 	movq	%rcx,%r11
   3367 	testl	%r9d,%r9d
   3368 	jz	L$cbc_decrypt
   3369 
   3370 	movups	(%r8),%xmm2
   3371 	movl	%r10d,%eax
   3372 	cmpq	$16,%rdx
   3373 	jb	L$cbc_enc_tail
   3374 	subq	$16,%rdx
   3375 	jmp	L$cbc_enc_loop
   3376 .p2align	4
   3377 L$cbc_enc_loop:
   3378 	movups	(%rdi),%xmm3
   3379 	leaq	16(%rdi),%rdi
   3380 
   3381 	movups	(%rcx),%xmm0
   3382 	movups	16(%rcx),%xmm1
   3383 	xorps	%xmm0,%xmm3
   3384 	leaq	32(%rcx),%rcx
   3385 	xorps	%xmm3,%xmm2
   3386 L$oop_enc1_15:
   3387 .byte	102,15,56,220,209
   3388 	decl	%eax
   3389 	movups	(%rcx),%xmm1
   3390 	leaq	16(%rcx),%rcx
   3391 	jnz	L$oop_enc1_15
   3392 .byte	102,15,56,221,209
   3393 	movl	%r10d,%eax
   3394 	movq	%r11,%rcx
   3395 	movups	%xmm2,0(%rsi)
   3396 	leaq	16(%rsi),%rsi
   3397 	subq	$16,%rdx
   3398 	jnc	L$cbc_enc_loop
   3399 	addq	$16,%rdx
   3400 	jnz	L$cbc_enc_tail
   3401 	pxor	%xmm0,%xmm0
   3402 	pxor	%xmm1,%xmm1
   3403 	movups	%xmm2,(%r8)
   3404 	pxor	%xmm2,%xmm2
   3405 	pxor	%xmm3,%xmm3
   3406 	jmp	L$cbc_ret
   3407 
   3408 L$cbc_enc_tail:
   3409 	movq	%rdx,%rcx
   3410 	xchgq	%rdi,%rsi
   3411 .long	0x9066A4F3
   3412 	movl	$16,%ecx
   3413 	subq	%rdx,%rcx
   3414 	xorl	%eax,%eax
   3415 .long	0x9066AAF3
   3416 	leaq	-16(%rdi),%rdi
   3417 	movl	%r10d,%eax
   3418 	movq	%rdi,%rsi
   3419 	movq	%r11,%rcx
   3420 	xorq	%rdx,%rdx
   3421 	jmp	L$cbc_enc_loop
   3422 
   3423 .p2align	4
   3424 L$cbc_decrypt:
   3425 	cmpq	$16,%rdx
   3426 	jne	L$cbc_decrypt_bulk
   3427 
   3428 
   3429 
   3430 	movdqu	(%rdi),%xmm2
   3431 	movdqu	(%r8),%xmm3
   3432 	movdqa	%xmm2,%xmm4
   3433 	movups	(%rcx),%xmm0
   3434 	movups	16(%rcx),%xmm1
   3435 	leaq	32(%rcx),%rcx
   3436 	xorps	%xmm0,%xmm2
   3437 L$oop_dec1_16:
   3438 .byte	102,15,56,222,209
   3439 	decl	%r10d
   3440 	movups	(%rcx),%xmm1
   3441 	leaq	16(%rcx),%rcx
   3442 	jnz	L$oop_dec1_16
   3443 .byte	102,15,56,223,209
   3444 	pxor	%xmm0,%xmm0
   3445 	pxor	%xmm1,%xmm1
   3446 	movdqu	%xmm4,(%r8)
   3447 	xorps	%xmm3,%xmm2
   3448 	pxor	%xmm3,%xmm3
   3449 	movups	%xmm2,(%rsi)
   3450 	pxor	%xmm2,%xmm2
   3451 	jmp	L$cbc_ret
   3452 .p2align	4
   3453 L$cbc_decrypt_bulk:
   3454 	leaq	(%rsp),%r11
   3455 	pushq	%rbp
   3456 	subq	$16,%rsp
   3457 	andq	$-16,%rsp
   3458 	movq	%rcx,%rbp
   3459 	movups	(%r8),%xmm10
   3460 	movl	%r10d,%eax
   3461 	cmpq	$0x50,%rdx
   3462 	jbe	L$cbc_dec_tail
   3463 
   3464 	movups	(%rcx),%xmm0
   3465 	movdqu	0(%rdi),%xmm2
   3466 	movdqu	16(%rdi),%xmm3
   3467 	movdqa	%xmm2,%xmm11
   3468 	movdqu	32(%rdi),%xmm4
   3469 	movdqa	%xmm3,%xmm12
   3470 	movdqu	48(%rdi),%xmm5
   3471 	movdqa	%xmm4,%xmm13
   3472 	movdqu	64(%rdi),%xmm6
   3473 	movdqa	%xmm5,%xmm14
   3474 	movdqu	80(%rdi),%xmm7
   3475 	movdqa	%xmm6,%xmm15
   3476 	leaq	_OPENSSL_ia32cap_P(%rip),%r9
   3477 	movl	4(%r9),%r9d
   3478 	cmpq	$0x70,%rdx
   3479 	jbe	L$cbc_dec_six_or_seven
   3480 
   3481 	andl	$71303168,%r9d
   3482 	subq	$0x50,%rdx
   3483 	cmpl	$4194304,%r9d
   3484 	je	L$cbc_dec_loop6_enter
   3485 	subq	$0x20,%rdx
   3486 	leaq	112(%rcx),%rcx
   3487 	jmp	L$cbc_dec_loop8_enter
   3488 .p2align	4
   3489 L$cbc_dec_loop8:
   3490 	movups	%xmm9,(%rsi)
   3491 	leaq	16(%rsi),%rsi
   3492 L$cbc_dec_loop8_enter:
   3493 	movdqu	96(%rdi),%xmm8
   3494 	pxor	%xmm0,%xmm2
   3495 	movdqu	112(%rdi),%xmm9
   3496 	pxor	%xmm0,%xmm3
   3497 	movups	16-112(%rcx),%xmm1
   3498 	pxor	%xmm0,%xmm4
   3499 	movq	$-1,%rbp
   3500 	cmpq	$0x70,%rdx
   3501 	pxor	%xmm0,%xmm5
   3502 	pxor	%xmm0,%xmm6
   3503 	pxor	%xmm0,%xmm7
   3504 	pxor	%xmm0,%xmm8
   3505 
   3506 .byte	102,15,56,222,209
   3507 	pxor	%xmm0,%xmm9
   3508 	movups	32-112(%rcx),%xmm0
   3509 .byte	102,15,56,222,217
   3510 .byte	102,15,56,222,225
   3511 .byte	102,15,56,222,233
   3512 .byte	102,15,56,222,241
   3513 .byte	102,15,56,222,249
   3514 .byte	102,68,15,56,222,193
   3515 	adcq	$0,%rbp
   3516 	andq	$128,%rbp
   3517 .byte	102,68,15,56,222,201
   3518 	addq	%rdi,%rbp
   3519 	movups	48-112(%rcx),%xmm1
   3520 .byte	102,15,56,222,208
   3521 .byte	102,15,56,222,216
   3522 .byte	102,15,56,222,224
   3523 .byte	102,15,56,222,232
   3524 .byte	102,15,56,222,240
   3525 .byte	102,15,56,222,248
   3526 .byte	102,68,15,56,222,192
   3527 .byte	102,68,15,56,222,200
   3528 	movups	64-112(%rcx),%xmm0
   3529 	nop
   3530 .byte	102,15,56,222,209
   3531 .byte	102,15,56,222,217
   3532 .byte	102,15,56,222,225
   3533 .byte	102,15,56,222,233
   3534 .byte	102,15,56,222,241
   3535 .byte	102,15,56,222,249
   3536 .byte	102,68,15,56,222,193
   3537 .byte	102,68,15,56,222,201
   3538 	movups	80-112(%rcx),%xmm1
   3539 	nop
   3540 .byte	102,15,56,222,208
   3541 .byte	102,15,56,222,216
   3542 .byte	102,15,56,222,224
   3543 .byte	102,15,56,222,232
   3544 .byte	102,15,56,222,240
   3545 .byte	102,15,56,222,248
   3546 .byte	102,68,15,56,222,192
   3547 .byte	102,68,15,56,222,200
   3548 	movups	96-112(%rcx),%xmm0
   3549 	nop
   3550 .byte	102,15,56,222,209
   3551 .byte	102,15,56,222,217
   3552 .byte	102,15,56,222,225
   3553 .byte	102,15,56,222,233
   3554 .byte	102,15,56,222,241
   3555 .byte	102,15,56,222,249
   3556 .byte	102,68,15,56,222,193
   3557 .byte	102,68,15,56,222,201
   3558 	movups	112-112(%rcx),%xmm1
   3559 	nop
   3560 .byte	102,15,56,222,208
   3561 .byte	102,15,56,222,216
   3562 .byte	102,15,56,222,224
   3563 .byte	102,15,56,222,232
   3564 .byte	102,15,56,222,240
   3565 .byte	102,15,56,222,248
   3566 .byte	102,68,15,56,222,192
   3567 .byte	102,68,15,56,222,200
   3568 	movups	128-112(%rcx),%xmm0
   3569 	nop
   3570 .byte	102,15,56,222,209
   3571 .byte	102,15,56,222,217
   3572 .byte	102,15,56,222,225
   3573 .byte	102,15,56,222,233
   3574 .byte	102,15,56,222,241
   3575 .byte	102,15,56,222,249
   3576 .byte	102,68,15,56,222,193
   3577 .byte	102,68,15,56,222,201
   3578 	movups	144-112(%rcx),%xmm1
   3579 	cmpl	$11,%eax
   3580 .byte	102,15,56,222,208
   3581 .byte	102,15,56,222,216
   3582 .byte	102,15,56,222,224
   3583 .byte	102,15,56,222,232
   3584 .byte	102,15,56,222,240
   3585 .byte	102,15,56,222,248
   3586 .byte	102,68,15,56,222,192
   3587 .byte	102,68,15,56,222,200
   3588 	movups	160-112(%rcx),%xmm0
   3589 	jb	L$cbc_dec_done
   3590 .byte	102,15,56,222,209
   3591 .byte	102,15,56,222,217
   3592 .byte	102,15,56,222,225
   3593 .byte	102,15,56,222,233
   3594 .byte	102,15,56,222,241
   3595 .byte	102,15,56,222,249
   3596 .byte	102,68,15,56,222,193
   3597 .byte	102,68,15,56,222,201
   3598 	movups	176-112(%rcx),%xmm1
   3599 	nop
   3600 .byte	102,15,56,222,208
   3601 .byte	102,15,56,222,216
   3602 .byte	102,15,56,222,224
   3603 .byte	102,15,56,222,232
   3604 .byte	102,15,56,222,240
   3605 .byte	102,15,56,222,248
   3606 .byte	102,68,15,56,222,192
   3607 .byte	102,68,15,56,222,200
   3608 	movups	192-112(%rcx),%xmm0
   3609 	je	L$cbc_dec_done
   3610 .byte	102,15,56,222,209
   3611 .byte	102,15,56,222,217
   3612 .byte	102,15,56,222,225
   3613 .byte	102,15,56,222,233
   3614 .byte	102,15,56,222,241
   3615 .byte	102,15,56,222,249
   3616 .byte	102,68,15,56,222,193
   3617 .byte	102,68,15,56,222,201
   3618 	movups	208-112(%rcx),%xmm1
   3619 	nop
   3620 .byte	102,15,56,222,208
   3621 .byte	102,15,56,222,216
   3622 .byte	102,15,56,222,224
   3623 .byte	102,15,56,222,232
   3624 .byte	102,15,56,222,240
   3625 .byte	102,15,56,222,248
   3626 .byte	102,68,15,56,222,192
   3627 .byte	102,68,15,56,222,200
   3628 	movups	224-112(%rcx),%xmm0
   3629 	jmp	L$cbc_dec_done
   3630 .p2align	4
   3631 L$cbc_dec_done:
   3632 .byte	102,15,56,222,209
   3633 .byte	102,15,56,222,217
   3634 	pxor	%xmm0,%xmm10
   3635 	pxor	%xmm0,%xmm11
   3636 .byte	102,15,56,222,225
   3637 .byte	102,15,56,222,233
   3638 	pxor	%xmm0,%xmm12
   3639 	pxor	%xmm0,%xmm13
   3640 .byte	102,15,56,222,241
   3641 .byte	102,15,56,222,249
   3642 	pxor	%xmm0,%xmm14
   3643 	pxor	%xmm0,%xmm15
   3644 .byte	102,68,15,56,222,193
   3645 .byte	102,68,15,56,222,201
   3646 	movdqu	80(%rdi),%xmm1
   3647 
   3648 .byte	102,65,15,56,223,210
   3649 	movdqu	96(%rdi),%xmm10
   3650 	pxor	%xmm0,%xmm1
   3651 .byte	102,65,15,56,223,219
   3652 	pxor	%xmm0,%xmm10
   3653 	movdqu	112(%rdi),%xmm0
   3654 .byte	102,65,15,56,223,228
   3655 	leaq	128(%rdi),%rdi
   3656 	movdqu	0(%rbp),%xmm11
   3657 .byte	102,65,15,56,223,237
   3658 .byte	102,65,15,56,223,246
   3659 	movdqu	16(%rbp),%xmm12
   3660 	movdqu	32(%rbp),%xmm13
   3661 .byte	102,65,15,56,223,255
   3662 .byte	102,68,15,56,223,193
   3663 	movdqu	48(%rbp),%xmm14
   3664 	movdqu	64(%rbp),%xmm15
   3665 .byte	102,69,15,56,223,202
   3666 	movdqa	%xmm0,%xmm10
   3667 	movdqu	80(%rbp),%xmm1
   3668 	movups	-112(%rcx),%xmm0
   3669 
   3670 	movups	%xmm2,(%rsi)
   3671 	movdqa	%xmm11,%xmm2
   3672 	movups	%xmm3,16(%rsi)
   3673 	movdqa	%xmm12,%xmm3
   3674 	movups	%xmm4,32(%rsi)
   3675 	movdqa	%xmm13,%xmm4
   3676 	movups	%xmm5,48(%rsi)
   3677 	movdqa	%xmm14,%xmm5
   3678 	movups	%xmm6,64(%rsi)
   3679 	movdqa	%xmm15,%xmm6
   3680 	movups	%xmm7,80(%rsi)
   3681 	movdqa	%xmm1,%xmm7
   3682 	movups	%xmm8,96(%rsi)
   3683 	leaq	112(%rsi),%rsi
   3684 
   3685 	subq	$0x80,%rdx
   3686 	ja	L$cbc_dec_loop8
   3687 
   3688 	movaps	%xmm9,%xmm2
   3689 	leaq	-112(%rcx),%rcx
   3690 	addq	$0x70,%rdx
   3691 	jle	L$cbc_dec_clear_tail_collected
   3692 	movups	%xmm9,(%rsi)
   3693 	leaq	16(%rsi),%rsi
   3694 	cmpq	$0x50,%rdx
   3695 	jbe	L$cbc_dec_tail
   3696 
   3697 	movaps	%xmm11,%xmm2
   3698 L$cbc_dec_six_or_seven:
   3699 	cmpq	$0x60,%rdx
   3700 	ja	L$cbc_dec_seven
   3701 
   3702 	movaps	%xmm7,%xmm8
   3703 	call	_aesni_decrypt6
   3704 	pxor	%xmm10,%xmm2
   3705 	movaps	%xmm8,%xmm10
   3706 	pxor	%xmm11,%xmm3
   3707 	movdqu	%xmm2,(%rsi)
   3708 	pxor	%xmm12,%xmm4
   3709 	movdqu	%xmm3,16(%rsi)
   3710 	pxor	%xmm3,%xmm3
   3711 	pxor	%xmm13,%xmm5
   3712 	movdqu	%xmm4,32(%rsi)
   3713 	pxor	%xmm4,%xmm4
   3714 	pxor	%xmm14,%xmm6
   3715 	movdqu	%xmm5,48(%rsi)
   3716 	pxor	%xmm5,%xmm5
   3717 	pxor	%xmm15,%xmm7
   3718 	movdqu	%xmm6,64(%rsi)
   3719 	pxor	%xmm6,%xmm6
   3720 	leaq	80(%rsi),%rsi
   3721 	movdqa	%xmm7,%xmm2
   3722 	pxor	%xmm7,%xmm7
   3723 	jmp	L$cbc_dec_tail_collected
   3724 
   3725 .p2align	4
   3726 L$cbc_dec_seven:
   3727 	movups	96(%rdi),%xmm8
   3728 	xorps	%xmm9,%xmm9
   3729 	call	_aesni_decrypt8
   3730 	movups	80(%rdi),%xmm9
   3731 	pxor	%xmm10,%xmm2
   3732 	movups	96(%rdi),%xmm10
   3733 	pxor	%xmm11,%xmm3
   3734 	movdqu	%xmm2,(%rsi)
   3735 	pxor	%xmm12,%xmm4
   3736 	movdqu	%xmm3,16(%rsi)
   3737 	pxor	%xmm3,%xmm3
   3738 	pxor	%xmm13,%xmm5
   3739 	movdqu	%xmm4,32(%rsi)
   3740 	pxor	%xmm4,%xmm4
   3741 	pxor	%xmm14,%xmm6
   3742 	movdqu	%xmm5,48(%rsi)
   3743 	pxor	%xmm5,%xmm5
   3744 	pxor	%xmm15,%xmm7
   3745 	movdqu	%xmm6,64(%rsi)
   3746 	pxor	%xmm6,%xmm6
   3747 	pxor	%xmm9,%xmm8
   3748 	movdqu	%xmm7,80(%rsi)
   3749 	pxor	%xmm7,%xmm7
   3750 	leaq	96(%rsi),%rsi
   3751 	movdqa	%xmm8,%xmm2
   3752 	pxor	%xmm8,%xmm8
   3753 	pxor	%xmm9,%xmm9
   3754 	jmp	L$cbc_dec_tail_collected
   3755 
   3756 .p2align	4
   3757 L$cbc_dec_loop6:
   3758 	movups	%xmm7,(%rsi)
   3759 	leaq	16(%rsi),%rsi
   3760 	movdqu	0(%rdi),%xmm2
   3761 	movdqu	16(%rdi),%xmm3
   3762 	movdqa	%xmm2,%xmm11
   3763 	movdqu	32(%rdi),%xmm4
   3764 	movdqa	%xmm3,%xmm12
   3765 	movdqu	48(%rdi),%xmm5
   3766 	movdqa	%xmm4,%xmm13
   3767 	movdqu	64(%rdi),%xmm6
   3768 	movdqa	%xmm5,%xmm14
   3769 	movdqu	80(%rdi),%xmm7
   3770 	movdqa	%xmm6,%xmm15
   3771 L$cbc_dec_loop6_enter:
   3772 	leaq	96(%rdi),%rdi
   3773 	movdqa	%xmm7,%xmm8
   3774 
   3775 	call	_aesni_decrypt6
   3776 
   3777 	pxor	%xmm10,%xmm2
   3778 	movdqa	%xmm8,%xmm10
   3779 	pxor	%xmm11,%xmm3
   3780 	movdqu	%xmm2,(%rsi)
   3781 	pxor	%xmm12,%xmm4
   3782 	movdqu	%xmm3,16(%rsi)
   3783 	pxor	%xmm13,%xmm5
   3784 	movdqu	%xmm4,32(%rsi)
   3785 	pxor	%xmm14,%xmm6
   3786 	movq	%rbp,%rcx
   3787 	movdqu	%xmm5,48(%rsi)
   3788 	pxor	%xmm15,%xmm7
   3789 	movl	%r10d,%eax
   3790 	movdqu	%xmm6,64(%rsi)
   3791 	leaq	80(%rsi),%rsi
   3792 	subq	$0x60,%rdx
   3793 	ja	L$cbc_dec_loop6
   3794 
   3795 	movdqa	%xmm7,%xmm2
   3796 	addq	$0x50,%rdx
   3797 	jle	L$cbc_dec_clear_tail_collected
   3798 	movups	%xmm7,(%rsi)
   3799 	leaq	16(%rsi),%rsi
   3800 
   3801 L$cbc_dec_tail:
   3802 	movups	(%rdi),%xmm2
   3803 	subq	$0x10,%rdx
   3804 	jbe	L$cbc_dec_one
   3805 
   3806 	movups	16(%rdi),%xmm3
   3807 	movaps	%xmm2,%xmm11
   3808 	subq	$0x10,%rdx
   3809 	jbe	L$cbc_dec_two
   3810 
   3811 	movups	32(%rdi),%xmm4
   3812 	movaps	%xmm3,%xmm12
   3813 	subq	$0x10,%rdx
   3814 	jbe	L$cbc_dec_three
   3815 
   3816 	movups	48(%rdi),%xmm5
   3817 	movaps	%xmm4,%xmm13
   3818 	subq	$0x10,%rdx
   3819 	jbe	L$cbc_dec_four
   3820 
   3821 	movups	64(%rdi),%xmm6
   3822 	movaps	%xmm5,%xmm14
   3823 	movaps	%xmm6,%xmm15
   3824 	xorps	%xmm7,%xmm7
   3825 	call	_aesni_decrypt6
   3826 	pxor	%xmm10,%xmm2
   3827 	movaps	%xmm15,%xmm10
   3828 	pxor	%xmm11,%xmm3
   3829 	movdqu	%xmm2,(%rsi)
   3830 	pxor	%xmm12,%xmm4
   3831 	movdqu	%xmm3,16(%rsi)
   3832 	pxor	%xmm3,%xmm3
   3833 	pxor	%xmm13,%xmm5
   3834 	movdqu	%xmm4,32(%rsi)
   3835 	pxor	%xmm4,%xmm4
   3836 	pxor	%xmm14,%xmm6
   3837 	movdqu	%xmm5,48(%rsi)
   3838 	pxor	%xmm5,%xmm5
   3839 	leaq	64(%rsi),%rsi
   3840 	movdqa	%xmm6,%xmm2
   3841 	pxor	%xmm6,%xmm6
   3842 	pxor	%xmm7,%xmm7
   3843 	subq	$0x10,%rdx
   3844 	jmp	L$cbc_dec_tail_collected
   3845 
   3846 .p2align	4
   3847 L$cbc_dec_one:
   3848 	movaps	%xmm2,%xmm11
   3849 	movups	(%rcx),%xmm0
   3850 	movups	16(%rcx),%xmm1
   3851 	leaq	32(%rcx),%rcx
   3852 	xorps	%xmm0,%xmm2
   3853 L$oop_dec1_17:
   3854 .byte	102,15,56,222,209
   3855 	decl	%eax
   3856 	movups	(%rcx),%xmm1
   3857 	leaq	16(%rcx),%rcx
   3858 	jnz	L$oop_dec1_17
   3859 .byte	102,15,56,223,209
   3860 	xorps	%xmm10,%xmm2
   3861 	movaps	%xmm11,%xmm10
   3862 	jmp	L$cbc_dec_tail_collected
   3863 .p2align	4
   3864 L$cbc_dec_two:
   3865 	movaps	%xmm3,%xmm12
   3866 	call	_aesni_decrypt2
   3867 	pxor	%xmm10,%xmm2
   3868 	movaps	%xmm12,%xmm10
   3869 	pxor	%xmm11,%xmm3
   3870 	movdqu	%xmm2,(%rsi)
   3871 	movdqa	%xmm3,%xmm2
   3872 	pxor	%xmm3,%xmm3
   3873 	leaq	16(%rsi),%rsi
   3874 	jmp	L$cbc_dec_tail_collected
   3875 .p2align	4
   3876 L$cbc_dec_three:
   3877 	movaps	%xmm4,%xmm13
   3878 	call	_aesni_decrypt3
   3879 	pxor	%xmm10,%xmm2
   3880 	movaps	%xmm13,%xmm10
   3881 	pxor	%xmm11,%xmm3
   3882 	movdqu	%xmm2,(%rsi)
   3883 	pxor	%xmm12,%xmm4
   3884 	movdqu	%xmm3,16(%rsi)
   3885 	pxor	%xmm3,%xmm3
   3886 	movdqa	%xmm4,%xmm2
   3887 	pxor	%xmm4,%xmm4
   3888 	leaq	32(%rsi),%rsi
   3889 	jmp	L$cbc_dec_tail_collected
   3890 .p2align	4
   3891 L$cbc_dec_four:
   3892 	movaps	%xmm5,%xmm14
   3893 	call	_aesni_decrypt4
   3894 	pxor	%xmm10,%xmm2
   3895 	movaps	%xmm14,%xmm10
   3896 	pxor	%xmm11,%xmm3
   3897 	movdqu	%xmm2,(%rsi)
   3898 	pxor	%xmm12,%xmm4
   3899 	movdqu	%xmm3,16(%rsi)
   3900 	pxor	%xmm3,%xmm3
   3901 	pxor	%xmm13,%xmm5
   3902 	movdqu	%xmm4,32(%rsi)
   3903 	pxor	%xmm4,%xmm4
   3904 	movdqa	%xmm5,%xmm2
   3905 	pxor	%xmm5,%xmm5
   3906 	leaq	48(%rsi),%rsi
   3907 	jmp	L$cbc_dec_tail_collected
   3908 
   3909 .p2align	4
   3910 L$cbc_dec_clear_tail_collected:
   3911 	pxor	%xmm3,%xmm3
   3912 	pxor	%xmm4,%xmm4
   3913 	pxor	%xmm5,%xmm5
   3914 	pxor	%xmm6,%xmm6
   3915 	pxor	%xmm7,%xmm7
   3916 	pxor	%xmm8,%xmm8
   3917 	pxor	%xmm9,%xmm9
   3918 L$cbc_dec_tail_collected:
   3919 	movups	%xmm10,(%r8)
   3920 	andq	$15,%rdx
   3921 	jnz	L$cbc_dec_tail_partial
   3922 	movups	%xmm2,(%rsi)
   3923 	pxor	%xmm2,%xmm2
   3924 	jmp	L$cbc_dec_ret
   3925 .p2align	4
   3926 L$cbc_dec_tail_partial:
   3927 	movaps	%xmm2,(%rsp)
   3928 	pxor	%xmm2,%xmm2
   3929 	movq	$16,%rcx
   3930 	movq	%rsi,%rdi
   3931 	subq	%rdx,%rcx
   3932 	leaq	(%rsp),%rsi
   3933 .long	0x9066A4F3
   3934 	movdqa	%xmm2,(%rsp)
   3935 
   3936 L$cbc_dec_ret:
   3937 	xorps	%xmm0,%xmm0
   3938 	pxor	%xmm1,%xmm1
   3939 	movq	-8(%r11),%rbp
   3940 	leaq	(%r11),%rsp
   3941 L$cbc_ret:
   3942 	.byte	0xf3,0xc3
   3943 
   3944 .globl	_aesni_set_decrypt_key
   3945 .private_extern _aesni_set_decrypt_key
   3946 
   3947 .p2align	4
        // _aesni_set_decrypt_key: build a decryption key schedule in place.
        //
        // The incoming registers are passed straight through to
        // __aesni_set_encrypt_key (%rdi = user key, %esi = key bits,
        // %rdx = schedule buffer). If that call returns 0 in %eax, the round
        // keys it wrote are reversed end-for-end and every round key except
        // the first and the last is run through aesimc. If it returns
        // non-zero, the schedule is left as the callee wrote it.
        //
        // Relies on the callee leaving %rdx unchanged and leaving its stored
        // round value (9, 11 or 13) in %esi.
        // Returns: %eax as returned by __aesni_set_encrypt_key.
        // On the success path %xmm0 and %xmm1 are zeroed before returning.
   3948 _aesni_set_decrypt_key:
   3949 .byte	0x48,0x83,0xEC,0x08	// subq $8,%rsp (hand-encoded)
   3950 	call	__aesni_set_encrypt_key
   3951 	shll	$4,%esi	// round value * 16 = byte distance used to find the last round key
   3952 	testl	%eax,%eax
   3953 	jnz	L$dec_key_ret	// callee reported an error: skip the inversion
   3954 	leaq	16(%rdx,%rsi,1),%rdi	// %rdi -> last round key, %rdx -> first round key
   3955 
        // Swap the first and last round keys without transforming them.
   3956 	movups	(%rdx),%xmm0
   3957 	movups	(%rdi),%xmm1
   3958 	movups	%xmm0,(%rdi)
   3959 	movups	%xmm1,(%rdx)
   3960 	leaq	16(%rdx),%rdx
   3961 	leaq	-16(%rdi),%rdi
   3962 
        // Walk inward from both ends: load a pair, apply aesimc to each,
        // store them swapped. Loops while %rdi is still above %rdx.
   3963 L$dec_key_inverse:
   3964 	movups	(%rdx),%xmm0
   3965 	movups	(%rdi),%xmm1
   3966 .byte	102,15,56,219,192	// aesimc %xmm0,%xmm0
   3967 .byte	102,15,56,219,201	// aesimc %xmm1,%xmm1
   3968 	leaq	16(%rdx),%rdx
   3969 	leaq	-16(%rdi),%rdi
   3970 	movups	%xmm0,16(%rdi)
   3971 	movups	%xmm1,-16(%rdx)
   3972 	cmpq	%rdx,%rdi
   3973 	ja	L$dec_key_inverse
   3974 
        // Remaining key at (%rdx): transform it and store it through %rdi.
   3975 	movups	(%rdx),%xmm0
   3976 .byte	102,15,56,219,192	// aesimc %xmm0,%xmm0
   3977 	pxor	%xmm1,%xmm1	// scrub key material from the scratch registers
   3978 	movups	%xmm0,(%rdi)
   3979 	pxor	%xmm0,%xmm0
   3980 L$dec_key_ret:
   3981 	addq	$8,%rsp	// undo the 8-byte adjustment made on entry
   3982 	.byte	0xf3,0xc3	// rep ret
   3983 L$SEH_end_set_decrypt_key:
   3984 
   3985 .globl	_aesni_set_encrypt_key
   3986 .private_extern _aesni_set_encrypt_key
   3987 
   3988 .p2align	4
        // _aesni_set_encrypt_key / __aesni_set_encrypt_key: expand a user AES
        // key into a key schedule.
        //
        // Inputs as used by this routine:
        //   %rdi  pointer to the user key (16 bytes read at offset 0; 8 more
        //         at offset 16 for 192 bits, 16 more for 256 bits)
        //   %esi  key length in bits; only 128, 192 and 256 are accepted
        //   %rdx  pointer to the output schedule; never modified here
        // Returns in %rax/%eax:
        //    0  success
        //   -1  %rdi or %rdx is NULL
        //   -2  %esi is not 128, 192 or 256
        // On success %esi is left holding 9, 11 or 13 (for 128/192/256 bits)
        // and that value is also stored at byte offset 240 of the schedule.
        //
        // Each key size has two implementations. The "_alt" one (pshufb +
        // aesenclast) is taken when word 1 of _OPENSSL_ia32cap_P, masked with
        // 0x10000800, equals 0x10000000; otherwise the aeskeygenassist-based
        // one is used.
        // NOTE(review): bits 28 and 11 of that word presumably correspond to
        // AVX and XOP per OpenSSL's OPENSSL_ia32cap layout - confirm.
        //
        // Every exit goes through L$enc_key_ret, which zeroes %xmm0-%xmm5.
   3989 _aesni_set_encrypt_key:
   3990 __aesni_set_encrypt_key:
   3991 .byte	0x48,0x83,0xEC,0x08	// subq $8,%rsp (hand-encoded)
   3992 	movq	$-1,%rax	// default return value for the NULL checks below
   3993 	testq	%rdi,%rdi
   3994 	jz	L$enc_key_ret
   3995 	testq	%rdx,%rdx
   3996 	jz	L$enc_key_ret
   3997 
   3998 	movups	(%rdi),%xmm0	// first 16 bytes of the user key
   3999 	xorps	%xmm4,%xmm4	// scratch for the key_expansion helpers starts out zero
   4000 	leaq	_OPENSSL_ia32cap_P(%rip),%r10
   4001 	movl	4(%r10),%r10d
   4002 	andl	$268437504,%r10d	// keep bits 28 and 11 (mask 0x10000800)
   4003 	leaq	16(%rdx),%rax	// %rax = write cursor, starting at the second 16-byte slot
   4004 	cmpl	$256,%esi
   4005 	je	L$14rounds
   4006 	cmpl	$192,%esi
   4007 	je	L$12rounds
   4008 	cmpl	$128,%esi
   4009 	jne	L$bad_keybits
   4010 
        // 128-bit key, aeskeygenassist path. Each helper call produces the
        // next round key in %xmm0; all but the _cold entry first store the
        // previous one at (%rax) and advance %rax by 16.
   4011 L$10rounds:
   4012 	movl	$9,%esi
   4013 	cmpl	$268435456,%r10d	// 0x10000000: bit 28 set, bit 11 clear -> alt path
   4014 	je	L$10rounds_alt
   4015 
   4016 	movups	%xmm0,(%rdx)	// slot 0 of the schedule is the user key itself
   4017 .byte	102,15,58,223,200,1	// aeskeygenassist $1,%xmm0,%xmm1
   4018 	call	L$key_expansion_128_cold
   4019 .byte	102,15,58,223,200,2	// aeskeygenassist $2,%xmm0,%xmm1
   4020 	call	L$key_expansion_128
   4021 .byte	102,15,58,223,200,4	// aeskeygenassist $4,%xmm0,%xmm1
   4022 	call	L$key_expansion_128
   4023 .byte	102,15,58,223,200,8	// aeskeygenassist $8,%xmm0,%xmm1
   4024 	call	L$key_expansion_128
   4025 .byte	102,15,58,223,200,16	// aeskeygenassist $16,%xmm0,%xmm1
   4026 	call	L$key_expansion_128
   4027 .byte	102,15,58,223,200,32	// aeskeygenassist $32,%xmm0,%xmm1
   4028 	call	L$key_expansion_128
   4029 .byte	102,15,58,223,200,64	// aeskeygenassist $64,%xmm0,%xmm1
   4030 	call	L$key_expansion_128
   4031 .byte	102,15,58,223,200,128	// aeskeygenassist $128,%xmm0,%xmm1
   4032 	call	L$key_expansion_128
   4033 .byte	102,15,58,223,200,27	// aeskeygenassist $27,%xmm0,%xmm1
   4034 	call	L$key_expansion_128
   4035 .byte	102,15,58,223,200,54	// aeskeygenassist $54,%xmm0,%xmm1
   4036 	call	L$key_expansion_128
   4037 	movups	%xmm0,(%rax)	// final round key, at schedule offset 160
   4038 	movl	%esi,80(%rax)	// round value, at schedule offset 240
   4039 	xorl	%eax,%eax
   4040 	jmp	L$enc_key_ret
   4041 
        // 128-bit key, alt path: %xmm5 = L$key_rotate shuffle mask,
        // %xmm4 = round constant (doubled each iteration by pslld),
        // %xmm2 = copy of the previous round key.
   4042 .p2align	4
   4043 L$10rounds_alt:
   4044 	movdqa	L$key_rotate(%rip),%xmm5
   4045 	movl	$8,%r10d	// eight loop iterations; the last two round keys are unrolled below
   4046 	movdqa	L$key_rcon1(%rip),%xmm4
   4047 	movdqa	%xmm0,%xmm2
   4048 	movdqu	%xmm0,(%rdx)
   4049 	jmp	L$oop_key128
   4050 
   4051 .p2align	4
   4052 L$oop_key128:
   4053 .byte	102,15,56,0,197	// pshufb %xmm5,%xmm0
   4054 .byte	102,15,56,221,196	// aesenclast %xmm4,%xmm0
   4055 	pslld	$1,%xmm4
   4056 	leaq	16(%rax),%rax
   4057 
        // %xmm2 ^= (%xmm2 << 32) ^ (%xmm2 << 64) ^ (%xmm2 << 96), byte shifts
        // across the whole register, with %xmm3 as scratch.
   4058 	movdqa	%xmm2,%xmm3
   4059 	pslldq	$4,%xmm2
   4060 	pxor	%xmm2,%xmm3
   4061 	pslldq	$4,%xmm2
   4062 	pxor	%xmm2,%xmm3
   4063 	pslldq	$4,%xmm2
   4064 	pxor	%xmm3,%xmm2
   4065 
   4066 	pxor	%xmm2,%xmm0	// %xmm0 = new round key
   4067 	movdqu	%xmm0,-16(%rax)
   4068 	movdqa	%xmm0,%xmm2
   4069 
   4070 	decl	%r10d
   4071 	jnz	L$oop_key128
   4072 
        // Second-to-last round key: same step, round constant reloaded from
        // L$key_rcon1b (0x1b).
   4073 	movdqa	L$key_rcon1b(%rip),%xmm4
   4074 
   4075 .byte	102,15,56,0,197	// pshufb %xmm5,%xmm0
   4076 .byte	102,15,56,221,196	// aesenclast %xmm4,%xmm0
   4077 	pslld	$1,%xmm4
   4078 
   4079 	movdqa	%xmm2,%xmm3
   4080 	pslldq	$4,%xmm2
   4081 	pxor	%xmm2,%xmm3
   4082 	pslldq	$4,%xmm2
   4083 	pxor	%xmm2,%xmm3
   4084 	pslldq	$4,%xmm2
   4085 	pxor	%xmm3,%xmm2
   4086 
   4087 	pxor	%xmm2,%xmm0
   4088 	movdqu	%xmm0,(%rax)
   4089 
        // Last round key, using the doubled constant left in %xmm4.
   4090 	movdqa	%xmm0,%xmm2
   4091 .byte	102,15,56,0,197	// pshufb %xmm5,%xmm0
   4092 .byte	102,15,56,221,196	// aesenclast %xmm4,%xmm0
   4093 
   4094 	movdqa	%xmm2,%xmm3
   4095 	pslldq	$4,%xmm2
   4096 	pxor	%xmm2,%xmm3
   4097 	pslldq	$4,%xmm2
   4098 	pxor	%xmm2,%xmm3
   4099 	pslldq	$4,%xmm2
   4100 	pxor	%xmm3,%xmm2
   4101 
   4102 	pxor	%xmm2,%xmm0
   4103 	movdqu	%xmm0,16(%rax)
   4104 
   4105 	movl	%esi,96(%rax)	// round value, at schedule offset 240
   4106 	xorl	%eax,%eax
   4107 	jmp	L$enc_key_ret
   4108 
        // 192-bit key, aeskeygenassist path. %xmm0 holds key bytes 0-15 and
        // the low half of %xmm2 holds key bytes 16-23.
   4109 .p2align	4
   4110 L$12rounds:
   4111 	movq	16(%rdi),%xmm2
   4112 	movl	$11,%esi
   4113 	cmpl	$268435456,%r10d	// 0x10000000: bit 28 set, bit 11 clear -> alt path
   4114 	je	L$12rounds_alt
   4115 
   4116 	movups	%xmm0,(%rdx)
   4117 .byte	102,15,58,223,202,1	// aeskeygenassist $1,%xmm2,%xmm1
   4118 	call	L$key_expansion_192a_cold
   4119 .byte	102,15,58,223,202,2	// aeskeygenassist $2,%xmm2,%xmm1
   4120 	call	L$key_expansion_192b
   4121 .byte	102,15,58,223,202,4	// aeskeygenassist $4,%xmm2,%xmm1
   4122 	call	L$key_expansion_192a
   4123 .byte	102,15,58,223,202,8	// aeskeygenassist $8,%xmm2,%xmm1
   4124 	call	L$key_expansion_192b
   4125 .byte	102,15,58,223,202,16	// aeskeygenassist $16,%xmm2,%xmm1
   4126 	call	L$key_expansion_192a
   4127 .byte	102,15,58,223,202,32	// aeskeygenassist $32,%xmm2,%xmm1
   4128 	call	L$key_expansion_192b
   4129 .byte	102,15,58,223,202,64	// aeskeygenassist $64,%xmm2,%xmm1
   4130 	call	L$key_expansion_192a
   4131 .byte	102,15,58,223,202,128	// aeskeygenassist $128,%xmm2,%xmm1
   4132 	call	L$key_expansion_192b
   4133 	movups	%xmm0,(%rax)	// final round key, at schedule offset 192
   4134 	movl	%esi,48(%rax)	// round value, at schedule offset 240
   4135 	xorq	%rax,%rax
   4136 	jmp	L$enc_key_ret
   4137 
        // 192-bit key, alt path: eight iterations, each advancing the write
        // cursor by 24 bytes.
   4138 .p2align	4
   4139 L$12rounds_alt:
   4140 	movdqa	L$key_rotate192(%rip),%xmm5
   4141 	movdqa	L$key_rcon1(%rip),%xmm4
   4142 	movl	$8,%r10d
   4143 	movdqu	%xmm0,(%rdx)
   4144 	jmp	L$oop_key192
   4145 
   4146 .p2align	4
   4147 L$oop_key192:
   4148 	movq	%xmm2,0(%rax)	// store the low 8 bytes of %xmm2
   4149 	movdqa	%xmm2,%xmm1
   4150 .byte	102,15,56,0,213	// pshufb %xmm5,%xmm2
   4151 .byte	102,15,56,221,212	// aesenclast %xmm4,%xmm2
   4152 	pslld	$1,%xmm4	// double the round constant for the next iteration
   4153 	leaq	24(%rax),%rax
   4154 
        // %xmm0 ^= (%xmm0 << 32) ^ (%xmm0 << 64) ^ (%xmm0 << 96), with %xmm3
        // as scratch.
   4155 	movdqa	%xmm0,%xmm3
   4156 	pslldq	$4,%xmm0
   4157 	pxor	%xmm0,%xmm3
   4158 	pslldq	$4,%xmm0
   4159 	pxor	%xmm0,%xmm3
   4160 	pslldq	$4,%xmm0
   4161 	pxor	%xmm3,%xmm0
   4162 
   4163 	pshufd	$0xff,%xmm0,%xmm3	// replicate dword 3 of %xmm0 into every lane
   4164 	pxor	%xmm1,%xmm3
   4165 	pslldq	$4,%xmm1
   4166 	pxor	%xmm1,%xmm3
   4167 
   4168 	pxor	%xmm2,%xmm0
   4169 	pxor	%xmm3,%xmm2
   4170 	movdqu	%xmm0,-16(%rax)
   4171 
   4172 	decl	%r10d
   4173 	jnz	L$oop_key192
   4174 
   4175 	movl	%esi,32(%rax)	// round value, at schedule offset 240
   4176 	xorl	%eax,%eax
   4177 	jmp	L$enc_key_ret
   4178 
        // 256-bit key, aeskeygenassist path. %xmm0 = key bytes 0-15,
        // %xmm2 = key bytes 16-31; both are stored verbatim as the first two
        // schedule slots, so the write cursor starts at offset 32.
   4179 .p2align	4
   4180 L$14rounds:
   4181 	movups	16(%rdi),%xmm2
   4182 	movl	$13,%esi
   4183 	leaq	16(%rax),%rax
   4184 	cmpl	$268435456,%r10d	// 0x10000000: bit 28 set, bit 11 clear -> alt path
   4185 	je	L$14rounds_alt
   4186 
   4187 	movups	%xmm0,(%rdx)
   4188 	movups	%xmm2,16(%rdx)
   4189 .byte	102,15,58,223,202,1	// aeskeygenassist $1,%xmm2,%xmm1
   4190 	call	L$key_expansion_256a_cold
   4191 .byte	102,15,58,223,200,1	// aeskeygenassist $1,%xmm0,%xmm1
   4192 	call	L$key_expansion_256b
   4193 .byte	102,15,58,223,202,2	// aeskeygenassist $2,%xmm2,%xmm1
   4194 	call	L$key_expansion_256a
   4195 .byte	102,15,58,223,200,2	// aeskeygenassist $2,%xmm0,%xmm1
   4196 	call	L$key_expansion_256b
   4197 .byte	102,15,58,223,202,4	// aeskeygenassist $4,%xmm2,%xmm1
   4198 	call	L$key_expansion_256a
   4199 .byte	102,15,58,223,200,4	// aeskeygenassist $4,%xmm0,%xmm1
   4200 	call	L$key_expansion_256b
   4201 .byte	102,15,58,223,202,8	// aeskeygenassist $8,%xmm2,%xmm1
   4202 	call	L$key_expansion_256a
   4203 .byte	102,15,58,223,200,8	// aeskeygenassist $8,%xmm0,%xmm1
   4204 	call	L$key_expansion_256b
   4205 .byte	102,15,58,223,202,16	// aeskeygenassist $16,%xmm2,%xmm1
   4206 	call	L$key_expansion_256a
   4207 .byte	102,15,58,223,200,16	// aeskeygenassist $16,%xmm0,%xmm1
   4208 	call	L$key_expansion_256b
   4209 .byte	102,15,58,223,202,32	// aeskeygenassist $32,%xmm2,%xmm1
   4210 	call	L$key_expansion_256a
   4211 .byte	102,15,58,223,200,32	// aeskeygenassist $32,%xmm0,%xmm1
   4212 	call	L$key_expansion_256b
   4213 .byte	102,15,58,223,202,64	// aeskeygenassist $64,%xmm2,%xmm1
   4214 	call	L$key_expansion_256a
   4215 	movups	%xmm0,(%rax)	// final round key, at schedule offset 224
   4216 	movl	%esi,16(%rax)	// round value, at schedule offset 240
   4217 	xorq	%rax,%rax
   4218 	jmp	L$enc_key_ret
   4219 
        // 256-bit key, alt path: %r10d counts seven passes; every pass writes
        // a round key derived from %xmm0, and all but the last also write one
        // derived from %xmm1 (the running copy of the second key half).
   4220 .p2align	4
   4221 L$14rounds_alt:
   4222 	movdqa	L$key_rotate(%rip),%xmm5
   4223 	movdqa	L$key_rcon1(%rip),%xmm4
   4224 	movl	$7,%r10d
   4225 	movdqu	%xmm0,0(%rdx)
   4226 	movdqa	%xmm2,%xmm1
   4227 	movdqu	%xmm2,16(%rdx)
   4228 	jmp	L$oop_key256
   4229 
   4230 .p2align	4
   4231 L$oop_key256:
   4232 .byte	102,15,56,0,213	// pshufb %xmm5,%xmm2
   4233 .byte	102,15,56,221,212	// aesenclast %xmm4,%xmm2
   4234 
   4235 	movdqa	%xmm0,%xmm3
   4236 	pslldq	$4,%xmm0
   4237 	pxor	%xmm0,%xmm3
   4238 	pslldq	$4,%xmm0
   4239 	pxor	%xmm0,%xmm3
   4240 	pslldq	$4,%xmm0
   4241 	pxor	%xmm3,%xmm0
   4242 	pslld	$1,%xmm4	// double the round constant for the next pass
   4243 
   4244 	pxor	%xmm2,%xmm0
   4245 	movdqu	%xmm0,(%rax)
   4246 
   4247 	decl	%r10d
   4248 	jz	L$done_key256	// seventh pass: the key just stored is the last one
   4249 
   4250 	pshufd	$0xff,%xmm0,%xmm2	// replicate dword 3 of the new key into every lane
   4251 	pxor	%xmm3,%xmm3
   4252 .byte	102,15,56,221,211	// aesenclast %xmm3,%xmm2 (all-zero round key, no constant mixed in)
   4253 
   4254 	movdqa	%xmm1,%xmm3
   4255 	pslldq	$4,%xmm1
   4256 	pxor	%xmm1,%xmm3
   4257 	pslldq	$4,%xmm1
   4258 	pxor	%xmm1,%xmm3
   4259 	pslldq	$4,%xmm1
   4260 	pxor	%xmm3,%xmm1
   4261 
   4262 	pxor	%xmm1,%xmm2
   4263 	movdqu	%xmm2,16(%rax)
   4264 	leaq	32(%rax),%rax
   4265 	movdqa	%xmm2,%xmm1
   4266 
   4267 	jmp	L$oop_key256
   4268 
   4269 L$done_key256:
   4270 	movl	%esi,16(%rax)	// round value, at schedule offset 240
   4271 	xorl	%eax,%eax
   4272 	jmp	L$enc_key_ret
   4273 
   4274 .p2align	4
   4275 L$bad_keybits:
   4276 	movq	$-2,%rax	// unsupported key length
        // Common exit: scrub every XMM register this routine used, undo the
        // entry stack adjustment and return with the status in %rax.
   4277 L$enc_key_ret:
   4278 	pxor	%xmm0,%xmm0
   4279 	pxor	%xmm1,%xmm1
   4280 	pxor	%xmm2,%xmm2
   4281 	pxor	%xmm3,%xmm3
   4282 	pxor	%xmm4,%xmm4
   4283 	pxor	%xmm5,%xmm5
   4284 	addq	$8,%rsp
   4285 	.byte	0xf3,0xc3	// rep ret
   4286 L$SEH_end_set_encrypt_key:
   4287 
   4288 .p2align	4
        // L$key_expansion_128: one 128-bit key-schedule step, reached by
        // `call` from the L$10rounds sequence.
        //   In:  %xmm0 = current round key
        //        %xmm1 = aeskeygenassist result the caller just produced
        //        %xmm4 = scratch carried between calls (zeroed by the caller
        //                before the first call)
        //        %rax  = write cursor into the schedule
        //   Out: %xmm0 = next round key; %xmm1 and %xmm4 are overwritten.
        // The plain entry first stores the current key at (%rax) and advances
        // %rax by 16; the _cold entry skips that store.
   4289 L$key_expansion_128:
   4290 	movups	%xmm0,(%rax)
   4291 	leaq	16(%rax),%rax
   4292 L$key_expansion_128_cold:
   4293 	shufps	$16,%xmm0,%xmm4
   4294 	xorps	%xmm4,%xmm0
   4295 	shufps	$140,%xmm0,%xmm4
   4296 	xorps	%xmm4,%xmm0
   4297 	shufps	$255,%xmm1,%xmm1	// replicate dword 3 of %xmm1 into every lane
   4298 	xorps	%xmm1,%xmm0
   4299 	.byte	0xf3,0xc3	// rep ret
   4300 
   4301 .p2align	4
        // L$key_expansion_192a / _192a_cold / _192b: 192-bit key-schedule
        // steps, reached by `call` from the L$12rounds sequence.
        //   In:  %xmm0, %xmm2 = running key state
        //        %xmm1 = aeskeygenassist result the caller just produced
        //        %xmm4 = scratch carried between calls
        //        %rax  = write cursor into the schedule
        //   Out: %xmm0 and %xmm2 updated; %xmm1, %xmm3, %xmm4 overwritten.
        // 192a stores %xmm0 at (%rax) and advances %rax by 16 before the
        // update; 192a_cold skips the store. Both save a copy of %xmm2 in
        // %xmm5, which the next 192b call consumes.
   4302 L$key_expansion_192a:
   4303 	movups	%xmm0,(%rax)
   4304 	leaq	16(%rax),%rax
   4305 L$key_expansion_192a_cold:
   4306 	movaps	%xmm2,%xmm5
        // Shared update tail; 192b jumps here after writing its own output.
   4307 L$key_expansion_192b_warm:
   4308 	shufps	$16,%xmm0,%xmm4
   4309 	movdqa	%xmm2,%xmm3
   4310 	xorps	%xmm4,%xmm0
   4311 	shufps	$140,%xmm0,%xmm4
   4312 	pslldq	$4,%xmm3
   4313 	xorps	%xmm4,%xmm0
   4314 	pshufd	$85,%xmm1,%xmm1	// replicate dword 1 of %xmm1 into every lane
   4315 	pxor	%xmm3,%xmm2
   4316 	pxor	%xmm1,%xmm0
   4317 	pshufd	$255,%xmm0,%xmm3	// replicate dword 3 of the updated %xmm0
   4318 	pxor	%xmm3,%xmm2
   4319 	.byte	0xf3,0xc3	// rep ret
   4320 
   4321 .p2align	4
        // 192b: writes 32 bytes of schedule assembled with shufps from %xmm5,
        // %xmm0 and %xmm2, advances %rax by 32, then runs the shared tail.
   4322 L$key_expansion_192b:
   4323 	movaps	%xmm0,%xmm3
   4324 	shufps	$68,%xmm0,%xmm5
   4325 	movups	%xmm5,(%rax)
   4326 	shufps	$78,%xmm2,%xmm3
   4327 	movups	%xmm3,16(%rax)
   4328 	leaq	32(%rax),%rax
   4329 	jmp	L$key_expansion_192b_warm
   4330 
   4331 .p2align	4
        // L$key_expansion_256a: 256-bit key-schedule step that updates %xmm0,
        // reached by `call` from the L$14rounds sequence.
        //   In:  %xmm0 = key half to update, %xmm2 = other key half
        //        %xmm1 = aeskeygenassist result the caller just produced
        //        %xmm4 = scratch carried between calls
        //        %rax  = write cursor into the schedule
        //   Out: %xmm0 updated; %xmm1 and %xmm4 overwritten.
        // The plain entry first stores %xmm2 at (%rax) and advances %rax by
        // 16; the _cold entry skips that store.
   4332 L$key_expansion_256a:
   4333 	movups	%xmm2,(%rax)
   4334 	leaq	16(%rax),%rax
   4335 L$key_expansion_256a_cold:
   4336 	shufps	$16,%xmm0,%xmm4
   4337 	xorps	%xmm4,%xmm0
   4338 	shufps	$140,%xmm0,%xmm4
   4339 	xorps	%xmm4,%xmm0
   4340 	shufps	$255,%xmm1,%xmm1	// replicate dword 3 of %xmm1 into every lane
   4341 	xorps	%xmm1,%xmm0
   4342 	.byte	0xf3,0xc3	// rep ret
   4343 
   4344 .p2align	4
        // L$key_expansion_256b: 256-bit key-schedule step that updates %xmm2,
        // reached by `call` from the L$14rounds sequence.
        //   In:  %xmm0 = key half produced by the preceding 256a step
        //        %xmm2 = key half to update
        //        %xmm1 = aeskeygenassist result the caller just produced
        //        %xmm4 = scratch carried between calls
        //        %rax  = write cursor into the schedule
        //   Out: %xmm0 stored at (%rax), %rax advanced by 16, %xmm2 updated;
        //        %xmm1 and %xmm4 overwritten.
   4345 L$key_expansion_256b:
   4346 	movups	%xmm0,(%rax)
   4347 	leaq	16(%rax),%rax
   4348 
   4349 	shufps	$16,%xmm2,%xmm4
   4350 	xorps	%xmm4,%xmm2
   4351 	shufps	$140,%xmm2,%xmm4
   4352 	xorps	%xmm4,%xmm2
   4353 	shufps	$170,%xmm1,%xmm1	// replicate dword 2 of %xmm1 into every lane
   4354 	xorps	%xmm1,%xmm2
   4355 	.byte	0xf3,0xc3	// rep ret
   4356 
   4357 
   4358 .p2align	6
        // Constant tables, 64-byte aligned. The first five are not referenced
        // by the key-setup routines above; presumably they serve other
        // routines in this file - verify against their users.
   4359 L$bswap_mask:
   4360 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   4361 L$increment32:
   4362 .long	6,6,6,0
   4363 L$increment64:
   4364 .long	1,0,0,0
   4365 L$xts_magic:
   4366 .long	0x87,0,1,0
   4367 L$increment1:
   4368 .byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
        // pshufb mask loaded into %xmm5 by the 128-bit and 256-bit alt
        // key-setup paths.
   4369 L$key_rotate:
   4370 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
        // pshufb mask loaded into %xmm5 by the 192-bit alt key-setup path.
   4371 L$key_rotate192:
   4372 .long	0x04070605,0x04070605,0x04070605,0x04070605
        // Initial round constant for the alt key-setup paths (loaded into
        // %xmm4 and doubled with pslld as the schedule advances).
   4373 L$key_rcon1:
   4374 .long	1,1,1,1
        // Round constant reloaded by the 128-bit alt path for its final two
        // round keys.
   4375 L$key_rcon1b:
   4376 .long	0x1b,0x1b,0x1b,0x1b
   4377 
        // NUL-terminated ASCII string:
        // "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
   4378 .byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
   4379 .p2align	6
   4380 #endif
   4381