// (source-browser page header, not part of the assembly) Home | History | Annotate | Download | only in aes
      1 #if defined(__x86_64__)
      2 .text
      3 
      4 .globl	_aesni_encrypt
      5 .private_extern _aesni_encrypt
      6 
      7 .p2align	4
        // _aesni_encrypt: push one 16-byte block through the AES-NI encryption rounds.
        //   In:  rdi = pointer to the 16-byte input block (unaligned load)
        //        rsi = pointer to the 16-byte output block (unaligned store)
        //        rdx = pointer to the key schedule: 16-byte round keys from offset 0,
        //              32-bit count of AESENC rounds at offset 240
        //   Clobbers: eax, rdx, flags; xmm0-xmm2 are zeroed before returning.
        //   NOTE(review): the register roles follow the System V AMD64 argument order
        //   (rdi, rsi, rdx); confirm against the C prototype, which is not visible here.
      8 _aesni_encrypt:
      9 	movups	(%rdi),%xmm2
     10 	movl	240(%rdx),%eax
     11 	movups	(%rdx),%xmm0
     12 	movups	16(%rdx),%xmm1
     13 	leaq	32(%rdx),%rdx
        	// Initial whitening: state ^= round key 0.
     14 	xorps	%xmm0,%xmm2
     15 L$oop_enc1_1:
        	// 66 0F 38 DC D1 = aesenc %xmm1,%xmm2, emitted as raw bytes.
     16 .byte	102,15,56,220,209
     17 	decl	%eax
        	// movups and leaq do not write EFLAGS, so jnz still tests the decl result.
     18 	movups	(%rdx),%xmm1
     19 	leaq	16(%rdx),%rdx
     20 	jnz	L$oop_enc1_1
        	// 66 0F 38 DD D1 = aesenclast %xmm1,%xmm2.
     21 .byte	102,15,56,221,209
        	// Wipe the round-key registers, and the result register once it is stored.
     22 	pxor	%xmm0,%xmm0
     23 	pxor	%xmm1,%xmm1
     24 	movups	%xmm2,(%rsi)
     25 	pxor	%xmm2,%xmm2
        	// F3 C3 = rep ret.
     26 	.byte	0xf3,0xc3
     27 
     28 
     29 .globl	_aesni_decrypt
     30 .private_extern _aesni_decrypt
     31 
     32 .p2align	4
        // _aesni_decrypt: push one 16-byte block through the AES-NI decryption rounds.
        //   In:  rdi = pointer to the 16-byte input block (unaligned load)
        //        rsi = pointer to the 16-byte output block (unaligned store)
        //        rdx = pointer to the key schedule: 16-byte round keys from offset 0,
        //              32-bit count of AESDEC rounds at offset 240
        //   Clobbers: eax, rdx, flags; xmm0-xmm2 are zeroed before returning.
        //   NOTE(review): presumably the schedule must already be in decryption form
        //   for AESDEC; nothing in this routine converts it - confirm with the key setup code.
     33 _aesni_decrypt:
     34 	movups	(%rdi),%xmm2
     35 	movl	240(%rdx),%eax
     36 	movups	(%rdx),%xmm0
     37 	movups	16(%rdx),%xmm1
     38 	leaq	32(%rdx),%rdx
        	// Initial whitening: state ^= round key 0.
     39 	xorps	%xmm0,%xmm2
     40 L$oop_dec1_2:
        	// 66 0F 38 DE D1 = aesdec %xmm1,%xmm2, emitted as raw bytes.
     41 .byte	102,15,56,222,209
     42 	decl	%eax
        	// movups and leaq do not write EFLAGS, so jnz still tests the decl result.
     43 	movups	(%rdx),%xmm1
     44 	leaq	16(%rdx),%rdx
     45 	jnz	L$oop_dec1_2
        	// 66 0F 38 DF D1 = aesdeclast %xmm1,%xmm2.
     46 .byte	102,15,56,223,209
        	// Wipe the round-key registers, and the result register once it is stored.
     47 	pxor	%xmm0,%xmm0
     48 	pxor	%xmm1,%xmm1
     49 	movups	%xmm2,(%rsi)
     50 	pxor	%xmm2,%xmm2
        	// F3 C3 = rep ret.
     51 	.byte	0xf3,0xc3
     52 
     53 
     54 .p2align	4
        // _aesni_encrypt2: file-local helper (no .globl) that encrypts two blocks in parallel.
        //   In:  xmm2, xmm3 = the two 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n; the ECB code in this file passes the value it read
        //              from offset 240 of the key schedule
        //   Out: xmm2, xmm3 = encrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   The loop uses two round keys per pass and exits only when rax hits exactly 0,
        //   so n must be odd and at least 3.
        //   Raw opcodes: 102,15,56,220,X = aesenc; 102,15,56,221,X = aesenclast.
        //   X = 209/217 applies %xmm1 to %xmm2/%xmm3; X = 208/216 applies %xmm0.
     55 _aesni_encrypt2:
     56 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
     57 	shll	$4,%eax
     58 	movups	16(%rcx),%xmm1
     59 	xorps	%xmm0,%xmm2
     60 	xorps	%xmm0,%xmm3
     61 	movups	32(%rcx),%xmm0
        	// After the next three instructions: rcx = key + 32 + 16*n and
        	// rax = 16 - 16*n, a negative index that counts up towards 0.
     62 	leaq	32(%rcx,%rax,1),%rcx
     63 	negq	%rax
     64 	addq	$16,%rax
     65 
     66 L$enc_loop2:
     67 .byte	102,15,56,220,209
     68 .byte	102,15,56,220,217
     69 	movups	(%rcx,%rax,1),%xmm1
     70 	addq	$32,%rax
     71 .byte	102,15,56,220,208
     72 .byte	102,15,56,220,216
        	// The AES ops and movups leave the flags from addq intact for jnz.
     73 	movups	-16(%rcx,%rax,1),%xmm0
     74 	jnz	L$enc_loop2
     75 
        	// Last full round with xmm1, then the final round with xmm0.
     76 .byte	102,15,56,220,209
     77 .byte	102,15,56,220,217
     78 .byte	102,15,56,221,208
     79 .byte	102,15,56,221,216
        	// F3 C3 = rep ret.
     80 	.byte	0xf3,0xc3
     81 
     82 
     83 .p2align	4
        // _aesni_decrypt2: file-local helper (no .globl) that decrypts two blocks in parallel.
        //   In:  xmm2, xmm3 = the two 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n; the ECB code in this file passes the value it read
        //              from offset 240 of the key schedule
        //   Out: xmm2, xmm3 = decrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   The loop uses two round keys per pass and exits only when rax hits exactly 0,
        //   so n must be odd and at least 3.
        //   Raw opcodes: 102,15,56,222,X = aesdec; 102,15,56,223,X = aesdeclast.
        //   X = 209/217 applies %xmm1 to %xmm2/%xmm3; X = 208/216 applies %xmm0.
     84 _aesni_decrypt2:
     85 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
     86 	shll	$4,%eax
     87 	movups	16(%rcx),%xmm1
     88 	xorps	%xmm0,%xmm2
     89 	xorps	%xmm0,%xmm3
     90 	movups	32(%rcx),%xmm0
        	// After the next three instructions: rcx = key + 32 + 16*n and
        	// rax = 16 - 16*n, a negative index that counts up towards 0.
     91 	leaq	32(%rcx,%rax,1),%rcx
     92 	negq	%rax
     93 	addq	$16,%rax
     94 
     95 L$dec_loop2:
     96 .byte	102,15,56,222,209
     97 .byte	102,15,56,222,217
     98 	movups	(%rcx,%rax,1),%xmm1
     99 	addq	$32,%rax
    100 .byte	102,15,56,222,208
    101 .byte	102,15,56,222,216
        	// The AES ops and movups leave the flags from addq intact for jnz.
    102 	movups	-16(%rcx,%rax,1),%xmm0
    103 	jnz	L$dec_loop2
    104 
        	// Last full round with xmm1, then the final round with xmm0.
    105 .byte	102,15,56,222,209
    106 .byte	102,15,56,222,217
    107 .byte	102,15,56,223,208
    108 .byte	102,15,56,223,216
        	// F3 C3 = rep ret.
    109 	.byte	0xf3,0xc3
    110 
    111 
    112 .p2align	4
        // _aesni_encrypt3: file-local helper (no .globl) that encrypts three blocks in parallel.
        //   In:  xmm2-xmm4 = the three 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n (odd, at least 3; two round keys are used per loop pass)
        //   Out: xmm2-xmm4 = encrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   Raw opcodes: 102,15,56,220,X = aesenc; 102,15,56,221,X = aesenclast.
        //   X = 209/217/225 applies %xmm1 to %xmm2/%xmm3/%xmm4; X = 208/216/224 applies %xmm0.
    113 _aesni_encrypt3:
    114 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
    115 	shll	$4,%eax
    116 	movups	16(%rcx),%xmm1
    117 	xorps	%xmm0,%xmm2
    118 	xorps	%xmm0,%xmm3
    119 	xorps	%xmm0,%xmm4
    120 	movups	32(%rcx),%xmm0
        	// After the next three instructions: rcx = key + 32 + 16*n and
        	// rax = 16 - 16*n, a negative index that counts up towards 0.
    121 	leaq	32(%rcx,%rax,1),%rcx
    122 	negq	%rax
    123 	addq	$16,%rax
    124 
    125 L$enc_loop3:
    126 .byte	102,15,56,220,209
    127 .byte	102,15,56,220,217
    128 .byte	102,15,56,220,225
    129 	movups	(%rcx,%rax,1),%xmm1
    130 	addq	$32,%rax
    131 .byte	102,15,56,220,208
    132 .byte	102,15,56,220,216
    133 .byte	102,15,56,220,224
        	// The AES ops and movups leave the flags from addq intact for jnz.
    134 	movups	-16(%rcx,%rax,1),%xmm0
    135 	jnz	L$enc_loop3
    136 
        	// Last full round with xmm1, then the final round with xmm0.
    137 .byte	102,15,56,220,209
    138 .byte	102,15,56,220,217
    139 .byte	102,15,56,220,225
    140 .byte	102,15,56,221,208
    141 .byte	102,15,56,221,216
    142 .byte	102,15,56,221,224
        	// F3 C3 = rep ret.
    143 	.byte	0xf3,0xc3
    144 
    145 
    146 .p2align	4
        // _aesni_decrypt3: file-local helper (no .globl) that decrypts three blocks in parallel.
        //   In:  xmm2-xmm4 = the three 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n (odd, at least 3; two round keys are used per loop pass)
        //   Out: xmm2-xmm4 = decrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   Raw opcodes: 102,15,56,222,X = aesdec; 102,15,56,223,X = aesdeclast.
        //   X = 209/217/225 applies %xmm1 to %xmm2/%xmm3/%xmm4; X = 208/216/224 applies %xmm0.
    147 _aesni_decrypt3:
    148 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
    149 	shll	$4,%eax
    150 	movups	16(%rcx),%xmm1
    151 	xorps	%xmm0,%xmm2
    152 	xorps	%xmm0,%xmm3
    153 	xorps	%xmm0,%xmm4
    154 	movups	32(%rcx),%xmm0
        	// After the next three instructions: rcx = key + 32 + 16*n and
        	// rax = 16 - 16*n, a negative index that counts up towards 0.
    155 	leaq	32(%rcx,%rax,1),%rcx
    156 	negq	%rax
    157 	addq	$16,%rax
    158 
    159 L$dec_loop3:
    160 .byte	102,15,56,222,209
    161 .byte	102,15,56,222,217
    162 .byte	102,15,56,222,225
    163 	movups	(%rcx,%rax,1),%xmm1
    164 	addq	$32,%rax
    165 .byte	102,15,56,222,208
    166 .byte	102,15,56,222,216
    167 .byte	102,15,56,222,224
        	// The AES ops and movups leave the flags from addq intact for jnz.
    168 	movups	-16(%rcx,%rax,1),%xmm0
    169 	jnz	L$dec_loop3
    170 
        	// Last full round with xmm1, then the final round with xmm0.
    171 .byte	102,15,56,222,209
    172 .byte	102,15,56,222,217
    173 .byte	102,15,56,222,225
    174 .byte	102,15,56,223,208
    175 .byte	102,15,56,223,216
    176 .byte	102,15,56,223,224
        	// F3 C3 = rep ret.
    177 	.byte	0xf3,0xc3
    178 
    179 
    180 .p2align	4
        // _aesni_encrypt4: file-local helper (no .globl) that encrypts four blocks in parallel.
        //   In:  xmm2-xmm5 = the four 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n (odd, at least 3; two round keys are used per loop pass)
        //   Out: xmm2-xmm5 = encrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   Raw opcodes: 102,15,56,220,X = aesenc; 102,15,56,221,X = aesenclast.
        //   X = 209/217/225/233 applies %xmm1 to %xmm2..%xmm5; X = 208/216/224/232 applies %xmm0.
    181 _aesni_encrypt4:
    182 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
    183 	shll	$4,%eax
    184 	movups	16(%rcx),%xmm1
    185 	xorps	%xmm0,%xmm2
    186 	xorps	%xmm0,%xmm3
    187 	xorps	%xmm0,%xmm4
    188 	xorps	%xmm0,%xmm5
    189 	movups	32(%rcx),%xmm0
        	// After leaq/negq/addq: rcx = key + 32 + 16*n and rax = 16 - 16*n,
        	// a negative index that counts up towards 0.
    190 	leaq	32(%rcx,%rax,1),%rcx
    191 	negq	%rax
        	// 0F 1F 00 = nopl (%rax): a 3-byte NOP, padding only
        	// (presumably placed to position L$enc_loop4).
    192 .byte	0x0f,0x1f,0x00
    193 	addq	$16,%rax
    194 
    195 L$enc_loop4:
    196 .byte	102,15,56,220,209
    197 .byte	102,15,56,220,217
    198 .byte	102,15,56,220,225
    199 .byte	102,15,56,220,233
    200 	movups	(%rcx,%rax,1),%xmm1
    201 	addq	$32,%rax
    202 .byte	102,15,56,220,208
    203 .byte	102,15,56,220,216
    204 .byte	102,15,56,220,224
    205 .byte	102,15,56,220,232
        	// The AES ops and movups leave the flags from addq intact for jnz.
    206 	movups	-16(%rcx,%rax,1),%xmm0
    207 	jnz	L$enc_loop4
    208 
        	// Last full round with xmm1, then the final round with xmm0.
    209 .byte	102,15,56,220,209
    210 .byte	102,15,56,220,217
    211 .byte	102,15,56,220,225
    212 .byte	102,15,56,220,233
    213 .byte	102,15,56,221,208
    214 .byte	102,15,56,221,216
    215 .byte	102,15,56,221,224
    216 .byte	102,15,56,221,232
        	// F3 C3 = rep ret.
    217 	.byte	0xf3,0xc3
    218 
    219 
    220 .p2align	4
        // _aesni_decrypt4: file-local helper (no .globl) that decrypts four blocks in parallel.
        //   In:  xmm2-xmm5 = the four 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n (odd, at least 3; two round keys are used per loop pass)
        //   Out: xmm2-xmm5 = decrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   Raw opcodes: 102,15,56,222,X = aesdec; 102,15,56,223,X = aesdeclast.
        //   X = 209/217/225/233 applies %xmm1 to %xmm2..%xmm5; X = 208/216/224/232 applies %xmm0.
    221 _aesni_decrypt4:
    222 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
    223 	shll	$4,%eax
    224 	movups	16(%rcx),%xmm1
    225 	xorps	%xmm0,%xmm2
    226 	xorps	%xmm0,%xmm3
    227 	xorps	%xmm0,%xmm4
    228 	xorps	%xmm0,%xmm5
    229 	movups	32(%rcx),%xmm0
        	// After leaq/negq/addq: rcx = key + 32 + 16*n and rax = 16 - 16*n,
        	// a negative index that counts up towards 0.
    230 	leaq	32(%rcx,%rax,1),%rcx
    231 	negq	%rax
        	// 0F 1F 00 = nopl (%rax): a 3-byte NOP, padding only
        	// (presumably placed to position L$dec_loop4).
    232 .byte	0x0f,0x1f,0x00
    233 	addq	$16,%rax
    234 
    235 L$dec_loop4:
    236 .byte	102,15,56,222,209
    237 .byte	102,15,56,222,217
    238 .byte	102,15,56,222,225
    239 .byte	102,15,56,222,233
    240 	movups	(%rcx,%rax,1),%xmm1
    241 	addq	$32,%rax
    242 .byte	102,15,56,222,208
    243 .byte	102,15,56,222,216
    244 .byte	102,15,56,222,224
    245 .byte	102,15,56,222,232
        	// The AES ops and movups leave the flags from addq intact for jnz.
    246 	movups	-16(%rcx,%rax,1),%xmm0
    247 	jnz	L$dec_loop4
    248 
        	// Last full round with xmm1, then the final round with xmm0.
    249 .byte	102,15,56,222,209
    250 .byte	102,15,56,222,217
    251 .byte	102,15,56,222,225
    252 .byte	102,15,56,222,233
    253 .byte	102,15,56,223,208
    254 .byte	102,15,56,223,216
    255 .byte	102,15,56,223,224
    256 .byte	102,15,56,223,232
        	// F3 C3 = rep ret.
    257 	.byte	0xf3,0xc3
    258 
    259 
    260 .p2align	4
        // _aesni_encrypt6: file-local helper (no .globl) that encrypts six blocks in parallel.
        //   In:  xmm2-xmm7 = the six 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n (odd, at least 3; two round keys are used per loop pass)
        //   Out: xmm2-xmm7 = encrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   Round 1 is interleaved with the whitening XORs: xmm2-xmm4 receive their first
        //   aesenc in the prologue, xmm5-xmm7 receive theirs at L$enc_loop6_enter.
        //   L$enc_loop6 is also a call target of the CTR32 code later in this file, so the
        //   loop head must stay a valid entry point (round keys in xmm1/xmm0, rcx/rax set up).
        //   Raw opcodes: 102,15,56,220,X = aesenc; 102,15,56,221,X = aesenclast.
        //   X = 209/217/225/233/241/249 applies %xmm1 to %xmm2..%xmm7;
        //   X = 208/216/224/232/240/248 applies %xmm0 to %xmm2..%xmm7.
    261 _aesni_encrypt6:
    262 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
    263 	shll	$4,%eax
    264 	movups	16(%rcx),%xmm1
    265 	xorps	%xmm0,%xmm2
    266 	pxor	%xmm0,%xmm3
    267 	pxor	%xmm0,%xmm4
    268 .byte	102,15,56,220,209
        	// rcx = key + 32 + 16*n; rax = -16*n after negq.
    269 	leaq	32(%rcx,%rax,1),%rcx
    270 	negq	%rax
    271 .byte	102,15,56,220,217
    272 	pxor	%xmm0,%xmm5
    273 	pxor	%xmm0,%xmm6
    274 .byte	102,15,56,220,225
    275 	pxor	%xmm0,%xmm7
        	// rcx + rax = key + 32 here, so this loads round key 2 into xmm0.
    276 	movups	(%rcx,%rax,1),%xmm0
    277 	addq	$16,%rax
        	// Enter mid-loop: the first three lanes already had round 1 applied above.
    278 	jmp	L$enc_loop6_enter
    279 .p2align	4
    280 L$enc_loop6:
    281 .byte	102,15,56,220,209
    282 .byte	102,15,56,220,217
    283 .byte	102,15,56,220,225
    284 L$enc_loop6_enter:
    285 .byte	102,15,56,220,233
    286 .byte	102,15,56,220,241
    287 .byte	102,15,56,220,249
    288 	movups	(%rcx,%rax,1),%xmm1
    289 	addq	$32,%rax
    290 .byte	102,15,56,220,208
    291 .byte	102,15,56,220,216
    292 .byte	102,15,56,220,224
    293 .byte	102,15,56,220,232
    294 .byte	102,15,56,220,240
    295 .byte	102,15,56,220,248
        	// The AES ops and movups leave the flags from addq intact for jnz.
    296 	movups	-16(%rcx,%rax,1),%xmm0
    297 	jnz	L$enc_loop6
    298 
        	// Last full round with xmm1, then the final round with xmm0.
    299 .byte	102,15,56,220,209
    300 .byte	102,15,56,220,217
    301 .byte	102,15,56,220,225
    302 .byte	102,15,56,220,233
    303 .byte	102,15,56,220,241
    304 .byte	102,15,56,220,249
    305 .byte	102,15,56,221,208
    306 .byte	102,15,56,221,216
    307 .byte	102,15,56,221,224
    308 .byte	102,15,56,221,232
    309 .byte	102,15,56,221,240
    310 .byte	102,15,56,221,248
        	// F3 C3 = rep ret.
    311 	.byte	0xf3,0xc3
    312 
    313 
    314 .p2align	4
        // _aesni_decrypt6: file-local helper (no .globl) that decrypts six blocks in parallel.
        //   In:  xmm2-xmm7 = the six 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n (odd, at least 3; two round keys are used per loop pass)
        //   Out: xmm2-xmm7 = decrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   Round 1 is interleaved with the whitening XORs: xmm2-xmm4 receive their first
        //   aesdec in the prologue, xmm5-xmm7 receive theirs at L$dec_loop6_enter.
        //   Raw opcodes: 102,15,56,222,X = aesdec; 102,15,56,223,X = aesdeclast.
        //   X = 209/217/225/233/241/249 applies %xmm1 to %xmm2..%xmm7;
        //   X = 208/216/224/232/240/248 applies %xmm0 to %xmm2..%xmm7.
    315 _aesni_decrypt6:
    316 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
    317 	shll	$4,%eax
    318 	movups	16(%rcx),%xmm1
    319 	xorps	%xmm0,%xmm2
    320 	pxor	%xmm0,%xmm3
    321 	pxor	%xmm0,%xmm4
    322 .byte	102,15,56,222,209
        	// rcx = key + 32 + 16*n; rax = -16*n after negq.
    323 	leaq	32(%rcx,%rax,1),%rcx
    324 	negq	%rax
    325 .byte	102,15,56,222,217
    326 	pxor	%xmm0,%xmm5
    327 	pxor	%xmm0,%xmm6
    328 .byte	102,15,56,222,225
    329 	pxor	%xmm0,%xmm7
        	// rcx + rax = key + 32 here, so this loads round key 2 into xmm0.
    330 	movups	(%rcx,%rax,1),%xmm0
    331 	addq	$16,%rax
        	// Enter mid-loop: the first three lanes already had round 1 applied above.
    332 	jmp	L$dec_loop6_enter
    333 .p2align	4
    334 L$dec_loop6:
    335 .byte	102,15,56,222,209
    336 .byte	102,15,56,222,217
    337 .byte	102,15,56,222,225
    338 L$dec_loop6_enter:
    339 .byte	102,15,56,222,233
    340 .byte	102,15,56,222,241
    341 .byte	102,15,56,222,249
    342 	movups	(%rcx,%rax,1),%xmm1
    343 	addq	$32,%rax
    344 .byte	102,15,56,222,208
    345 .byte	102,15,56,222,216
    346 .byte	102,15,56,222,224
    347 .byte	102,15,56,222,232
    348 .byte	102,15,56,222,240
    349 .byte	102,15,56,222,248
        	// The AES ops and movups leave the flags from addq intact for jnz.
    350 	movups	-16(%rcx,%rax,1),%xmm0
    351 	jnz	L$dec_loop6
    352 
        	// Last full round with xmm1, then the final round with xmm0.
    353 .byte	102,15,56,222,209
    354 .byte	102,15,56,222,217
    355 .byte	102,15,56,222,225
    356 .byte	102,15,56,222,233
    357 .byte	102,15,56,222,241
    358 .byte	102,15,56,222,249
    359 .byte	102,15,56,223,208
    360 .byte	102,15,56,223,216
    361 .byte	102,15,56,223,224
    362 .byte	102,15,56,223,232
    363 .byte	102,15,56,223,240
    364 .byte	102,15,56,223,248
        	// F3 C3 = rep ret.
    365 	.byte	0xf3,0xc3
    366 
    367 
    368 .p2align	4
        // _aesni_encrypt8: file-local helper (no .globl) that encrypts eight blocks in parallel.
        //   In:  xmm2-xmm9 = the eight 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n (odd, at least 3; two round keys are used per loop pass)
        //   Out: xmm2-xmm9 = encrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   Round 1 is interleaved with the whitening XORs: xmm2 and xmm3 receive their first
        //   aesenc in the prologue, xmm4-xmm9 receive theirs at L$enc_loop8_inner.
        //   Raw opcodes: 102,15,56,220,X = aesenc; 102,15,56,221,X = aesenclast;
        //   the forms with an extra 68 (REX.R) byte target %xmm8/%xmm9.
        //   X = 209/217/225/233/241/249 applies %xmm1 to %xmm2..%xmm7, 68-prefixed 193/201 to %xmm8/%xmm9;
        //   X = 208/216/224/232/240/248 applies %xmm0 to %xmm2..%xmm7, 68-prefixed 192/200 to %xmm8/%xmm9.
    369 _aesni_encrypt8:
    370 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
    371 	shll	$4,%eax
    372 	movups	16(%rcx),%xmm1
    373 	xorps	%xmm0,%xmm2
    374 	xorps	%xmm0,%xmm3
    375 	pxor	%xmm0,%xmm4
    376 	pxor	%xmm0,%xmm5
    377 	pxor	%xmm0,%xmm6
        	// rcx = key + 32 + 16*n; rax = -16*n after negq.
    378 	leaq	32(%rcx,%rax,1),%rcx
    379 	negq	%rax
    380 .byte	102,15,56,220,209
    381 	pxor	%xmm0,%xmm7
    382 	pxor	%xmm0,%xmm8
    383 .byte	102,15,56,220,217
    384 	pxor	%xmm0,%xmm9
        	// rcx + rax = key + 32 here, so this loads round key 2 into xmm0.
    385 	movups	(%rcx,%rax,1),%xmm0
    386 	addq	$16,%rax
        	// Enter mid-loop: xmm2 and xmm3 already had round 1 applied above.
    387 	jmp	L$enc_loop8_inner
    388 .p2align	4
    389 L$enc_loop8:
    390 .byte	102,15,56,220,209
    391 .byte	102,15,56,220,217
    392 L$enc_loop8_inner:
    393 .byte	102,15,56,220,225
    394 .byte	102,15,56,220,233
    395 .byte	102,15,56,220,241
    396 .byte	102,15,56,220,249
    397 .byte	102,68,15,56,220,193
    398 .byte	102,68,15,56,220,201
        	// L$enc_loop8_enter is not referenced in the visible part of this file;
        	// presumably a mid-round entry point for code outside this view.
    399 L$enc_loop8_enter:
    400 	movups	(%rcx,%rax,1),%xmm1
    401 	addq	$32,%rax
    402 .byte	102,15,56,220,208
    403 .byte	102,15,56,220,216
    404 .byte	102,15,56,220,224
    405 .byte	102,15,56,220,232
    406 .byte	102,15,56,220,240
    407 .byte	102,15,56,220,248
    408 .byte	102,68,15,56,220,192
    409 .byte	102,68,15,56,220,200
        	// The AES ops and movups leave the flags from addq intact for jnz.
    410 	movups	-16(%rcx,%rax,1),%xmm0
    411 	jnz	L$enc_loop8
    412 
        	// Last full round with xmm1, then the final round with xmm0.
    413 .byte	102,15,56,220,209
    414 .byte	102,15,56,220,217
    415 .byte	102,15,56,220,225
    416 .byte	102,15,56,220,233
    417 .byte	102,15,56,220,241
    418 .byte	102,15,56,220,249
    419 .byte	102,68,15,56,220,193
    420 .byte	102,68,15,56,220,201
    421 .byte	102,15,56,221,208
    422 .byte	102,15,56,221,216
    423 .byte	102,15,56,221,224
    424 .byte	102,15,56,221,232
    425 .byte	102,15,56,221,240
    426 .byte	102,15,56,221,248
    427 .byte	102,68,15,56,221,192
    428 .byte	102,68,15,56,221,200
        	// F3 C3 = rep ret.
    429 	.byte	0xf3,0xc3
    430 
    431 
    432 .p2align	4
        // _aesni_decrypt8: file-local helper (no .globl) that decrypts eight blocks in parallel.
        //   In:  xmm2-xmm9 = the eight 16-byte blocks
        //        rcx = pointer to the key schedule (16-byte round keys from offset 0)
        //        eax = round count n (odd, at least 3; two round keys are used per loop pass)
        //   Out: xmm2-xmm9 = decrypted blocks
        //   Clobbers: rax (0 on return), rcx (left at key + 32 + 16*n), xmm0, xmm1, flags.
        //   Round 1 is interleaved with the whitening XORs: xmm2 and xmm3 receive their first
        //   aesdec in the prologue, xmm4-xmm9 receive theirs at L$dec_loop8_inner.
        //   Raw opcodes: 102,15,56,222,X = aesdec; 102,15,56,223,X = aesdeclast;
        //   the forms with an extra 68 (REX.R) byte target %xmm8/%xmm9.
        //   X = 209/217/225/233/241/249 applies %xmm1 to %xmm2..%xmm7, 68-prefixed 193/201 to %xmm8/%xmm9;
        //   X = 208/216/224/232/240/248 applies %xmm0 to %xmm2..%xmm7, 68-prefixed 192/200 to %xmm8/%xmm9.
    433 _aesni_decrypt8:
    434 	movups	(%rcx),%xmm0
        	// Writing eax zero-extends into rax, so rax = 16*n (bytes of round keys).
    435 	shll	$4,%eax
    436 	movups	16(%rcx),%xmm1
    437 	xorps	%xmm0,%xmm2
    438 	xorps	%xmm0,%xmm3
    439 	pxor	%xmm0,%xmm4
    440 	pxor	%xmm0,%xmm5
    441 	pxor	%xmm0,%xmm6
        	// rcx = key + 32 + 16*n; rax = -16*n after negq.
    442 	leaq	32(%rcx,%rax,1),%rcx
    443 	negq	%rax
    444 .byte	102,15,56,222,209
    445 	pxor	%xmm0,%xmm7
    446 	pxor	%xmm0,%xmm8
    447 .byte	102,15,56,222,217
    448 	pxor	%xmm0,%xmm9
        	// rcx + rax = key + 32 here, so this loads round key 2 into xmm0.
    449 	movups	(%rcx,%rax,1),%xmm0
    450 	addq	$16,%rax
        	// Enter mid-loop: xmm2 and xmm3 already had round 1 applied above.
    451 	jmp	L$dec_loop8_inner
    452 .p2align	4
    453 L$dec_loop8:
    454 .byte	102,15,56,222,209
    455 .byte	102,15,56,222,217
    456 L$dec_loop8_inner:
    457 .byte	102,15,56,222,225
    458 .byte	102,15,56,222,233
    459 .byte	102,15,56,222,241
    460 .byte	102,15,56,222,249
    461 .byte	102,68,15,56,222,193
    462 .byte	102,68,15,56,222,201
        	// L$dec_loop8_enter is not referenced in the visible part of this file;
        	// presumably a mid-round entry point for code outside this view.
    463 L$dec_loop8_enter:
    464 	movups	(%rcx,%rax,1),%xmm1
    465 	addq	$32,%rax
    466 .byte	102,15,56,222,208
    467 .byte	102,15,56,222,216
    468 .byte	102,15,56,222,224
    469 .byte	102,15,56,222,232
    470 .byte	102,15,56,222,240
    471 .byte	102,15,56,222,248
    472 .byte	102,68,15,56,222,192
    473 .byte	102,68,15,56,222,200
        	// The AES ops and movups leave the flags from addq intact for jnz.
    474 	movups	-16(%rcx,%rax,1),%xmm0
    475 	jnz	L$dec_loop8
    476 
        	// Last full round with xmm1, then the final round with xmm0.
    477 .byte	102,15,56,222,209
    478 .byte	102,15,56,222,217
    479 .byte	102,15,56,222,225
    480 .byte	102,15,56,222,233
    481 .byte	102,15,56,222,241
    482 .byte	102,15,56,222,249
    483 .byte	102,68,15,56,222,193
    484 .byte	102,68,15,56,222,201
    485 .byte	102,15,56,223,208
    486 .byte	102,15,56,223,216
    487 .byte	102,15,56,223,224
    488 .byte	102,15,56,223,232
    489 .byte	102,15,56,223,240
    490 .byte	102,15,56,223,248
    491 .byte	102,68,15,56,223,192
    492 .byte	102,68,15,56,223,200
        	// F3 C3 = rep ret.
    493 	.byte	0xf3,0xc3
    494 
    495 .globl	_aesni_ecb_encrypt
    496 .private_extern _aesni_ecb_encrypt
    497 
    498 .p2align	4
        // _aesni_ecb_encrypt: encrypt or decrypt a buffer block by block; every 16-byte
        // block is processed independently (no chaining between blocks).
        //   In:  rdi = input pointer, rsi = output pointer
        //        rdx = length in bytes; rounded down to a multiple of 16, and a
        //              resulting length of 0 returns without touching the buffers
        //        rcx = pointer to the key schedule (round count read from offset 240)
        //        r8d = direction: nonzero takes the encrypt path, zero jumps to L$ecb_decrypt
        //   Saved copies: r11 = key pointer, r10d = round count, because the
        //   _aesni_{en,de}cryptN helpers clobber rcx and eax.
        //   Strategy: 8 blocks per pass while at least 128 bytes remain, then a single
        //   dispatch on the 1..7 leftover blocks.
        //   Clobbers: rax, rcx, rdx, rsi, rdi, r10, r11, xmm0-xmm9, flags.
        //   The decrypt paths zero each data register after storing it; the encrypt paths
        //   leave xmm2-xmm9 holding output blocks. xmm0/xmm1 are zeroed on every exit.
    499 _aesni_ecb_encrypt:
    500 	andq	$-16,%rdx
    501 	jz	L$ecb_ret
    502 
    503 	movl	240(%rcx),%eax
    504 	movups	(%rcx),%xmm0
    505 	movq	%rcx,%r11
    506 	movl	%eax,%r10d
    507 	testl	%r8d,%r8d
    508 	jz	L$ecb_decrypt
    509 
    510 	cmpq	$0x80,%rdx
    511 	jb	L$ecb_enc_tail
    512 
        	// Prime the pipeline with the first eight input blocks.
    513 	movdqu	(%rdi),%xmm2
    514 	movdqu	16(%rdi),%xmm3
    515 	movdqu	32(%rdi),%xmm4
    516 	movdqu	48(%rdi),%xmm5
    517 	movdqu	64(%rdi),%xmm6
    518 	movdqu	80(%rdi),%xmm7
    519 	movdqu	96(%rdi),%xmm8
    520 	movdqu	112(%rdi),%xmm9
    521 	leaq	128(%rdi),%rdi
    522 	subq	$0x80,%rdx
    523 	jmp	L$ecb_enc_loop8_enter
    524 .p2align	4
    525 L$ecb_enc_loop8:
        	// Store the previous eight results while loading the next eight inputs,
        	// and restore rcx/eax from the saved copies for the next helper call.
    526 	movups	%xmm2,(%rsi)
    527 	movq	%r11,%rcx
    528 	movdqu	(%rdi),%xmm2
    529 	movl	%r10d,%eax
    530 	movups	%xmm3,16(%rsi)
    531 	movdqu	16(%rdi),%xmm3
    532 	movups	%xmm4,32(%rsi)
    533 	movdqu	32(%rdi),%xmm4
    534 	movups	%xmm5,48(%rsi)
    535 	movdqu	48(%rdi),%xmm5
    536 	movups	%xmm6,64(%rsi)
    537 	movdqu	64(%rdi),%xmm6
    538 	movups	%xmm7,80(%rsi)
    539 	movdqu	80(%rdi),%xmm7
    540 	movups	%xmm8,96(%rsi)
    541 	movdqu	96(%rdi),%xmm8
    542 	movups	%xmm9,112(%rsi)
    543 	leaq	128(%rsi),%rsi
    544 	movdqu	112(%rdi),%xmm9
    545 	leaq	128(%rdi),%rdi
    546 L$ecb_enc_loop8_enter:
    547 
    548 	call	_aesni_encrypt8
    549 
        	// No borrow means another full 128 bytes remain, so go around again.
    550 	subq	$0x80,%rdx
    551 	jnc	L$ecb_enc_loop8
    552 
        	// Flush the last eight results; rdx + 0x80 is the leftover byte count (0..112).
    553 	movups	%xmm2,(%rsi)
    554 	movq	%r11,%rcx
    555 	movups	%xmm3,16(%rsi)
    556 	movl	%r10d,%eax
    557 	movups	%xmm4,32(%rsi)
    558 	movups	%xmm5,48(%rsi)
    559 	movups	%xmm6,64(%rsi)
    560 	movups	%xmm7,80(%rsi)
    561 	movups	%xmm8,96(%rsi)
    562 	movups	%xmm9,112(%rsi)
    563 	leaq	128(%rsi),%rsi
    564 	addq	$0x80,%rdx
    565 	jz	L$ecb_ret
    566 
    567 L$ecb_enc_tail:
        	// Dispatch on 1..7 remaining blocks. Each cmpq feeds two branches (jb, then je);
        	// the load in between does not write EFLAGS.
    568 	movups	(%rdi),%xmm2
    569 	cmpq	$0x20,%rdx
    570 	jb	L$ecb_enc_one
    571 	movups	16(%rdi),%xmm3
    572 	je	L$ecb_enc_two
    573 	movups	32(%rdi),%xmm4
    574 	cmpq	$0x40,%rdx
    575 	jb	L$ecb_enc_three
    576 	movups	48(%rdi),%xmm5
    577 	je	L$ecb_enc_four
    578 	movups	64(%rdi),%xmm6
    579 	cmpq	$0x60,%rdx
    580 	jb	L$ecb_enc_five
    581 	movups	80(%rdi),%xmm7
    582 	je	L$ecb_enc_six
        	// Seven blocks: use the 8-lane helper with a zeroed dummy eighth lane,
        	// whose result is never stored.
    583 	movdqu	96(%rdi),%xmm8
    584 	xorps	%xmm9,%xmm9
    585 	call	_aesni_encrypt8
    586 	movups	%xmm2,(%rsi)
    587 	movups	%xmm3,16(%rsi)
    588 	movups	%xmm4,32(%rsi)
    589 	movups	%xmm5,48(%rsi)
    590 	movups	%xmm6,64(%rsi)
    591 	movups	%xmm7,80(%rsi)
    592 	movups	%xmm8,96(%rsi)
    593 	jmp	L$ecb_ret
    594 .p2align	4
    595 L$ecb_enc_one:
        	// Single block: inline one-lane loop (aesenc / aesenclast %xmm1,%xmm2),
        	// counting eax down from the round count.
    596 	movups	(%rcx),%xmm0
    597 	movups	16(%rcx),%xmm1
    598 	leaq	32(%rcx),%rcx
    599 	xorps	%xmm0,%xmm2
    600 L$oop_enc1_3:
    601 .byte	102,15,56,220,209
    602 	decl	%eax
    603 	movups	(%rcx),%xmm1
    604 	leaq	16(%rcx),%rcx
    605 	jnz	L$oop_enc1_3
    606 .byte	102,15,56,221,209
    607 	movups	%xmm2,(%rsi)
    608 	jmp	L$ecb_ret
    609 .p2align	4
    610 L$ecb_enc_two:
    611 	call	_aesni_encrypt2
    612 	movups	%xmm2,(%rsi)
    613 	movups	%xmm3,16(%rsi)
    614 	jmp	L$ecb_ret
    615 .p2align	4
    616 L$ecb_enc_three:
    617 	call	_aesni_encrypt3
    618 	movups	%xmm2,(%rsi)
    619 	movups	%xmm3,16(%rsi)
    620 	movups	%xmm4,32(%rsi)
    621 	jmp	L$ecb_ret
    622 .p2align	4
    623 L$ecb_enc_four:
    624 	call	_aesni_encrypt4
    625 	movups	%xmm2,(%rsi)
    626 	movups	%xmm3,16(%rsi)
    627 	movups	%xmm4,32(%rsi)
    628 	movups	%xmm5,48(%rsi)
    629 	jmp	L$ecb_ret
    630 .p2align	4
    631 L$ecb_enc_five:
        	// Five blocks: use the 6-lane helper with a zeroed dummy sixth lane.
    632 	xorps	%xmm7,%xmm7
    633 	call	_aesni_encrypt6
    634 	movups	%xmm2,(%rsi)
    635 	movups	%xmm3,16(%rsi)
    636 	movups	%xmm4,32(%rsi)
    637 	movups	%xmm5,48(%rsi)
    638 	movups	%xmm6,64(%rsi)
    639 	jmp	L$ecb_ret
    640 .p2align	4
    641 L$ecb_enc_six:
    642 	call	_aesni_encrypt6
    643 	movups	%xmm2,(%rsi)
    644 	movups	%xmm3,16(%rsi)
    645 	movups	%xmm4,32(%rsi)
    646 	movups	%xmm5,48(%rsi)
    647 	movups	%xmm6,64(%rsi)
    648 	movups	%xmm7,80(%rsi)
    649 	jmp	L$ecb_ret
    650 
    651 .p2align	4
    652 L$ecb_decrypt:
        	// Decrypt direction: same structure as the encrypt path above, using the
        	// _aesni_decryptN helpers and zeroing data registers after they are stored.
    653 	cmpq	$0x80,%rdx
    654 	jb	L$ecb_dec_tail
    655 
    656 	movdqu	(%rdi),%xmm2
    657 	movdqu	16(%rdi),%xmm3
    658 	movdqu	32(%rdi),%xmm4
    659 	movdqu	48(%rdi),%xmm5
    660 	movdqu	64(%rdi),%xmm6
    661 	movdqu	80(%rdi),%xmm7
    662 	movdqu	96(%rdi),%xmm8
    663 	movdqu	112(%rdi),%xmm9
    664 	leaq	128(%rdi),%rdi
    665 	subq	$0x80,%rdx
    666 	jmp	L$ecb_dec_loop8_enter
    667 .p2align	4
    668 L$ecb_dec_loop8:
        	// Store the previous eight results while loading the next eight inputs,
        	// and restore rcx/eax from the saved copies for the next helper call.
    669 	movups	%xmm2,(%rsi)
    670 	movq	%r11,%rcx
    671 	movdqu	(%rdi),%xmm2
    672 	movl	%r10d,%eax
    673 	movups	%xmm3,16(%rsi)
    674 	movdqu	16(%rdi),%xmm3
    675 	movups	%xmm4,32(%rsi)
    676 	movdqu	32(%rdi),%xmm4
    677 	movups	%xmm5,48(%rsi)
    678 	movdqu	48(%rdi),%xmm5
    679 	movups	%xmm6,64(%rsi)
    680 	movdqu	64(%rdi),%xmm6
    681 	movups	%xmm7,80(%rsi)
    682 	movdqu	80(%rdi),%xmm7
    683 	movups	%xmm8,96(%rsi)
    684 	movdqu	96(%rdi),%xmm8
    685 	movups	%xmm9,112(%rsi)
    686 	leaq	128(%rsi),%rsi
    687 	movdqu	112(%rdi),%xmm9
    688 	leaq	128(%rdi),%rdi
    689 L$ecb_dec_loop8_enter:
    690 
    691 	call	_aesni_decrypt8
    692 
        	// Reload round key 0 from the saved key pointer into xmm0.
    693 	movups	(%r11),%xmm0
        	// No borrow means another full 128 bytes remain, so go around again.
    694 	subq	$0x80,%rdx
    695 	jnc	L$ecb_dec_loop8
    696 
        	// Flush the last eight results, wiping each register right after its store.
        	// pxor writes no EFLAGS, and addq below sets them afresh for jz.
    697 	movups	%xmm2,(%rsi)
    698 	pxor	%xmm2,%xmm2
    699 	movq	%r11,%rcx
    700 	movups	%xmm3,16(%rsi)
    701 	pxor	%xmm3,%xmm3
    702 	movl	%r10d,%eax
    703 	movups	%xmm4,32(%rsi)
    704 	pxor	%xmm4,%xmm4
    705 	movups	%xmm5,48(%rsi)
    706 	pxor	%xmm5,%xmm5
    707 	movups	%xmm6,64(%rsi)
    708 	pxor	%xmm6,%xmm6
    709 	movups	%xmm7,80(%rsi)
    710 	pxor	%xmm7,%xmm7
    711 	movups	%xmm8,96(%rsi)
    712 	pxor	%xmm8,%xmm8
    713 	movups	%xmm9,112(%rsi)
    714 	pxor	%xmm9,%xmm9
    715 	leaq	128(%rsi),%rsi
    716 	addq	$0x80,%rdx
    717 	jz	L$ecb_ret
    718 
    719 L$ecb_dec_tail:
        	// Dispatch on 1..7 remaining blocks. Each cmpq feeds two branches (jb, then je);
        	// the load in between does not write EFLAGS.
    720 	movups	(%rdi),%xmm2
    721 	cmpq	$0x20,%rdx
    722 	jb	L$ecb_dec_one
    723 	movups	16(%rdi),%xmm3
    724 	je	L$ecb_dec_two
    725 	movups	32(%rdi),%xmm4
    726 	cmpq	$0x40,%rdx
    727 	jb	L$ecb_dec_three
    728 	movups	48(%rdi),%xmm5
    729 	je	L$ecb_dec_four
    730 	movups	64(%rdi),%xmm6
    731 	cmpq	$0x60,%rdx
    732 	jb	L$ecb_dec_five
    733 	movups	80(%rdi),%xmm7
    734 	je	L$ecb_dec_six
        	// Seven blocks: use the 8-lane helper with a zeroed dummy eighth lane,
        	// whose result is never stored.
    735 	movups	96(%rdi),%xmm8
    736 	movups	(%rcx),%xmm0
    737 	xorps	%xmm9,%xmm9
    738 	call	_aesni_decrypt8
    739 	movups	%xmm2,(%rsi)
    740 	pxor	%xmm2,%xmm2
    741 	movups	%xmm3,16(%rsi)
    742 	pxor	%xmm3,%xmm3
    743 	movups	%xmm4,32(%rsi)
    744 	pxor	%xmm4,%xmm4
    745 	movups	%xmm5,48(%rsi)
    746 	pxor	%xmm5,%xmm5
    747 	movups	%xmm6,64(%rsi)
    748 	pxor	%xmm6,%xmm6
    749 	movups	%xmm7,80(%rsi)
    750 	pxor	%xmm7,%xmm7
    751 	movups	%xmm8,96(%rsi)
    752 	pxor	%xmm8,%xmm8
    753 	pxor	%xmm9,%xmm9
    754 	jmp	L$ecb_ret
    755 .p2align	4
    756 L$ecb_dec_one:
        	// Single block: inline one-lane loop (aesdec / aesdeclast %xmm1,%xmm2),
        	// counting eax down from the round count.
    757 	movups	(%rcx),%xmm0
    758 	movups	16(%rcx),%xmm1
    759 	leaq	32(%rcx),%rcx
    760 	xorps	%xmm0,%xmm2
    761 L$oop_dec1_4:
    762 .byte	102,15,56,222,209
    763 	decl	%eax
    764 	movups	(%rcx),%xmm1
    765 	leaq	16(%rcx),%rcx
    766 	jnz	L$oop_dec1_4
    767 .byte	102,15,56,223,209
    768 	movups	%xmm2,(%rsi)
    769 	pxor	%xmm2,%xmm2
    770 	jmp	L$ecb_ret
    771 .p2align	4
    772 L$ecb_dec_two:
    773 	call	_aesni_decrypt2
    774 	movups	%xmm2,(%rsi)
    775 	pxor	%xmm2,%xmm2
    776 	movups	%xmm3,16(%rsi)
    777 	pxor	%xmm3,%xmm3
    778 	jmp	L$ecb_ret
    779 .p2align	4
    780 L$ecb_dec_three:
    781 	call	_aesni_decrypt3
    782 	movups	%xmm2,(%rsi)
    783 	pxor	%xmm2,%xmm2
    784 	movups	%xmm3,16(%rsi)
    785 	pxor	%xmm3,%xmm3
    786 	movups	%xmm4,32(%rsi)
    787 	pxor	%xmm4,%xmm4
    788 	jmp	L$ecb_ret
    789 .p2align	4
    790 L$ecb_dec_four:
    791 	call	_aesni_decrypt4
    792 	movups	%xmm2,(%rsi)
    793 	pxor	%xmm2,%xmm2
    794 	movups	%xmm3,16(%rsi)
    795 	pxor	%xmm3,%xmm3
    796 	movups	%xmm4,32(%rsi)
    797 	pxor	%xmm4,%xmm4
    798 	movups	%xmm5,48(%rsi)
    799 	pxor	%xmm5,%xmm5
    800 	jmp	L$ecb_ret
    801 .p2align	4
    802 L$ecb_dec_five:
        	// Five blocks: use the 6-lane helper with a zeroed dummy sixth lane.
    803 	xorps	%xmm7,%xmm7
    804 	call	_aesni_decrypt6
    805 	movups	%xmm2,(%rsi)
    806 	pxor	%xmm2,%xmm2
    807 	movups	%xmm3,16(%rsi)
    808 	pxor	%xmm3,%xmm3
    809 	movups	%xmm4,32(%rsi)
    810 	pxor	%xmm4,%xmm4
    811 	movups	%xmm5,48(%rsi)
    812 	pxor	%xmm5,%xmm5
    813 	movups	%xmm6,64(%rsi)
    814 	pxor	%xmm6,%xmm6
    815 	pxor	%xmm7,%xmm7
    816 	jmp	L$ecb_ret
    817 .p2align	4
    818 L$ecb_dec_six:
        	// Six blocks: this case falls through into L$ecb_ret instead of jumping.
    819 	call	_aesni_decrypt6
    820 	movups	%xmm2,(%rsi)
    821 	pxor	%xmm2,%xmm2
    822 	movups	%xmm3,16(%rsi)
    823 	pxor	%xmm3,%xmm3
    824 	movups	%xmm4,32(%rsi)
    825 	pxor	%xmm4,%xmm4
    826 	movups	%xmm5,48(%rsi)
    827 	pxor	%xmm5,%xmm5
    828 	movups	%xmm6,64(%rsi)
    829 	pxor	%xmm6,%xmm6
    830 	movups	%xmm7,80(%rsi)
    831 	pxor	%xmm7,%xmm7
    832 
    833 L$ecb_ret:
        	// Common exit: wipe the two round-key registers, then rep ret (F3 C3).
    834 	xorps	%xmm0,%xmm0
    835 	pxor	%xmm1,%xmm1
    836 	.byte	0xf3,0xc3
    837 
    838 .globl	_aesni_ccm64_encrypt_blocks
    839 .private_extern _aesni_ccm64_encrypt_blocks
    840 
    841 .p2align	4
        // _aesni_ccm64_encrypt_blocks: per 16-byte block, encrypt a counter value to get a
        // keystream block, XOR it into the input, and fold the input into a running
        // 16-byte value that is itself encrypted each block (CBC-MAC style chaining).
        //   In:  rdi = input pointer, rsi = output pointer
        //        rdx = number of 16-byte blocks; must be at least 1, because the first
        //              block is processed before the count is decremented and tested
        //        rcx = pointer to the key schedule (round count read from offset 240)
        //        r8  = pointer to the 16-byte counter block (read only)
        //        r9  = pointer to the 16-byte running value (read at entry, written at exit)
        //   Constants: L$increment64 and L$bswap_mask are defined outside this view and are
        //   read with movdqa, so they must be 16-byte aligned. Presumably a 64-bit
        //   increment and a byte-reversal shuffle mask - confirm at their definitions.
        //   Register roles: xmm6 = counter in shuffled form, xmm2 = counter lane,
        //   xmm3 = running value lane, xmm8 = current data block, r11 = key base,
        //   r10 = 16 - 16*n (initial round-key index), rcx = key + 32 + 16*n.
        //   Clobbers: rax, rcx, rdx, rsi, rdi, r10, r11, xmm0-xmm3, xmm6-xmm9, flags.
        //   Raw opcodes: 102,15,56,220,X = aesenc; 221 = aesenclast (209/217 use %xmm1 on
        //   %xmm2/%xmm3, 208/216 use %xmm0); 102,15,56,0,X = pshufb.
    842 _aesni_ccm64_encrypt_blocks:
    843 	movl	240(%rcx),%eax
    844 	movdqu	(%r8),%xmm6
    845 	movdqa	L$increment64(%rip),%xmm9
    846 	movdqa	L$bswap_mask(%rip),%xmm7
    847 
    848 	shll	$4,%eax
    849 	movl	$16,%r10d
    850 	leaq	0(%rcx),%r11
    851 	movdqu	(%r9),%xmm3
    852 	movdqa	%xmm6,%xmm2
    853 	leaq	32(%rcx,%rax,1),%rcx
        	// 66 0F 38 00 F7 = pshufb %xmm7,%xmm6: keep the counter in shuffled form for paddq.
    854 .byte	102,15,56,0,247
    855 	subq	%rax,%r10
        	// Jump to the label that follows the alignment padding.
    856 	jmp	L$ccm64_enc_outer
    857 .p2align	4
    858 L$ccm64_enc_outer:
    859 	movups	(%r11),%xmm0
    860 	movq	%r10,%rax
    861 	movups	(%rdi),%xmm8
    862 
        	// xmm2 = counter ^ key0; xmm3 = running value ^ input ^ key0.
    863 	xorps	%xmm0,%xmm2
    864 	movups	16(%r11),%xmm1
    865 	xorps	%xmm8,%xmm0
    866 	xorps	%xmm0,%xmm3
    867 	movups	32(%r11),%xmm0
    868 
    869 L$ccm64_enc2_loop:
    870 .byte	102,15,56,220,209
    871 .byte	102,15,56,220,217
    872 	movups	(%rcx,%rax,1),%xmm1
    873 	addq	$32,%rax
    874 .byte	102,15,56,220,208
    875 .byte	102,15,56,220,216
        	// The AES ops and movups leave the flags from addq intact for jnz.
    876 	movups	-16(%rcx,%rax,1),%xmm0
    877 	jnz	L$ccm64_enc2_loop
    878 .byte	102,15,56,220,209
    879 .byte	102,15,56,220,217
        	// Advance the shuffled counter for the next block.
    880 	paddq	%xmm9,%xmm6
        	// decq sets ZF for the jnz at the bottom of the outer loop; nothing in
        	// between (AES ops, leaq, xorps, movdqa, movups, pshufb) writes EFLAGS.
    881 	decq	%rdx
    882 .byte	102,15,56,221,208
    883 .byte	102,15,56,221,216
    884 
    885 	leaq	16(%rdi),%rdi
        	// Output block = input ^ encrypted counter.
    886 	xorps	%xmm2,%xmm8
    887 	movdqa	%xmm6,%xmm2
    888 	movups	%xmm8,(%rsi)
        	// 66 0F 38 00 D7 = pshufb %xmm7,%xmm2: shuffle the next counter back.
    889 .byte	102,15,56,0,215
    890 	leaq	16(%rsi),%rsi
    891 	jnz	L$ccm64_enc_outer
    892 
        	// Write the running value back and wipe every data/key register used.
    893 	pxor	%xmm0,%xmm0
    894 	pxor	%xmm1,%xmm1
    895 	pxor	%xmm2,%xmm2
    896 	movups	%xmm3,(%r9)
    897 	pxor	%xmm3,%xmm3
    898 	pxor	%xmm8,%xmm8
    899 	pxor	%xmm6,%xmm6
        	// F3 C3 = rep ret.
    900 	.byte	0xf3,0xc3
    901 
    902 .globl	_aesni_ccm64_decrypt_blocks
    903 .private_extern _aesni_ccm64_decrypt_blocks
    904 
    905 .p2align	4
        // _aesni_ccm64_decrypt_blocks: per 16-byte block, XOR the input with an encrypted
        // counter value to get the output, then fold that output into a running 16-byte
        // value that is itself encrypted each block (CBC-MAC style chaining). Only the
        // forward cipher (aesenc/aesenclast) is used.
        //   In:  rdi = input pointer, rsi = output pointer
        //        rdx = number of 16-byte blocks; must be at least 1, because the first
        //              block is processed before the count is tested
        //        rcx = pointer to the key schedule (round count read from offset 240)
        //        r8  = pointer to the 16-byte counter block (read only)
        //        r9  = pointer to the 16-byte running value (read at entry, written at exit)
        //   Constants: L$increment64 and L$bswap_mask are defined outside this view and are
        //   read with movdqa, so they must be 16-byte aligned. Presumably a 64-bit
        //   increment and a byte-reversal shuffle mask - confirm at their definitions.
        //   Structure: the first keystream block is produced by a one-lane loop; afterwards
        //   each pass of the two-lane loop encrypts the next counter (xmm2) and updates the
        //   running value (xmm3) together; the running value's last update is one-lane again.
        //   Clobbers: rax, rcx, rdx, rsi, rdi, r10, r11, xmm0-xmm3, xmm6-xmm9, flags.
        //   Raw opcodes: 102,15,56,220,X = aesenc; 221 = aesenclast (209/217 use %xmm1 on
        //   %xmm2/%xmm3, 208/216 use %xmm0); 102,15,56,0,X = pshufb.
    906 _aesni_ccm64_decrypt_blocks:
    907 	movl	240(%rcx),%eax
    908 	movups	(%r8),%xmm6
    909 	movdqu	(%r9),%xmm3
    910 	movdqa	L$increment64(%rip),%xmm9
    911 	movdqa	L$bswap_mask(%rip),%xmm7
    912 
    913 	movaps	%xmm6,%xmm2
    914 	movl	%eax,%r10d
    915 	movq	%rcx,%r11
        	// 66 0F 38 00 F7 = pshufb %xmm7,%xmm6: keep the counter in shuffled form for paddq.
    916 .byte	102,15,56,0,247
        	// One-lane encryption of the initial counter block (xmm2).
    917 	movups	(%rcx),%xmm0
    918 	movups	16(%rcx),%xmm1
    919 	leaq	32(%rcx),%rcx
    920 	xorps	%xmm0,%xmm2
    921 L$oop_enc1_5:
    922 .byte	102,15,56,220,209
    923 	decl	%eax
    924 	movups	(%rcx),%xmm1
    925 	leaq	16(%rcx),%rcx
    926 	jnz	L$oop_enc1_5
    927 .byte	102,15,56,221,209
        	// Set up the two-lane loop: r10 = 16 - 16*n (initial round-key index),
        	// rcx = key + 32 + 16*n. shll on r10d zero-extends into r10.
    928 	shll	$4,%r10d
    929 	movl	$16,%eax
    930 	movups	(%rdi),%xmm8
    931 	paddq	%xmm9,%xmm6
    932 	leaq	16(%rdi),%rdi
    933 	subq	%r10,%rax
    934 	leaq	32(%r11,%r10,1),%rcx
    935 	movq	%rax,%r10
        	// Jump to the label that follows the alignment padding.
    936 	jmp	L$ccm64_dec_outer
    937 .p2align	4
    938 L$ccm64_dec_outer:
        	// Output block = input ^ encrypted counter; then stage the next counter in xmm2.
    939 	xorps	%xmm2,%xmm8
    940 	movdqa	%xmm6,%xmm2
    941 	movups	%xmm8,(%rsi)
    942 	leaq	16(%rsi),%rsi
        	// 66 0F 38 00 D7 = pshufb %xmm7,%xmm2.
    943 .byte	102,15,56,0,215
    944 
    945 	subq	$1,%rdx
    946 	jz	L$ccm64_dec_break
    947 
        	// xmm2 = next counter ^ key0; xmm3 = running value ^ output block ^ key0.
    948 	movups	(%r11),%xmm0
    949 	movq	%r10,%rax
    950 	movups	16(%r11),%xmm1
    951 	xorps	%xmm0,%xmm8
    952 	xorps	%xmm0,%xmm2
    953 	xorps	%xmm8,%xmm3
    954 	movups	32(%r11),%xmm0
    955 	jmp	L$ccm64_dec2_loop
    956 .p2align	4
    957 L$ccm64_dec2_loop:
    958 .byte	102,15,56,220,209
    959 .byte	102,15,56,220,217
    960 	movups	(%rcx,%rax,1),%xmm1
    961 	addq	$32,%rax
    962 .byte	102,15,56,220,208
    963 .byte	102,15,56,220,216
        	// The AES ops and movups leave the flags from addq intact for jnz.
    964 	movups	-16(%rcx,%rax,1),%xmm0
    965 	jnz	L$ccm64_dec2_loop
        	// Fetch the next input block and advance the counter while the last rounds run.
    966 	movups	(%rdi),%xmm8
    967 	paddq	%xmm9,%xmm6
    968 .byte	102,15,56,220,209
    969 .byte	102,15,56,220,217
    970 .byte	102,15,56,221,208
    971 .byte	102,15,56,221,216
    972 	leaq	16(%rdi),%rdi
    973 	jmp	L$ccm64_dec_outer
    974 
    975 .p2align	4
    976 L$ccm64_dec_break:
    977 
        	// Last block: fold the final output block into the running value with a
        	// one-lane loop on xmm3 (217 = %xmm1 applied to %xmm3), walking r11 over the keys.
    978 	movl	240(%r11),%eax
    979 	movups	(%r11),%xmm0
    980 	movups	16(%r11),%xmm1
    981 	xorps	%xmm0,%xmm8
    982 	leaq	32(%r11),%r11
    983 	xorps	%xmm8,%xmm3
    984 L$oop_enc1_6:
    985 .byte	102,15,56,220,217
    986 	decl	%eax
    987 	movups	(%r11),%xmm1
    988 	leaq	16(%r11),%r11
    989 	jnz	L$oop_enc1_6
    990 .byte	102,15,56,221,217
        	// Write the running value back and wipe every data/key register used.
    991 	pxor	%xmm0,%xmm0
    992 	pxor	%xmm1,%xmm1
    993 	pxor	%xmm2,%xmm2
    994 	movups	%xmm3,(%r9)
    995 	pxor	%xmm3,%xmm3
    996 	pxor	%xmm8,%xmm8
    997 	pxor	%xmm6,%xmm6
        	// F3 C3 = rep ret.
    998 	.byte	0xf3,0xc3
    999 
   1000 .globl	_aesni_ctr32_encrypt_blocks
   1001 .private_extern _aesni_ctr32_encrypt_blocks
   1002 
   1003 .p2align	4
   1004 _aesni_ctr32_encrypt_blocks:
   1005 	cmpq	$1,%rdx
   1006 	jne	L$ctr32_bulk
   1007 
   1008 
   1009 
   1010 	movups	(%r8),%xmm2
   1011 	movups	(%rdi),%xmm3
   1012 	movl	240(%rcx),%edx
   1013 	movups	(%rcx),%xmm0
   1014 	movups	16(%rcx),%xmm1
   1015 	leaq	32(%rcx),%rcx
   1016 	xorps	%xmm0,%xmm2
   1017 L$oop_enc1_7:
   1018 .byte	102,15,56,220,209
   1019 	decl	%edx
   1020 	movups	(%rcx),%xmm1
   1021 	leaq	16(%rcx),%rcx
   1022 	jnz	L$oop_enc1_7
   1023 .byte	102,15,56,221,209
   1024 	pxor	%xmm0,%xmm0
   1025 	pxor	%xmm1,%xmm1
   1026 	xorps	%xmm3,%xmm2
   1027 	pxor	%xmm3,%xmm3
   1028 	movups	%xmm2,(%rsi)
   1029 	xorps	%xmm2,%xmm2
   1030 	jmp	L$ctr32_epilogue
   1031 
   1032 .p2align	4
   1033 L$ctr32_bulk:
   1034 	leaq	(%rsp),%r11
   1035 	pushq	%rbp
   1036 	subq	$128,%rsp
   1037 	andq	$-16,%rsp
   1038 
   1039 
   1040 
   1041 
   1042 	movdqu	(%r8),%xmm2
   1043 	movdqu	(%rcx),%xmm0
   1044 	movl	12(%r8),%r8d
   1045 	pxor	%xmm0,%xmm2
   1046 	movl	12(%rcx),%ebp
   1047 	movdqa	%xmm2,0(%rsp)
   1048 	bswapl	%r8d
   1049 	movdqa	%xmm2,%xmm3
   1050 	movdqa	%xmm2,%xmm4
   1051 	movdqa	%xmm2,%xmm5
   1052 	movdqa	%xmm2,64(%rsp)
   1053 	movdqa	%xmm2,80(%rsp)
   1054 	movdqa	%xmm2,96(%rsp)
   1055 	movq	%rdx,%r10
   1056 	movdqa	%xmm2,112(%rsp)
   1057 
   1058 	leaq	1(%r8),%rax
   1059 	leaq	2(%r8),%rdx
   1060 	bswapl	%eax
   1061 	bswapl	%edx
   1062 	xorl	%ebp,%eax
   1063 	xorl	%ebp,%edx
   1064 .byte	102,15,58,34,216,3
   1065 	leaq	3(%r8),%rax
   1066 	movdqa	%xmm3,16(%rsp)
   1067 .byte	102,15,58,34,226,3
   1068 	bswapl	%eax
   1069 	movq	%r10,%rdx
   1070 	leaq	4(%r8),%r10
   1071 	movdqa	%xmm4,32(%rsp)
   1072 	xorl	%ebp,%eax
   1073 	bswapl	%r10d
   1074 .byte	102,15,58,34,232,3
   1075 	xorl	%ebp,%r10d
   1076 	movdqa	%xmm5,48(%rsp)
   1077 	leaq	5(%r8),%r9
   1078 	movl	%r10d,64+12(%rsp)
   1079 	bswapl	%r9d
   1080 	leaq	6(%r8),%r10
   1081 	movl	240(%rcx),%eax
   1082 	xorl	%ebp,%r9d
   1083 	bswapl	%r10d
   1084 	movl	%r9d,80+12(%rsp)
   1085 	xorl	%ebp,%r10d
   1086 	leaq	7(%r8),%r9
   1087 	movl	%r10d,96+12(%rsp)
   1088 	bswapl	%r9d
   1089 	movl	_OPENSSL_ia32cap_P+4(%rip),%r10d
   1090 	xorl	%ebp,%r9d
   1091 	andl	$71303168,%r10d
   1092 	movl	%r9d,112+12(%rsp)
   1093 
   1094 	movups	16(%rcx),%xmm1
   1095 
   1096 	movdqa	64(%rsp),%xmm6
   1097 	movdqa	80(%rsp),%xmm7
   1098 
   1099 	cmpq	$8,%rdx
   1100 	jb	L$ctr32_tail
   1101 
   1102 	subq	$6,%rdx
   1103 	cmpl	$4194304,%r10d
   1104 	je	L$ctr32_6x
   1105 
   1106 	leaq	128(%rcx),%rcx
   1107 	subq	$2,%rdx
   1108 	jmp	L$ctr32_loop8
   1109 
   1110 .p2align	4
   1111 L$ctr32_6x:
   1112 	shll	$4,%eax
   1113 	movl	$48,%r10d
   1114 	bswapl	%ebp
   1115 	leaq	32(%rcx,%rax,1),%rcx
   1116 	subq	%rax,%r10
   1117 	jmp	L$ctr32_loop6
   1118 
   1119 .p2align	4
   1120 L$ctr32_loop6:
   1121 	addl	$6,%r8d
   1122 	movups	-48(%rcx,%r10,1),%xmm0
   1123 .byte	102,15,56,220,209
   1124 	movl	%r8d,%eax
   1125 	xorl	%ebp,%eax
   1126 .byte	102,15,56,220,217
   1127 .byte	0x0f,0x38,0xf1,0x44,0x24,12
   1128 	leal	1(%r8),%eax
   1129 .byte	102,15,56,220,225
   1130 	xorl	%ebp,%eax
   1131 .byte	0x0f,0x38,0xf1,0x44,0x24,28
   1132 .byte	102,15,56,220,233
   1133 	leal	2(%r8),%eax
   1134 	xorl	%ebp,%eax
   1135 .byte	102,15,56,220,241
   1136 .byte	0x0f,0x38,0xf1,0x44,0x24,44
   1137 	leal	3(%r8),%eax
   1138 .byte	102,15,56,220,249
   1139 	movups	-32(%rcx,%r10,1),%xmm1
   1140 	xorl	%ebp,%eax
   1141 
   1142 .byte	102,15,56,220,208
   1143 .byte	0x0f,0x38,0xf1,0x44,0x24,60
   1144 	leal	4(%r8),%eax
   1145 .byte	102,15,56,220,216
   1146 	xorl	%ebp,%eax
   1147 .byte	0x0f,0x38,0xf1,0x44,0x24,76
   1148 .byte	102,15,56,220,224
   1149 	leal	5(%r8),%eax
   1150 	xorl	%ebp,%eax
   1151 .byte	102,15,56,220,232
   1152 .byte	0x0f,0x38,0xf1,0x44,0x24,92
   1153 	movq	%r10,%rax
   1154 .byte	102,15,56,220,240
   1155 .byte	102,15,56,220,248
   1156 	movups	-16(%rcx,%r10,1),%xmm0
   1157 
   1158 	call	L$enc_loop6
   1159 
   1160 	movdqu	(%rdi),%xmm8
   1161 	movdqu	16(%rdi),%xmm9
   1162 	movdqu	32(%rdi),%xmm10
   1163 	movdqu	48(%rdi),%xmm11
   1164 	movdqu	64(%rdi),%xmm12
   1165 	movdqu	80(%rdi),%xmm13
   1166 	leaq	96(%rdi),%rdi
   1167 	movups	-64(%rcx,%r10,1),%xmm1
   1168 	pxor	%xmm2,%xmm8
   1169 	movaps	0(%rsp),%xmm2
   1170 	pxor	%xmm3,%xmm9
   1171 	movaps	16(%rsp),%xmm3
   1172 	pxor	%xmm4,%xmm10
   1173 	movaps	32(%rsp),%xmm4
   1174 	pxor	%xmm5,%xmm11
   1175 	movaps	48(%rsp),%xmm5
   1176 	pxor	%xmm6,%xmm12
   1177 	movaps	64(%rsp),%xmm6
   1178 	pxor	%xmm7,%xmm13
   1179 	movaps	80(%rsp),%xmm7
   1180 	movdqu	%xmm8,(%rsi)
   1181 	movdqu	%xmm9,16(%rsi)
   1182 	movdqu	%xmm10,32(%rsi)
   1183 	movdqu	%xmm11,48(%rsi)
   1184 	movdqu	%xmm12,64(%rsi)
   1185 	movdqu	%xmm13,80(%rsi)
   1186 	leaq	96(%rsi),%rsi
   1187 
   1188 	subq	$6,%rdx
   1189 	jnc	L$ctr32_loop6
   1190 
   1191 	addq	$6,%rdx
   1192 	jz	L$ctr32_done
   1193 
   1194 	leal	-48(%r10),%eax
   1195 	leaq	-80(%rcx,%r10,1),%rcx
   1196 	negl	%eax
   1197 	shrl	$4,%eax
   1198 	jmp	L$ctr32_tail
   1199 
   1200 .p2align	5
   1201 L$ctr32_loop8:
   1202 	addl	$8,%r8d
   1203 	movdqa	96(%rsp),%xmm8
   1204 .byte	102,15,56,220,209
   1205 	movl	%r8d,%r9d
   1206 	movdqa	112(%rsp),%xmm9
   1207 .byte	102,15,56,220,217
   1208 	bswapl	%r9d
   1209 	movups	32-128(%rcx),%xmm0
   1210 .byte	102,15,56,220,225
   1211 	xorl	%ebp,%r9d
   1212 	nop
   1213 .byte	102,15,56,220,233
   1214 	movl	%r9d,0+12(%rsp)
   1215 	leaq	1(%r8),%r9
   1216 .byte	102,15,56,220,241
   1217 .byte	102,15,56,220,249
   1218 .byte	102,68,15,56,220,193
   1219 .byte	102,68,15,56,220,201
   1220 	movups	48-128(%rcx),%xmm1
   1221 	bswapl	%r9d
   1222 .byte	102,15,56,220,208
   1223 .byte	102,15,56,220,216
   1224 	xorl	%ebp,%r9d
   1225 .byte	0x66,0x90
   1226 .byte	102,15,56,220,224
   1227 .byte	102,15,56,220,232
   1228 	movl	%r9d,16+12(%rsp)
   1229 	leaq	2(%r8),%r9
   1230 .byte	102,15,56,220,240
   1231 .byte	102,15,56,220,248
   1232 .byte	102,68,15,56,220,192
   1233 .byte	102,68,15,56,220,200
   1234 	movups	64-128(%rcx),%xmm0
   1235 	bswapl	%r9d
   1236 .byte	102,15,56,220,209
   1237 .byte	102,15,56,220,217
   1238 	xorl	%ebp,%r9d
   1239 .byte	0x66,0x90
   1240 .byte	102,15,56,220,225
   1241 .byte	102,15,56,220,233
   1242 	movl	%r9d,32+12(%rsp)
   1243 	leaq	3(%r8),%r9
   1244 .byte	102,15,56,220,241
   1245 .byte	102,15,56,220,249
   1246 .byte	102,68,15,56,220,193
   1247 .byte	102,68,15,56,220,201
   1248 	movups	80-128(%rcx),%xmm1
   1249 	bswapl	%r9d
   1250 .byte	102,15,56,220,208
   1251 .byte	102,15,56,220,216
   1252 	xorl	%ebp,%r9d
   1253 .byte	0x66,0x90
   1254 .byte	102,15,56,220,224
   1255 .byte	102,15,56,220,232
   1256 	movl	%r9d,48+12(%rsp)
   1257 	leaq	4(%r8),%r9
   1258 .byte	102,15,56,220,240
   1259 .byte	102,15,56,220,248
   1260 .byte	102,68,15,56,220,192
   1261 .byte	102,68,15,56,220,200
   1262 	movups	96-128(%rcx),%xmm0
   1263 	bswapl	%r9d
   1264 .byte	102,15,56,220,209
   1265 .byte	102,15,56,220,217
   1266 	xorl	%ebp,%r9d
   1267 .byte	0x66,0x90
   1268 .byte	102,15,56,220,225
   1269 .byte	102,15,56,220,233
   1270 	movl	%r9d,64+12(%rsp)
   1271 	leaq	5(%r8),%r9
   1272 .byte	102,15,56,220,241
   1273 .byte	102,15,56,220,249
   1274 .byte	102,68,15,56,220,193
   1275 .byte	102,68,15,56,220,201
   1276 	movups	112-128(%rcx),%xmm1
   1277 	bswapl	%r9d
   1278 .byte	102,15,56,220,208
   1279 .byte	102,15,56,220,216
   1280 	xorl	%ebp,%r9d
   1281 .byte	0x66,0x90
   1282 .byte	102,15,56,220,224
   1283 .byte	102,15,56,220,232
   1284 	movl	%r9d,80+12(%rsp)
   1285 	leaq	6(%r8),%r9
   1286 .byte	102,15,56,220,240
   1287 .byte	102,15,56,220,248
   1288 .byte	102,68,15,56,220,192
   1289 .byte	102,68,15,56,220,200
   1290 	movups	128-128(%rcx),%xmm0
   1291 	bswapl	%r9d
   1292 .byte	102,15,56,220,209
   1293 .byte	102,15,56,220,217
   1294 	xorl	%ebp,%r9d
   1295 .byte	0x66,0x90
   1296 .byte	102,15,56,220,225
   1297 .byte	102,15,56,220,233
   1298 	movl	%r9d,96+12(%rsp)
   1299 	leaq	7(%r8),%r9
   1300 .byte	102,15,56,220,241
   1301 .byte	102,15,56,220,249
   1302 .byte	102,68,15,56,220,193
   1303 .byte	102,68,15,56,220,201
   1304 	movups	144-128(%rcx),%xmm1
   1305 	bswapl	%r9d
   1306 .byte	102,15,56,220,208
   1307 .byte	102,15,56,220,216
   1308 .byte	102,15,56,220,224
   1309 	xorl	%ebp,%r9d
   1310 	movdqu	0(%rdi),%xmm10
   1311 .byte	102,15,56,220,232
   1312 	movl	%r9d,112+12(%rsp)
   1313 	cmpl	$11,%eax
   1314 .byte	102,15,56,220,240
   1315 .byte	102,15,56,220,248
   1316 .byte	102,68,15,56,220,192
   1317 .byte	102,68,15,56,220,200
   1318 	movups	160-128(%rcx),%xmm0
   1319 
   1320 	jb	L$ctr32_enc_done
   1321 
   1322 .byte	102,15,56,220,209
   1323 .byte	102,15,56,220,217
   1324 .byte	102,15,56,220,225
   1325 .byte	102,15,56,220,233
   1326 .byte	102,15,56,220,241
   1327 .byte	102,15,56,220,249
   1328 .byte	102,68,15,56,220,193
   1329 .byte	102,68,15,56,220,201
   1330 	movups	176-128(%rcx),%xmm1
   1331 
   1332 .byte	102,15,56,220,208
   1333 .byte	102,15,56,220,216
   1334 .byte	102,15,56,220,224
   1335 .byte	102,15,56,220,232
   1336 .byte	102,15,56,220,240
   1337 .byte	102,15,56,220,248
   1338 .byte	102,68,15,56,220,192
   1339 .byte	102,68,15,56,220,200
   1340 	movups	192-128(%rcx),%xmm0
   1341 	je	L$ctr32_enc_done
   1342 
   1343 .byte	102,15,56,220,209
   1344 .byte	102,15,56,220,217
   1345 .byte	102,15,56,220,225
   1346 .byte	102,15,56,220,233
   1347 .byte	102,15,56,220,241
   1348 .byte	102,15,56,220,249
   1349 .byte	102,68,15,56,220,193
   1350 .byte	102,68,15,56,220,201
   1351 	movups	208-128(%rcx),%xmm1
   1352 
   1353 .byte	102,15,56,220,208
   1354 .byte	102,15,56,220,216
   1355 .byte	102,15,56,220,224
   1356 .byte	102,15,56,220,232
   1357 .byte	102,15,56,220,240
   1358 .byte	102,15,56,220,248
   1359 .byte	102,68,15,56,220,192
   1360 .byte	102,68,15,56,220,200
   1361 	movups	224-128(%rcx),%xmm0
   1362 	jmp	L$ctr32_enc_done
   1363 
   1364 .p2align	4
   1365 L$ctr32_enc_done:
   1366 	movdqu	16(%rdi),%xmm11
   1367 	pxor	%xmm0,%xmm10
   1368 	movdqu	32(%rdi),%xmm12
   1369 	pxor	%xmm0,%xmm11
   1370 	movdqu	48(%rdi),%xmm13
   1371 	pxor	%xmm0,%xmm12
   1372 	movdqu	64(%rdi),%xmm14
   1373 	pxor	%xmm0,%xmm13
   1374 	movdqu	80(%rdi),%xmm15
   1375 	pxor	%xmm0,%xmm14
   1376 	pxor	%xmm0,%xmm15
   1377 .byte	102,15,56,220,209
   1378 .byte	102,15,56,220,217
   1379 .byte	102,15,56,220,225
   1380 .byte	102,15,56,220,233
   1381 .byte	102,15,56,220,241
   1382 .byte	102,15,56,220,249
   1383 .byte	102,68,15,56,220,193
   1384 .byte	102,68,15,56,220,201
   1385 	movdqu	96(%rdi),%xmm1
   1386 	leaq	128(%rdi),%rdi
   1387 
   1388 .byte	102,65,15,56,221,210
   1389 	pxor	%xmm0,%xmm1
   1390 	movdqu	112-128(%rdi),%xmm10
   1391 .byte	102,65,15,56,221,219
   1392 	pxor	%xmm0,%xmm10
   1393 	movdqa	0(%rsp),%xmm11
   1394 .byte	102,65,15,56,221,228
   1395 .byte	102,65,15,56,221,237
   1396 	movdqa	16(%rsp),%xmm12
   1397 	movdqa	32(%rsp),%xmm13
   1398 .byte	102,65,15,56,221,246
   1399 .byte	102,65,15,56,221,255
   1400 	movdqa	48(%rsp),%xmm14
   1401 	movdqa	64(%rsp),%xmm15
   1402 .byte	102,68,15,56,221,193
   1403 	movdqa	80(%rsp),%xmm0
   1404 	movups	16-128(%rcx),%xmm1
   1405 .byte	102,69,15,56,221,202
   1406 
   1407 	movups	%xmm2,(%rsi)
   1408 	movdqa	%xmm11,%xmm2
   1409 	movups	%xmm3,16(%rsi)
   1410 	movdqa	%xmm12,%xmm3
   1411 	movups	%xmm4,32(%rsi)
   1412 	movdqa	%xmm13,%xmm4
   1413 	movups	%xmm5,48(%rsi)
   1414 	movdqa	%xmm14,%xmm5
   1415 	movups	%xmm6,64(%rsi)
   1416 	movdqa	%xmm15,%xmm6
   1417 	movups	%xmm7,80(%rsi)
   1418 	movdqa	%xmm0,%xmm7
   1419 	movups	%xmm8,96(%rsi)
   1420 	movups	%xmm9,112(%rsi)
   1421 	leaq	128(%rsi),%rsi
   1422 
   1423 	subq	$8,%rdx
   1424 	jnc	L$ctr32_loop8
   1425 
   1426 	addq	$8,%rdx
   1427 	jz	L$ctr32_done
   1428 	leaq	-128(%rcx),%rcx
   1429 
   1430 L$ctr32_tail:
   1431 
   1432 
   1433 	leaq	16(%rcx),%rcx
   1434 	cmpq	$4,%rdx
   1435 	jb	L$ctr32_loop3
   1436 	je	L$ctr32_loop4
   1437 
   1438 
   1439 	shll	$4,%eax
   1440 	movdqa	96(%rsp),%xmm8
   1441 	pxor	%xmm9,%xmm9
   1442 
   1443 	movups	16(%rcx),%xmm0
   1444 .byte	102,15,56,220,209
   1445 .byte	102,15,56,220,217
   1446 	leaq	32-16(%rcx,%rax,1),%rcx
   1447 	negq	%rax
   1448 .byte	102,15,56,220,225
   1449 	addq	$16,%rax
   1450 	movups	(%rdi),%xmm10
   1451 .byte	102,15,56,220,233
   1452 .byte	102,15,56,220,241
   1453 	movups	16(%rdi),%xmm11
   1454 	movups	32(%rdi),%xmm12
   1455 .byte	102,15,56,220,249
   1456 .byte	102,68,15,56,220,193
   1457 
   1458 	call	L$enc_loop8_enter
   1459 
   1460 	movdqu	48(%rdi),%xmm13
   1461 	pxor	%xmm10,%xmm2
   1462 	movdqu	64(%rdi),%xmm10
   1463 	pxor	%xmm11,%xmm3
   1464 	movdqu	%xmm2,(%rsi)
   1465 	pxor	%xmm12,%xmm4
   1466 	movdqu	%xmm3,16(%rsi)
   1467 	pxor	%xmm13,%xmm5
   1468 	movdqu	%xmm4,32(%rsi)
   1469 	pxor	%xmm10,%xmm6
   1470 	movdqu	%xmm5,48(%rsi)
   1471 	movdqu	%xmm6,64(%rsi)
   1472 	cmpq	$6,%rdx
   1473 	jb	L$ctr32_done
   1474 
   1475 	movups	80(%rdi),%xmm11
   1476 	xorps	%xmm11,%xmm7
   1477 	movups	%xmm7,80(%rsi)
   1478 	je	L$ctr32_done
   1479 
   1480 	movups	96(%rdi),%xmm12
   1481 	xorps	%xmm12,%xmm8
   1482 	movups	%xmm8,96(%rsi)
   1483 	jmp	L$ctr32_done
   1484 
   1485 .p2align	5
   1486 L$ctr32_loop4:
   1487 .byte	102,15,56,220,209
   1488 	leaq	16(%rcx),%rcx
   1489 	decl	%eax
   1490 .byte	102,15,56,220,217
   1491 .byte	102,15,56,220,225
   1492 .byte	102,15,56,220,233
   1493 	movups	(%rcx),%xmm1
   1494 	jnz	L$ctr32_loop4
   1495 .byte	102,15,56,221,209
   1496 .byte	102,15,56,221,217
   1497 	movups	(%rdi),%xmm10
   1498 	movups	16(%rdi),%xmm11
   1499 .byte	102,15,56,221,225
   1500 .byte	102,15,56,221,233
   1501 	movups	32(%rdi),%xmm12
   1502 	movups	48(%rdi),%xmm13
   1503 
   1504 	xorps	%xmm10,%xmm2
   1505 	movups	%xmm2,(%rsi)
   1506 	xorps	%xmm11,%xmm3
   1507 	movups	%xmm3,16(%rsi)
   1508 	pxor	%xmm12,%xmm4
   1509 	movdqu	%xmm4,32(%rsi)
   1510 	pxor	%xmm13,%xmm5
   1511 	movdqu	%xmm5,48(%rsi)
   1512 	jmp	L$ctr32_done
   1513 
   1514 .p2align	5
   1515 L$ctr32_loop3:
   1516 .byte	102,15,56,220,209
   1517 	leaq	16(%rcx),%rcx
   1518 	decl	%eax
   1519 .byte	102,15,56,220,217
   1520 .byte	102,15,56,220,225
   1521 	movups	(%rcx),%xmm1
   1522 	jnz	L$ctr32_loop3
   1523 .byte	102,15,56,221,209
   1524 .byte	102,15,56,221,217
   1525 .byte	102,15,56,221,225
   1526 
   1527 	movups	(%rdi),%xmm10
   1528 	xorps	%xmm10,%xmm2
   1529 	movups	%xmm2,(%rsi)
   1530 	cmpq	$2,%rdx
   1531 	jb	L$ctr32_done
   1532 
   1533 	movups	16(%rdi),%xmm11
   1534 	xorps	%xmm11,%xmm3
   1535 	movups	%xmm3,16(%rsi)
   1536 	je	L$ctr32_done
   1537 
   1538 	movups	32(%rdi),%xmm12
   1539 	xorps	%xmm12,%xmm4
   1540 	movups	%xmm4,32(%rsi)
   1541 
   1542 L$ctr32_done:
   1543 	xorps	%xmm0,%xmm0
   1544 	xorl	%ebp,%ebp
   1545 	pxor	%xmm1,%xmm1
   1546 	pxor	%xmm2,%xmm2
   1547 	pxor	%xmm3,%xmm3
   1548 	pxor	%xmm4,%xmm4
   1549 	pxor	%xmm5,%xmm5
   1550 	pxor	%xmm6,%xmm6
   1551 	pxor	%xmm7,%xmm7
   1552 	movaps	%xmm0,0(%rsp)
   1553 	pxor	%xmm8,%xmm8
   1554 	movaps	%xmm0,16(%rsp)
   1555 	pxor	%xmm9,%xmm9
   1556 	movaps	%xmm0,32(%rsp)
   1557 	pxor	%xmm10,%xmm10
   1558 	movaps	%xmm0,48(%rsp)
   1559 	pxor	%xmm11,%xmm11
   1560 	movaps	%xmm0,64(%rsp)
   1561 	pxor	%xmm12,%xmm12
   1562 	movaps	%xmm0,80(%rsp)
   1563 	pxor	%xmm13,%xmm13
   1564 	movaps	%xmm0,96(%rsp)
   1565 	pxor	%xmm14,%xmm14
   1566 	movaps	%xmm0,112(%rsp)
   1567 	pxor	%xmm15,%xmm15
   1568 	movq	-8(%r11),%rbp
   1569 	leaq	(%r11),%rsp
   1570 L$ctr32_epilogue:
   1571 	.byte	0xf3,0xc3
   1572 
   1573 .globl	_aesni_xts_encrypt
   1574 .private_extern _aesni_xts_encrypt
   1575 
        // ------------------------------------------------------------------
        // _aesni_xts_encrypt -- XTS-mode encryption built on AES-NI rounds.
        //
        // Registers on entry, as consumed by the code below (they follow the
        // System V AMD64 integer argument order):
        //   %rdi  input pointer  (advanced as blocks are read)
        //   %rsi  output pointer (advanced as blocks are written)
        //   %rdx  length in bytes; the low 4 bits are a partial-block tail
        //   %rcx  key schedule for the data blocks, round counter at +240
        //   %r8   key schedule used once to encrypt the initial tweak,
        //         round counter at +240
        //   %r9   pointer to the 16-byte initial tweak value
        //
        // The counter at +240 is the number of aesenc rounds executed before
        // the final aesenclast (see the single-block loops), so the last
        // round key lives at offset 16*(counter+1) in a schedule.
        //
        // Stack frame (112 bytes, 16-byte aligned):
        //   0..80(%rsp)  six values of (tweak ^ last round key); used as the
        //                memory operand of aesenclast in the 6-block loop
        //   96(%rsp)     round key 0 ^ last round key
        // %r11 holds the caller stack pointer for the whole function and
        // %rbp (saved by the push) holds the data key schedule base.
        //
        // The .byte sequences are hand-encoded AES-NI instructions:
        //   102,15,56,220,modrm      aesenc      (modrm 209..249: %xmm1 into
        //                            %xmm2..%xmm7; 208..248: %xmm0 into
        //                            %xmm2..%xmm7)
        //   102,15,56,221,modrm      aesenclast  (register form)
        //   102,15,56,221,M,36,disp  aesenclast  disp(%rsp) into
        //                            %xmm2..%xmm7
        //   0xf3,0xc3                rep ret
        //
        // Before returning, %xmm0-%xmm15 and the stack frame are zeroed.
        // Registers written: rax, rcx, rdx, rsi, rdi, r8, r9, r10, r11,
        // xmm0-xmm15, flags.
        // NOTE(review): nothing here checks for a length below 16 bytes;
        // presumably the caller guarantees at least one full block.
        // ------------------------------------------------------------------
   1576 .p2align	4
   1577 _aesni_xts_encrypt:
        // Prologue: remember the caller %rsp in %r11, save %rbp at -8(%r11),
        // then carve out an aligned 112-byte frame.
   1578 	leaq	(%rsp),%r11
   1579 	pushq	%rbp
   1580 	subq	$112,%rsp
   1581 	andq	$-16,%rsp
        // Encrypt the 16 bytes at (%r9) with the schedule at %r8; the result
        // in %xmm2 is the first tweak. %r10d picks up the data round counter.
   1582 	movups	(%r9),%xmm2
   1583 	movl	240(%r8),%eax
   1584 	movl	240(%rcx),%r10d
   1585 	movups	(%r8),%xmm0
   1586 	movups	16(%r8),%xmm1
   1587 	leaq	32(%r8),%r8
   1588 	xorps	%xmm0,%xmm2
   1589 L$oop_enc1_8:
   1590 .byte	102,15,56,220,209
   1591 	decl	%eax
   1592 	movups	(%r8),%xmm1
   1593 	leaq	16(%r8),%r8
   1594 	jnz	L$oop_enc1_8
   1595 .byte	102,15,56,221,209
        // Data-key setup: %xmm0 = round key 0, %rbp = key schedule base,
        // %eax = round counter, %r10 = 16*counter, %r9 = original length,
        // %rdx = length rounded down to whole 16-byte blocks.
   1596 	movups	(%rcx),%xmm0
   1597 	movq	%rcx,%rbp
   1598 	movl	%r10d,%eax
   1599 	shll	$4,%r10d
   1600 	movq	%rdx,%r9
   1601 	andq	$-16,%rdx
   1602 
        // %xmm1 = last round key (offset 16 + 16*counter).
   1603 	movups	16(%rcx,%r10,1),%xmm1
   1604 
        // Build the first six tweaks. Each step doubles the 128-bit tweak in
        // %xmm15: paddq shifts both 64-bit halves left by one, and the bits
        // shifted out of each half (tracked through the dword copies in
        // %xmm9, turned into masks by psrad $31) select parts of the
        // L$xts_magic constant that are XORed back in.
        // NOTE(review): L$xts_magic is defined outside this chunk;
        // presumably it is the GF(2^128) reduction constant -- confirm.
        // Result: %xmm10..%xmm14 = tweak[0..4] ^ round key 0,
        //         %xmm15 = tweak[5] (not yet XORed with round key 0),
        //         96(%rsp) = round key 0 ^ last round key.
   1605 	movdqa	L$xts_magic(%rip),%xmm8
   1606 	movdqa	%xmm2,%xmm15
   1607 	pshufd	$0x5f,%xmm2,%xmm9
   1608 	pxor	%xmm0,%xmm1
   1609 	movdqa	%xmm9,%xmm14
   1610 	paddd	%xmm9,%xmm9
   1611 	movdqa	%xmm15,%xmm10
   1612 	psrad	$31,%xmm14
   1613 	paddq	%xmm15,%xmm15
   1614 	pand	%xmm8,%xmm14
   1615 	pxor	%xmm0,%xmm10
   1616 	pxor	%xmm14,%xmm15
   1617 	movdqa	%xmm9,%xmm14
   1618 	paddd	%xmm9,%xmm9
   1619 	movdqa	%xmm15,%xmm11
   1620 	psrad	$31,%xmm14
   1621 	paddq	%xmm15,%xmm15
   1622 	pand	%xmm8,%xmm14
   1623 	pxor	%xmm0,%xmm11
   1624 	pxor	%xmm14,%xmm15
   1625 	movdqa	%xmm9,%xmm14
   1626 	paddd	%xmm9,%xmm9
   1627 	movdqa	%xmm15,%xmm12
   1628 	psrad	$31,%xmm14
   1629 	paddq	%xmm15,%xmm15
   1630 	pand	%xmm8,%xmm14
   1631 	pxor	%xmm0,%xmm12
   1632 	pxor	%xmm14,%xmm15
   1633 	movdqa	%xmm9,%xmm14
   1634 	paddd	%xmm9,%xmm9
   1635 	movdqa	%xmm15,%xmm13
   1636 	psrad	$31,%xmm14
   1637 	paddq	%xmm15,%xmm15
   1638 	pand	%xmm8,%xmm14
   1639 	pxor	%xmm0,%xmm13
   1640 	pxor	%xmm14,%xmm15
   1641 	movdqa	%xmm15,%xmm14
   1642 	psrad	$31,%xmm9
   1643 	paddq	%xmm15,%xmm15
   1644 	pand	%xmm8,%xmm9
   1645 	pxor	%xmm0,%xmm14
   1646 	pxor	%xmm9,%xmm15
   1647 	movaps	%xmm1,96(%rsp)
   1648 
        // Fewer than six whole blocks (96 bytes)? Take the short path.
   1649 	subq	$96,%rdx
   1650 	jc	L$xts_enc_short
   1651 
        // Set up the 6-block loop: %rcx = schedule base + 32 + 16*counter,
        // %rax = %r10 = 112 - 16*counter (negative-going index that reaches
        // zero when the inner round loop is finished), %xmm1 = round key 1,
        // %r8 = address of L$xts_magic.
   1652 	movl	$16+96,%eax
   1653 	leaq	32(%rbp,%r10,1),%rcx
   1654 	subq	%r10,%rax
   1655 	movups	16(%rbp),%xmm1
   1656 	movq	%rax,%r10
   1657 	leaq	L$xts_magic(%rip),%r8
   1658 	jmp	L$xts_enc_grandloop
   1659 
        // Main loop, six blocks per iteration. Invariant at the top:
        //   %xmm10..%xmm14 = tweak[0..4] ^ round key 0
        //   %xmm15         = tweak[5] (plain)
        //   %xmm0 / %xmm1  = round key 0 / round key 1
        //   %rax = %r10    = inner loop index
        // Loads, tweak whitening and the first AES rounds are interleaved.
   1660 .p2align	5
   1661 L$xts_enc_grandloop:
   1662 	movdqu	0(%rdi),%xmm2
   1663 	movdqa	%xmm0,%xmm8
   1664 	movdqu	16(%rdi),%xmm3
   1665 	pxor	%xmm10,%xmm2
   1666 	movdqu	32(%rdi),%xmm4
   1667 	pxor	%xmm11,%xmm3
   1668 .byte	102,15,56,220,209
   1669 	movdqu	48(%rdi),%xmm5
   1670 	pxor	%xmm12,%xmm4
   1671 .byte	102,15,56,220,217
   1672 	movdqu	64(%rdi),%xmm6
   1673 	pxor	%xmm13,%xmm5
   1674 .byte	102,15,56,220,225
   1675 	movdqu	80(%rdi),%xmm7
   1676 	pxor	%xmm15,%xmm8
   1677 	movdqa	96(%rsp),%xmm9
   1678 	pxor	%xmm14,%xmm6
   1679 .byte	102,15,56,220,233
   1680 	movups	32(%rbp),%xmm0
   1681 	leaq	96(%rdi),%rdi
   1682 	pxor	%xmm8,%xmm7
   1683 
        // XOR each whitened tweak with (round key 0 ^ last round key) from
        // 96(%rsp), giving tweak ^ last round key, and park the results in
        // the stack slots so the final aesenclast applies the last round key
        // and the output tweak in one step. (%xmm13 is stored to 48(%rsp)
        // further down, after the inner loop.)
   1684 	pxor	%xmm9,%xmm10
   1685 .byte	102,15,56,220,241
   1686 	pxor	%xmm9,%xmm11
   1687 	movdqa	%xmm10,0(%rsp)
   1688 .byte	102,15,56,220,249
   1689 	movups	48(%rbp),%xmm1
   1690 	pxor	%xmm9,%xmm12
   1691 
   1692 .byte	102,15,56,220,208
   1693 	pxor	%xmm9,%xmm13
   1694 	movdqa	%xmm11,16(%rsp)
   1695 .byte	102,15,56,220,216
   1696 	pxor	%xmm9,%xmm14
   1697 	movdqa	%xmm12,32(%rsp)
   1698 .byte	102,15,56,220,224
   1699 .byte	102,15,56,220,232
   1700 	pxor	%xmm9,%xmm8
   1701 	movdqa	%xmm14,64(%rsp)
   1702 .byte	102,15,56,220,240
   1703 .byte	102,15,56,220,248
   1704 	movups	64(%rbp),%xmm0
   1705 	movdqa	%xmm8,80(%rsp)
        // Seed %xmm9 with the carry-tracking dwords of the current tweak for
        // the tweak updates that follow the inner loop.
   1706 	pshufd	$0x5f,%xmm15,%xmm9
   1707 	jmp	L$xts_enc_loop6
        // Inner round loop: two AES rounds on all six blocks per pass,
        // alternating %xmm1 / %xmm0 as the round key. %rax steps by 32 up to
        // zero; the jnz consumes the flags of the addq (movups leaves flags
        // untouched).
   1708 .p2align	5
   1709 L$xts_enc_loop6:
   1710 .byte	102,15,56,220,209
   1711 .byte	102,15,56,220,217
   1712 .byte	102,15,56,220,225
   1713 .byte	102,15,56,220,233
   1714 .byte	102,15,56,220,241
   1715 .byte	102,15,56,220,249
   1716 	movups	-64(%rcx,%rax,1),%xmm1
   1717 	addq	$32,%rax
   1718 
   1719 .byte	102,15,56,220,208
   1720 .byte	102,15,56,220,216
   1721 .byte	102,15,56,220,224
   1722 .byte	102,15,56,220,232
   1723 .byte	102,15,56,220,240
   1724 .byte	102,15,56,220,248
   1725 	movups	-80(%rcx,%rax,1),%xmm0
   1726 	jnz	L$xts_enc_loop6
   1727 
        // Last five aesenc rounds, interleaved with computing the next six
        // tweaks. Each tweak step doubles %xmm15 as in the setup code;
        // %xmm10..%xmm14 are rebuilt as (round key 0 ^ new tweak), starting
        // from a copy of round key 0 loaded from (%rbp).
   1728 	movdqa	(%r8),%xmm8
   1729 	movdqa	%xmm9,%xmm14
   1730 	paddd	%xmm9,%xmm9
   1731 .byte	102,15,56,220,209
   1732 	paddq	%xmm15,%xmm15
   1733 	psrad	$31,%xmm14
   1734 .byte	102,15,56,220,217
   1735 	pand	%xmm8,%xmm14
   1736 	movups	(%rbp),%xmm10
   1737 .byte	102,15,56,220,225
   1738 .byte	102,15,56,220,233
   1739 .byte	102,15,56,220,241
   1740 	pxor	%xmm14,%xmm15
   1741 	movaps	%xmm10,%xmm11
   1742 .byte	102,15,56,220,249
   1743 	movups	-64(%rcx),%xmm1
   1744 
   1745 	movdqa	%xmm9,%xmm14
   1746 .byte	102,15,56,220,208
   1747 	paddd	%xmm9,%xmm9
   1748 	pxor	%xmm15,%xmm10
   1749 .byte	102,15,56,220,216
   1750 	psrad	$31,%xmm14
   1751 	paddq	%xmm15,%xmm15
   1752 .byte	102,15,56,220,224
   1753 .byte	102,15,56,220,232
   1754 	pand	%xmm8,%xmm14
   1755 	movaps	%xmm11,%xmm12
   1756 .byte	102,15,56,220,240
   1757 	pxor	%xmm14,%xmm15
   1758 	movdqa	%xmm9,%xmm14
   1759 .byte	102,15,56,220,248
   1760 	movups	-48(%rcx),%xmm0
   1761 
   1762 	paddd	%xmm9,%xmm9
   1763 .byte	102,15,56,220,209
   1764 	pxor	%xmm15,%xmm11
   1765 	psrad	$31,%xmm14
   1766 .byte	102,15,56,220,217
   1767 	paddq	%xmm15,%xmm15
   1768 	pand	%xmm8,%xmm14
   1769 .byte	102,15,56,220,225
   1770 .byte	102,15,56,220,233
        // Late store of the fourth (tweak ^ last round key) slot, done just
        // before %xmm13 is recycled for the next iteration.
   1771 	movdqa	%xmm13,48(%rsp)
   1772 	pxor	%xmm14,%xmm15
   1773 .byte	102,15,56,220,241
   1774 	movaps	%xmm12,%xmm13
   1775 	movdqa	%xmm9,%xmm14
   1776 .byte	102,15,56,220,249
   1777 	movups	-32(%rcx),%xmm1
   1778 
   1779 	paddd	%xmm9,%xmm9
   1780 .byte	102,15,56,220,208
   1781 	pxor	%xmm15,%xmm12
   1782 	psrad	$31,%xmm14
   1783 .byte	102,15,56,220,216
   1784 	paddq	%xmm15,%xmm15
   1785 	pand	%xmm8,%xmm14
   1786 .byte	102,15,56,220,224
   1787 .byte	102,15,56,220,232
   1788 .byte	102,15,56,220,240
   1789 	pxor	%xmm14,%xmm15
   1790 	movaps	%xmm13,%xmm14
   1791 .byte	102,15,56,220,248
   1792 
        // %xmm0 is free as a scratch mask here; it is reloaded with round
        // key 0 and %xmm1 with round key 1 for the next iteration.
   1793 	movdqa	%xmm9,%xmm0
   1794 	paddd	%xmm9,%xmm9
   1795 .byte	102,15,56,220,209
   1796 	pxor	%xmm15,%xmm13
   1797 	psrad	$31,%xmm0
   1798 .byte	102,15,56,220,217
   1799 	paddq	%xmm15,%xmm15
   1800 	pand	%xmm8,%xmm0
   1801 .byte	102,15,56,220,225
   1802 .byte	102,15,56,220,233
   1803 	pxor	%xmm0,%xmm15
   1804 	movups	(%rbp),%xmm0
   1805 .byte	102,15,56,220,241
   1806 .byte	102,15,56,220,249
   1807 	movups	16(%rbp),%xmm1
   1808 
        // Final round: aesenclast with the stack slots (tweak ^ last round
        // key) yields the finished ciphertext blocks. The sixth tweak update
        // and the reload of the loop index from %r10 are interleaved.
   1809 	pxor	%xmm15,%xmm14
   1810 .byte	102,15,56,221,84,36,0
   1811 	psrad	$31,%xmm9
   1812 	paddq	%xmm15,%xmm15
   1813 .byte	102,15,56,221,92,36,16
   1814 .byte	102,15,56,221,100,36,32
   1815 	pand	%xmm8,%xmm9
   1816 	movq	%r10,%rax
   1817 .byte	102,15,56,221,108,36,48
   1818 .byte	102,15,56,221,116,36,64
   1819 .byte	102,15,56,221,124,36,80
   1820 	pxor	%xmm9,%xmm15
   1821 
        // Write the six ciphertext blocks and loop while at least 96 more
        // bytes remain.
   1822 	leaq	96(%rsi),%rsi
   1823 	movups	%xmm2,-96(%rsi)
   1824 	movups	%xmm3,-80(%rsi)
   1825 	movups	%xmm4,-64(%rsi)
   1826 	movups	%xmm5,-48(%rsi)
   1827 	movups	%xmm6,-32(%rsi)
   1828 	movups	%xmm7,-16(%rsi)
   1829 	subq	$96,%rdx
   1830 	jnc	L$xts_enc_grandloop
   1831 
        // Leaving the main loop: recover the round counter in %eax from the
        // loop index ((112 - %r10d) / 16) and point %rcx back at the key
        // schedule base.
   1832 	movl	$16+96,%eax
   1833 	subl	%r10d,%eax
   1834 	movq	%rbp,%rcx
   1835 	shrl	$4,%eax
   1836 
        // Short path: 0..5 whole blocks remain. On both entry paths %xmm0 is
        // round key 0 and %xmm10..%xmm14 still carry it; the pxor with %xmm0
        // strips it from each tweak as the dispatch proceeds. %r10d keeps
        // the round counter for the ciphertext-stealing step.
   1837 L$xts_enc_short:
   1838 
   1839 	movl	%eax,%r10d
   1840 	pxor	%xmm0,%xmm10
   1841 	addq	$96,%rdx
   1842 	jz	L$xts_enc_done
   1843 
        // Dispatch on remaining bytes: 16 -> one, 32 -> two, 48 -> three,
        // 64 -> four, otherwise (80) fall through to the five-block case.
   1844 	pxor	%xmm0,%xmm11
   1845 	cmpq	$0x20,%rdx
   1846 	jb	L$xts_enc_one
   1847 	pxor	%xmm0,%xmm12
   1848 	je	L$xts_enc_two
   1849 
   1850 	pxor	%xmm0,%xmm13
   1851 	cmpq	$0x40,%rdx
   1852 	jb	L$xts_enc_three
   1853 	pxor	%xmm0,%xmm14
   1854 	je	L$xts_enc_four
   1855 
        // Five blocks: XOR with tweaks, run the 6-wide helper with a zeroed
        // sixth lane (%xmm7), XOR with tweaks again. %xmm10 is left holding
        // the next unused tweak (%xmm15) for ciphertext stealing.
        // NOTE(review): _aesni_encrypt6 is defined outside this chunk;
        // presumably it takes the key in %rcx and the round counter in %eax
        // like _aesni_encrypt2 -- confirm.
   1856 	movdqu	(%rdi),%xmm2
   1857 	movdqu	16(%rdi),%xmm3
   1858 	movdqu	32(%rdi),%xmm4
   1859 	pxor	%xmm10,%xmm2
   1860 	movdqu	48(%rdi),%xmm5
   1861 	pxor	%xmm11,%xmm3
   1862 	movdqu	64(%rdi),%xmm6
   1863 	leaq	80(%rdi),%rdi
   1864 	pxor	%xmm12,%xmm4
   1865 	pxor	%xmm13,%xmm5
   1866 	pxor	%xmm14,%xmm6
   1867 	pxor	%xmm7,%xmm7
   1868 
   1869 	call	_aesni_encrypt6
   1870 
   1871 	xorps	%xmm10,%xmm2
   1872 	movdqa	%xmm15,%xmm10
   1873 	xorps	%xmm11,%xmm3
   1874 	xorps	%xmm12,%xmm4
   1875 	movdqu	%xmm2,(%rsi)
   1876 	xorps	%xmm13,%xmm5
   1877 	movdqu	%xmm3,16(%rsi)
   1878 	xorps	%xmm14,%xmm6
   1879 	movdqu	%xmm4,32(%rsi)
   1880 	movdqu	%xmm5,48(%rsi)
   1881 	movdqu	%xmm6,64(%rsi)
   1882 	leaq	80(%rsi),%rsi
   1883 	jmp	L$xts_enc_done
   1884 
        // One block: inline single-block encryption between two tweak XORs.
        // The next tweak (%xmm11) moves into %xmm10.
   1885 .p2align	4
   1886 L$xts_enc_one:
   1887 	movups	(%rdi),%xmm2
   1888 	leaq	16(%rdi),%rdi
   1889 	xorps	%xmm10,%xmm2
   1890 	movups	(%rcx),%xmm0
   1891 	movups	16(%rcx),%xmm1
   1892 	leaq	32(%rcx),%rcx
   1893 	xorps	%xmm0,%xmm2
   1894 L$oop_enc1_9:
   1895 .byte	102,15,56,220,209
   1896 	decl	%eax
   1897 	movups	(%rcx),%xmm1
   1898 	leaq	16(%rcx),%rcx
   1899 	jnz	L$oop_enc1_9
   1900 .byte	102,15,56,221,209
   1901 	xorps	%xmm10,%xmm2
   1902 	movdqa	%xmm11,%xmm10
   1903 	movups	%xmm2,(%rsi)
   1904 	leaq	16(%rsi),%rsi
   1905 	jmp	L$xts_enc_done
   1906 
        // Two blocks via _aesni_encrypt2 (key in %rcx, round counter in
        // %eax, blocks in %xmm2/%xmm3). The next tweak (%xmm12) moves into
        // %xmm10.
   1907 .p2align	4
   1908 L$xts_enc_two:
   1909 	movups	(%rdi),%xmm2
   1910 	movups	16(%rdi),%xmm3
   1911 	leaq	32(%rdi),%rdi
   1912 	xorps	%xmm10,%xmm2
   1913 	xorps	%xmm11,%xmm3
   1914 
   1915 	call	_aesni_encrypt2
   1916 
   1917 	xorps	%xmm10,%xmm2
   1918 	movdqa	%xmm12,%xmm10
   1919 	xorps	%xmm11,%xmm3
   1920 	movups	%xmm2,(%rsi)
   1921 	movups	%xmm3,16(%rsi)
   1922 	leaq	32(%rsi),%rsi
   1923 	jmp	L$xts_enc_done
   1924 
        // Three blocks via _aesni_encrypt3 (defined outside this chunk).
        // The next tweak (%xmm13) moves into %xmm10.
   1925 .p2align	4
   1926 L$xts_enc_three:
   1927 	movups	(%rdi),%xmm2
   1928 	movups	16(%rdi),%xmm3
   1929 	movups	32(%rdi),%xmm4
   1930 	leaq	48(%rdi),%rdi
   1931 	xorps	%xmm10,%xmm2
   1932 	xorps	%xmm11,%xmm3
   1933 	xorps	%xmm12,%xmm4
   1934 
   1935 	call	_aesni_encrypt3
   1936 
   1937 	xorps	%xmm10,%xmm2
   1938 	movdqa	%xmm13,%xmm10
   1939 	xorps	%xmm11,%xmm3
   1940 	xorps	%xmm12,%xmm4
   1941 	movups	%xmm2,(%rsi)
   1942 	movups	%xmm3,16(%rsi)
   1943 	movups	%xmm4,32(%rsi)
   1944 	leaq	48(%rsi),%rsi
   1945 	jmp	L$xts_enc_done
   1946 
        // Four blocks via _aesni_encrypt4 (defined outside this chunk).
        // The next tweak (%xmm14) moves into %xmm10.
   1947 .p2align	4
   1948 L$xts_enc_four:
   1949 	movups	(%rdi),%xmm2
   1950 	movups	16(%rdi),%xmm3
   1951 	movups	32(%rdi),%xmm4
   1952 	xorps	%xmm10,%xmm2
   1953 	movups	48(%rdi),%xmm5
   1954 	leaq	64(%rdi),%rdi
   1955 	xorps	%xmm11,%xmm3
   1956 	xorps	%xmm12,%xmm4
   1957 	xorps	%xmm13,%xmm5
   1958 
   1959 	call	_aesni_encrypt4
   1960 
   1961 	pxor	%xmm10,%xmm2
   1962 	movdqa	%xmm14,%xmm10
   1963 	pxor	%xmm11,%xmm3
   1964 	pxor	%xmm12,%xmm4
   1965 	movdqu	%xmm2,(%rsi)
   1966 	pxor	%xmm13,%xmm5
   1967 	movdqu	%xmm3,16(%rsi)
   1968 	movdqu	%xmm4,32(%rsi)
   1969 	movdqu	%xmm5,48(%rsi)
   1970 	leaq	64(%rsi),%rsi
   1971 	jmp	L$xts_enc_done
   1972 
        // All whole blocks are written; %xmm10 holds the next unused tweak.
        // %r9 = length mod 16. If it is zero there is nothing left to do.
   1973 .p2align	4
   1974 L$xts_enc_done:
   1975 	andq	$15,%r9
   1976 	jz	L$xts_enc_ret
   1977 	movq	%r9,%rdx
   1978 
        // Ciphertext stealing, one byte per pass: the leading bytes of the
        // last ciphertext block (at -16(%rsi)) are moved out to the partial
        // output tail, and the trailing input bytes take their place.
        // NOTE(review): this reads -16(%rsi), so it assumes at least one
        // full block was already written -- confirm the caller enforces it.
   1979 L$xts_enc_steal:
   1980 	movzbl	(%rdi),%eax
   1981 	movzbl	-16(%rsi),%ecx
   1982 	leaq	1(%rdi),%rdi
   1983 	movb	%al,-16(%rsi)
   1984 	movb	%cl,0(%rsi)
   1985 	leaq	1(%rsi),%rsi
   1986 	subq	$1,%rdx
   1987 	jnz	L$xts_enc_steal
   1988 
        // Rewind %rsi, restore the key pointer and round counter (both were
        // clobbered above), then encrypt the patched block in place with the
        // tweak in %xmm10.
   1989 	subq	%r9,%rsi
   1990 	movq	%rbp,%rcx
   1991 	movl	%r10d,%eax
   1992 
   1993 	movups	-16(%rsi),%xmm2
   1994 	xorps	%xmm10,%xmm2
   1995 	movups	(%rcx),%xmm0
   1996 	movups	16(%rcx),%xmm1
   1997 	leaq	32(%rcx),%rcx
   1998 	xorps	%xmm0,%xmm2
   1999 L$oop_enc1_10:
   2000 .byte	102,15,56,220,209
   2001 	decl	%eax
   2002 	movups	(%rcx),%xmm1
   2003 	leaq	16(%rcx),%rcx
   2004 	jnz	L$oop_enc1_10
   2005 .byte	102,15,56,221,209
   2006 	xorps	%xmm10,%xmm2
   2007 	movups	%xmm2,-16(%rsi)
   2008 
        // Epilogue: zero every xmm register and all seven 16-byte frame
        // slots so no key, tweak or data material is left behind, then
        // restore %rbp (saved at -8(%r11)) and the caller stack pointer.
   2009 L$xts_enc_ret:
   2010 	xorps	%xmm0,%xmm0
   2011 	pxor	%xmm1,%xmm1
   2012 	pxor	%xmm2,%xmm2
   2013 	pxor	%xmm3,%xmm3
   2014 	pxor	%xmm4,%xmm4
   2015 	pxor	%xmm5,%xmm5
   2016 	pxor	%xmm6,%xmm6
   2017 	pxor	%xmm7,%xmm7
   2018 	movaps	%xmm0,0(%rsp)
   2019 	pxor	%xmm8,%xmm8
   2020 	movaps	%xmm0,16(%rsp)
   2021 	pxor	%xmm9,%xmm9
   2022 	movaps	%xmm0,32(%rsp)
   2023 	pxor	%xmm10,%xmm10
   2024 	movaps	%xmm0,48(%rsp)
   2025 	pxor	%xmm11,%xmm11
   2026 	movaps	%xmm0,64(%rsp)
   2027 	pxor	%xmm12,%xmm12
   2028 	movaps	%xmm0,80(%rsp)
   2029 	pxor	%xmm13,%xmm13
   2030 	movaps	%xmm0,96(%rsp)
   2031 	pxor	%xmm14,%xmm14
   2032 	pxor	%xmm15,%xmm15
   2033 	movq	-8(%r11),%rbp
   2034 	leaq	(%r11),%rsp
   2035 L$xts_enc_epilogue:
   2036 	.byte	0xf3,0xc3
   2037 
   2038 .globl	_aesni_xts_decrypt
   2039 .private_extern _aesni_xts_decrypt
   2040 
   2041 .p2align	4
   2042 _aesni_xts_decrypt:
   2043 	leaq	(%rsp),%r11
   2044 	pushq	%rbp
   2045 	subq	$112,%rsp
   2046 	andq	$-16,%rsp
   2047 	movups	(%r9),%xmm2
   2048 	movl	240(%r8),%eax
   2049 	movl	240(%rcx),%r10d
   2050 	movups	(%r8),%xmm0
   2051 	movups	16(%r8),%xmm1
   2052 	leaq	32(%r8),%r8
   2053 	xorps	%xmm0,%xmm2
   2054 L$oop_enc1_11:
   2055 .byte	102,15,56,220,209
   2056 	decl	%eax
   2057 	movups	(%r8),%xmm1
   2058 	leaq	16(%r8),%r8
   2059 	jnz	L$oop_enc1_11
   2060 .byte	102,15,56,221,209
   2061 	xorl	%eax,%eax
   2062 	testq	$15,%rdx
   2063 	setnz	%al
   2064 	shlq	$4,%rax
   2065 	subq	%rax,%rdx
   2066 
   2067 	movups	(%rcx),%xmm0
   2068 	movq	%rcx,%rbp
   2069 	movl	%r10d,%eax
   2070 	shll	$4,%r10d
   2071 	movq	%rdx,%r9
   2072 	andq	$-16,%rdx
   2073 
   2074 	movups	16(%rcx,%r10,1),%xmm1
   2075 
   2076 	movdqa	L$xts_magic(%rip),%xmm8
   2077 	movdqa	%xmm2,%xmm15
   2078 	pshufd	$0x5f,%xmm2,%xmm9
   2079 	pxor	%xmm0,%xmm1
   2080 	movdqa	%xmm9,%xmm14
   2081 	paddd	%xmm9,%xmm9
   2082 	movdqa	%xmm15,%xmm10
   2083 	psrad	$31,%xmm14
   2084 	paddq	%xmm15,%xmm15
   2085 	pand	%xmm8,%xmm14
   2086 	pxor	%xmm0,%xmm10
   2087 	pxor	%xmm14,%xmm15
   2088 	movdqa	%xmm9,%xmm14
   2089 	paddd	%xmm9,%xmm9
   2090 	movdqa	%xmm15,%xmm11
   2091 	psrad	$31,%xmm14
   2092 	paddq	%xmm15,%xmm15
   2093 	pand	%xmm8,%xmm14
   2094 	pxor	%xmm0,%xmm11
   2095 	pxor	%xmm14,%xmm15
   2096 	movdqa	%xmm9,%xmm14
   2097 	paddd	%xmm9,%xmm9
   2098 	movdqa	%xmm15,%xmm12
   2099 	psrad	$31,%xmm14
   2100 	paddq	%xmm15,%xmm15
   2101 	pand	%xmm8,%xmm14
   2102 	pxor	%xmm0,%xmm12
   2103 	pxor	%xmm14,%xmm15
   2104 	movdqa	%xmm9,%xmm14
   2105 	paddd	%xmm9,%xmm9
   2106 	movdqa	%xmm15,%xmm13
   2107 	psrad	$31,%xmm14
   2108 	paddq	%xmm15,%xmm15
   2109 	pand	%xmm8,%xmm14
   2110 	pxor	%xmm0,%xmm13
   2111 	pxor	%xmm14,%xmm15
   2112 	movdqa	%xmm15,%xmm14
   2113 	psrad	$31,%xmm9
   2114 	paddq	%xmm15,%xmm15
   2115 	pand	%xmm8,%xmm9
   2116 	pxor	%xmm0,%xmm14
   2117 	pxor	%xmm9,%xmm15
   2118 	movaps	%xmm1,96(%rsp)
   2119 
   2120 	subq	$96,%rdx
   2121 	jc	L$xts_dec_short
   2122 
   2123 	movl	$16+96,%eax
   2124 	leaq	32(%rbp,%r10,1),%rcx
   2125 	subq	%r10,%rax
   2126 	movups	16(%rbp),%xmm1
   2127 	movq	%rax,%r10
   2128 	leaq	L$xts_magic(%rip),%r8
   2129 	jmp	L$xts_dec_grandloop
   2130 
   2131 .p2align	5
   2132 L$xts_dec_grandloop:
   2133 	movdqu	0(%rdi),%xmm2
   2134 	movdqa	%xmm0,%xmm8
   2135 	movdqu	16(%rdi),%xmm3
   2136 	pxor	%xmm10,%xmm2
   2137 	movdqu	32(%rdi),%xmm4
   2138 	pxor	%xmm11,%xmm3
   2139 .byte	102,15,56,222,209
   2140 	movdqu	48(%rdi),%xmm5
   2141 	pxor	%xmm12,%xmm4
   2142 .byte	102,15,56,222,217
   2143 	movdqu	64(%rdi),%xmm6
   2144 	pxor	%xmm13,%xmm5
   2145 .byte	102,15,56,222,225
   2146 	movdqu	80(%rdi),%xmm7
   2147 	pxor	%xmm15,%xmm8
   2148 	movdqa	96(%rsp),%xmm9
   2149 	pxor	%xmm14,%xmm6
   2150 .byte	102,15,56,222,233
   2151 	movups	32(%rbp),%xmm0
   2152 	leaq	96(%rdi),%rdi
   2153 	pxor	%xmm8,%xmm7
   2154 
   2155 	pxor	%xmm9,%xmm10
   2156 .byte	102,15,56,222,241
   2157 	pxor	%xmm9,%xmm11
   2158 	movdqa	%xmm10,0(%rsp)
   2159 .byte	102,15,56,222,249
   2160 	movups	48(%rbp),%xmm1
   2161 	pxor	%xmm9,%xmm12
   2162 
   2163 .byte	102,15,56,222,208
   2164 	pxor	%xmm9,%xmm13
   2165 	movdqa	%xmm11,16(%rsp)
   2166 .byte	102,15,56,222,216
   2167 	pxor	%xmm9,%xmm14
   2168 	movdqa	%xmm12,32(%rsp)
   2169 .byte	102,15,56,222,224
   2170 .byte	102,15,56,222,232
   2171 	pxor	%xmm9,%xmm8
   2172 	movdqa	%xmm14,64(%rsp)
   2173 .byte	102,15,56,222,240
   2174 .byte	102,15,56,222,248
   2175 	movups	64(%rbp),%xmm0
   2176 	movdqa	%xmm8,80(%rsp)
   2177 	pshufd	$0x5f,%xmm15,%xmm9
   2178 	jmp	L$xts_dec_loop6
   2179 .p2align	5
   2180 L$xts_dec_loop6:
   2181 .byte	102,15,56,222,209
   2182 .byte	102,15,56,222,217
   2183 .byte	102,15,56,222,225
   2184 .byte	102,15,56,222,233
   2185 .byte	102,15,56,222,241
   2186 .byte	102,15,56,222,249
   2187 	movups	-64(%rcx,%rax,1),%xmm1
   2188 	addq	$32,%rax
   2189 
   2190 .byte	102,15,56,222,208
   2191 .byte	102,15,56,222,216
   2192 .byte	102,15,56,222,224
   2193 .byte	102,15,56,222,232
   2194 .byte	102,15,56,222,240
   2195 .byte	102,15,56,222,248
   2196 	movups	-80(%rcx,%rax,1),%xmm0
   2197 	jnz	L$xts_dec_loop6
   2198 
   2199 	movdqa	(%r8),%xmm8
   2200 	movdqa	%xmm9,%xmm14
   2201 	paddd	%xmm9,%xmm9
   2202 .byte	102,15,56,222,209
   2203 	paddq	%xmm15,%xmm15
   2204 	psrad	$31,%xmm14
   2205 .byte	102,15,56,222,217
   2206 	pand	%xmm8,%xmm14
   2207 	movups	(%rbp),%xmm10
   2208 .byte	102,15,56,222,225
   2209 .byte	102,15,56,222,233
   2210 .byte	102,15,56,222,241
   2211 	pxor	%xmm14,%xmm15
   2212 	movaps	%xmm10,%xmm11
   2213 .byte	102,15,56,222,249
   2214 	movups	-64(%rcx),%xmm1
   2215 
   2216 	movdqa	%xmm9,%xmm14
   2217 .byte	102,15,56,222,208
   2218 	paddd	%xmm9,%xmm9
   2219 	pxor	%xmm15,%xmm10
   2220 .byte	102,15,56,222,216
   2221 	psrad	$31,%xmm14
   2222 	paddq	%xmm15,%xmm15
   2223 .byte	102,15,56,222,224
   2224 .byte	102,15,56,222,232
   2225 	pand	%xmm8,%xmm14
   2226 	movaps	%xmm11,%xmm12
   2227 .byte	102,15,56,222,240
   2228 	pxor	%xmm14,%xmm15
   2229 	movdqa	%xmm9,%xmm14
   2230 .byte	102,15,56,222,248
   2231 	movups	-48(%rcx),%xmm0
   2232 
   2233 	paddd	%xmm9,%xmm9
   2234 .byte	102,15,56,222,209
   2235 	pxor	%xmm15,%xmm11
   2236 	psrad	$31,%xmm14
   2237 .byte	102,15,56,222,217
   2238 	paddq	%xmm15,%xmm15
   2239 	pand	%xmm8,%xmm14
   2240 .byte	102,15,56,222,225
   2241 .byte	102,15,56,222,233
   2242 	movdqa	%xmm13,48(%rsp)
   2243 	pxor	%xmm14,%xmm15
   2244 .byte	102,15,56,222,241
   2245 	movaps	%xmm12,%xmm13
   2246 	movdqa	%xmm9,%xmm14
   2247 .byte	102,15,56,222,249
   2248 	movups	-32(%rcx),%xmm1
   2249 
   2250 	paddd	%xmm9,%xmm9
   2251 .byte	102,15,56,222,208
   2252 	pxor	%xmm15,%xmm12
   2253 	psrad	$31,%xmm14
   2254 .byte	102,15,56,222,216
   2255 	paddq	%xmm15,%xmm15
   2256 	pand	%xmm8,%xmm14
   2257 .byte	102,15,56,222,224
   2258 .byte	102,15,56,222,232
   2259 .byte	102,15,56,222,240
   2260 	pxor	%xmm14,%xmm15
   2261 	movaps	%xmm13,%xmm14
   2262 .byte	102,15,56,222,248
   2263 
   2264 	movdqa	%xmm9,%xmm0
   2265 	paddd	%xmm9,%xmm9
   2266 .byte	102,15,56,222,209
   2267 	pxor	%xmm15,%xmm13
   2268 	psrad	$31,%xmm0
   2269 .byte	102,15,56,222,217
   2270 	paddq	%xmm15,%xmm15
   2271 	pand	%xmm8,%xmm0
   2272 .byte	102,15,56,222,225
   2273 .byte	102,15,56,222,233
   2274 	pxor	%xmm0,%xmm15
   2275 	movups	(%rbp),%xmm0
   2276 .byte	102,15,56,222,241
   2277 .byte	102,15,56,222,249
   2278 	movups	16(%rbp),%xmm1
   2279 
   2280 	pxor	%xmm15,%xmm14
   2281 .byte	102,15,56,223,84,36,0
   2282 	psrad	$31,%xmm9
   2283 	paddq	%xmm15,%xmm15
   2284 .byte	102,15,56,223,92,36,16
   2285 .byte	102,15,56,223,100,36,32
   2286 	pand	%xmm8,%xmm9
   2287 	movq	%r10,%rax
   2288 .byte	102,15,56,223,108,36,48
   2289 .byte	102,15,56,223,116,36,64
   2290 .byte	102,15,56,223,124,36,80
   2291 	pxor	%xmm9,%xmm15
   2292 
   2293 	leaq	96(%rsi),%rsi
   2294 	movups	%xmm2,-96(%rsi)
   2295 	movups	%xmm3,-80(%rsi)
   2296 	movups	%xmm4,-64(%rsi)
   2297 	movups	%xmm5,-48(%rsi)
   2298 	movups	%xmm6,-32(%rsi)
   2299 	movups	%xmm7,-16(%rsi)
   2300 	subq	$96,%rdx
   2301 	jnc	L$xts_dec_grandloop
   2302 
   2303 	movl	$16+96,%eax
   2304 	subl	%r10d,%eax
   2305 	movq	%rbp,%rcx
   2306 	shrl	$4,%eax
   2307 
   2308 L$xts_dec_short:
   2309 
   2310 	movl	%eax,%r10d
   2311 	pxor	%xmm0,%xmm10
   2312 	pxor	%xmm0,%xmm11
   2313 	addq	$96,%rdx
   2314 	jz	L$xts_dec_done
   2315 
   2316 	pxor	%xmm0,%xmm12
   2317 	cmpq	$0x20,%rdx
   2318 	jb	L$xts_dec_one
   2319 	pxor	%xmm0,%xmm13
   2320 	je	L$xts_dec_two
   2321 
   2322 	pxor	%xmm0,%xmm14
   2323 	cmpq	$0x40,%rdx
   2324 	jb	L$xts_dec_three
   2325 	je	L$xts_dec_four
   2326 
   2327 	movdqu	(%rdi),%xmm2
   2328 	movdqu	16(%rdi),%xmm3
   2329 	movdqu	32(%rdi),%xmm4
   2330 	pxor	%xmm10,%xmm2
   2331 	movdqu	48(%rdi),%xmm5
   2332 	pxor	%xmm11,%xmm3
   2333 	movdqu	64(%rdi),%xmm6
   2334 	leaq	80(%rdi),%rdi
   2335 	pxor	%xmm12,%xmm4
   2336 	pxor	%xmm13,%xmm5
   2337 	pxor	%xmm14,%xmm6
   2338 
   2339 	call	_aesni_decrypt6
   2340 
   2341 	xorps	%xmm10,%xmm2
   2342 	xorps	%xmm11,%xmm3
   2343 	xorps	%xmm12,%xmm4
   2344 	movdqu	%xmm2,(%rsi)
   2345 	xorps	%xmm13,%xmm5
   2346 	movdqu	%xmm3,16(%rsi)
   2347 	xorps	%xmm14,%xmm6
   2348 	movdqu	%xmm4,32(%rsi)
   2349 	pxor	%xmm14,%xmm14
   2350 	movdqu	%xmm5,48(%rsi)
   2351 	pcmpgtd	%xmm15,%xmm14
   2352 	movdqu	%xmm6,64(%rsi)
   2353 	leaq	80(%rsi),%rsi
   2354 	pshufd	$0x13,%xmm14,%xmm11
   2355 	andq	$15,%r9
   2356 	jz	L$xts_dec_ret
   2357 
   2358 	movdqa	%xmm15,%xmm10
   2359 	paddq	%xmm15,%xmm15
   2360 	pand	%xmm8,%xmm11
   2361 	pxor	%xmm15,%xmm11
   2362 	jmp	L$xts_dec_done2
   2363 
   2364 .p2align	4
   2365 L$xts_dec_one:
   2366 	movups	(%rdi),%xmm2
   2367 	leaq	16(%rdi),%rdi
   2368 	xorps	%xmm10,%xmm2
   2369 	movups	(%rcx),%xmm0
   2370 	movups	16(%rcx),%xmm1
   2371 	leaq	32(%rcx),%rcx
   2372 	xorps	%xmm0,%xmm2
   2373 L$oop_dec1_12:
   2374 .byte	102,15,56,222,209
   2375 	decl	%eax
   2376 	movups	(%rcx),%xmm1
   2377 	leaq	16(%rcx),%rcx
   2378 	jnz	L$oop_dec1_12
   2379 .byte	102,15,56,223,209
   2380 	xorps	%xmm10,%xmm2
   2381 	movdqa	%xmm11,%xmm10
   2382 	movups	%xmm2,(%rsi)
   2383 	movdqa	%xmm12,%xmm11
   2384 	leaq	16(%rsi),%rsi
   2385 	jmp	L$xts_dec_done
   2386 
   2387 .p2align	4
   2388 L$xts_dec_two:
   2389 	movups	(%rdi),%xmm2
   2390 	movups	16(%rdi),%xmm3
   2391 	leaq	32(%rdi),%rdi
   2392 	xorps	%xmm10,%xmm2
   2393 	xorps	%xmm11,%xmm3
   2394 
   2395 	call	_aesni_decrypt2
   2396 
   2397 	xorps	%xmm10,%xmm2
   2398 	movdqa	%xmm12,%xmm10
   2399 	xorps	%xmm11,%xmm3
   2400 	movdqa	%xmm13,%xmm11
   2401 	movups	%xmm2,(%rsi)
   2402 	movups	%xmm3,16(%rsi)
   2403 	leaq	32(%rsi),%rsi
   2404 	jmp	L$xts_dec_done
   2405 
   2406 .p2align	4
   2407 L$xts_dec_three:
   2408 	movups	(%rdi),%xmm2
   2409 	movups	16(%rdi),%xmm3
   2410 	movups	32(%rdi),%xmm4
   2411 	leaq	48(%rdi),%rdi
   2412 	xorps	%xmm10,%xmm2
   2413 	xorps	%xmm11,%xmm3
   2414 	xorps	%xmm12,%xmm4
   2415 
   2416 	call	_aesni_decrypt3
   2417 
   2418 	xorps	%xmm10,%xmm2
   2419 	movdqa	%xmm13,%xmm10
   2420 	xorps	%xmm11,%xmm3
   2421 	movdqa	%xmm14,%xmm11
   2422 	xorps	%xmm12,%xmm4
   2423 	movups	%xmm2,(%rsi)
   2424 	movups	%xmm3,16(%rsi)
   2425 	movups	%xmm4,32(%rsi)
   2426 	leaq	48(%rsi),%rsi
   2427 	jmp	L$xts_dec_done
   2428 
   2429 .p2align	4
   2430 L$xts_dec_four:
   2431 	movups	(%rdi),%xmm2
   2432 	movups	16(%rdi),%xmm3
   2433 	movups	32(%rdi),%xmm4
   2434 	xorps	%xmm10,%xmm2
   2435 	movups	48(%rdi),%xmm5
   2436 	leaq	64(%rdi),%rdi
   2437 	xorps	%xmm11,%xmm3
   2438 	xorps	%xmm12,%xmm4
   2439 	xorps	%xmm13,%xmm5
   2440 
   2441 	call	_aesni_decrypt4
   2442 
   2443 	pxor	%xmm10,%xmm2
   2444 	movdqa	%xmm14,%xmm10
   2445 	pxor	%xmm11,%xmm3
   2446 	movdqa	%xmm15,%xmm11
   2447 	pxor	%xmm12,%xmm4
   2448 	movdqu	%xmm2,(%rsi)
   2449 	pxor	%xmm13,%xmm5
   2450 	movdqu	%xmm3,16(%rsi)
   2451 	movdqu	%xmm4,32(%rsi)
   2452 	movdqu	%xmm5,48(%rsi)
   2453 	leaq	64(%rsi),%rsi
   2454 	jmp	L$xts_dec_done
   2455 
   2456 .p2align	4
   2457 L$xts_dec_done:
   2458 	andq	$15,%r9
   2459 	jz	L$xts_dec_ret
   2460 L$xts_dec_done2:
   2461 	movq	%r9,%rdx
   2462 	movq	%rbp,%rcx
   2463 	movl	%r10d,%eax
   2464 
   2465 	movups	(%rdi),%xmm2
   2466 	xorps	%xmm11,%xmm2
   2467 	movups	(%rcx),%xmm0
   2468 	movups	16(%rcx),%xmm1
   2469 	leaq	32(%rcx),%rcx
   2470 	xorps	%xmm0,%xmm2
   2471 L$oop_dec1_13:
   2472 .byte	102,15,56,222,209
   2473 	decl	%eax
   2474 	movups	(%rcx),%xmm1
   2475 	leaq	16(%rcx),%rcx
   2476 	jnz	L$oop_dec1_13
   2477 .byte	102,15,56,223,209
   2478 	xorps	%xmm11,%xmm2
   2479 	movups	%xmm2,(%rsi)
   2480 
   2481 L$xts_dec_steal:
   2482 	movzbl	16(%rdi),%eax
   2483 	movzbl	(%rsi),%ecx
   2484 	leaq	1(%rdi),%rdi
   2485 	movb	%al,(%rsi)
   2486 	movb	%cl,16(%rsi)
   2487 	leaq	1(%rsi),%rsi
   2488 	subq	$1,%rdx
   2489 	jnz	L$xts_dec_steal
   2490 
   2491 	subq	%r9,%rsi
   2492 	movq	%rbp,%rcx
   2493 	movl	%r10d,%eax
   2494 
   2495 	movups	(%rsi),%xmm2
   2496 	xorps	%xmm10,%xmm2
   2497 	movups	(%rcx),%xmm0
   2498 	movups	16(%rcx),%xmm1
   2499 	leaq	32(%rcx),%rcx
   2500 	xorps	%xmm0,%xmm2
   2501 L$oop_dec1_14:
   2502 .byte	102,15,56,222,209
   2503 	decl	%eax
   2504 	movups	(%rcx),%xmm1
   2505 	leaq	16(%rcx),%rcx
   2506 	jnz	L$oop_dec1_14
   2507 .byte	102,15,56,223,209
   2508 	xorps	%xmm10,%xmm2
   2509 	movups	%xmm2,(%rsi)
   2510 
   2511 L$xts_dec_ret:
   2512 	xorps	%xmm0,%xmm0
   2513 	pxor	%xmm1,%xmm1
   2514 	pxor	%xmm2,%xmm2
   2515 	pxor	%xmm3,%xmm3
   2516 	pxor	%xmm4,%xmm4
   2517 	pxor	%xmm5,%xmm5
   2518 	pxor	%xmm6,%xmm6
   2519 	pxor	%xmm7,%xmm7
   2520 	movaps	%xmm0,0(%rsp)
   2521 	pxor	%xmm8,%xmm8
   2522 	movaps	%xmm0,16(%rsp)
   2523 	pxor	%xmm9,%xmm9
   2524 	movaps	%xmm0,32(%rsp)
   2525 	pxor	%xmm10,%xmm10
   2526 	movaps	%xmm0,48(%rsp)
   2527 	pxor	%xmm11,%xmm11
   2528 	movaps	%xmm0,64(%rsp)
   2529 	pxor	%xmm12,%xmm12
   2530 	movaps	%xmm0,80(%rsp)
   2531 	pxor	%xmm13,%xmm13
   2532 	movaps	%xmm0,96(%rsp)
   2533 	pxor	%xmm14,%xmm14
   2534 	pxor	%xmm15,%xmm15
   2535 	movq	-8(%r11),%rbp
   2536 	leaq	(%r11),%rsp
   2537 L$xts_dec_epilogue:
   2538 	.byte	0xf3,0xc3
   2539 
   2540 .globl	_aesni_ocb_encrypt
   2541 .private_extern _aesni_ocb_encrypt
   2542 
   2543 .p2align	5
        /*
         * _aesni_ocb_encrypt: OCB-style bulk encryption of whole 16-byte blocks
         * using AES-NI.
         *
         * Inputs, as read by the code below:
         *   %rdi      input pointer, advanced 16 bytes per block
         *   %rsi      output pointer, advanced 16 bytes per block
         *   %rdx      number of 16-byte blocks
         *   %rcx      key schedule; the 32-bit round counter is at offset 240
         *             (the same field _aesni_encrypt uses as its loop count)
         *   %r8       block number of the first block
         *   %r9       16-byte running offset, read here and written back at exit
         *   8(%rsp)   pointer (kept in %rbx) to a table of 16-byte masks, indexed
         *             by the trailing-zero count of a block number times 16
         *   16(%rsp)  pointer (kept in %rbp) to a 16-byte accumulator; every input
         *             block is XORed into it and it is written back at exit
         *
         * Saves and restores %rbx, %rbp, %r12, %r13, %r14 and zeroes %xmm0-%xmm15
         * before returning.
         *
         * NOTE(review): %rdx == 0 is not handled. With an even %r8 one block is
         * still processed and the count wraps; with an odd %r8 the exit code XORs
         * %xmm0 into the offset although nothing in this function has loaded it.
         * Presumably callers never pass zero blocks - confirm.
         * NOTE(review): on the even-start path bsfq is applied to %r8 itself; its
         * result is undefined for a zero source, so this assumes %r8 != 0 - confirm.
         */
   2544 _aesni_ocb_encrypt:
        /* Keep the entry %rsp in %rax so the stack arguments can be read after the pushes. */
   2545 	leaq	(%rsp),%rax
   2546 	pushq	%rbx
   2547 	pushq	%rbp
   2548 	pushq	%r12
   2549 	pushq	%r13
   2550 	pushq	%r14
   2551 	movq	8(%rax),%rbx
   2552 	movq	8+8(%rax),%rbp
   2553 
        /* %r10 = round counter * 16; %xmm9 = first round key; %xmm1 = round key at 16 + %r10 (the last one used). */
   2554 	movl	240(%rcx),%r10d
   2555 	movq	%rcx,%r11
   2556 	shll	$4,%r10d
   2557 	movups	(%rcx),%xmm9
   2558 	movups	16(%rcx,%r10,1),%xmm1
   2559 
        /*
         * Fold the last round key into the running offset (%xmm15) and keep
         * first^last round key in %xmm9, so one XOR with %xmm9 switches an offset
         * between its "first round key" and "last round key" forms.
         */
   2560 	movdqu	(%r9),%xmm15
   2561 	pxor	%xmm1,%xmm9
   2562 	pxor	%xmm1,%xmm15
   2563 
        /*
         * %rcx = key + 32 + 16*rounds and %rax = %r10 = 48 - 16*rounds: the helper
         * round loops index (%rcx,%rax) with %rax counting up to zero.
         * %xmm1 = round key at offset 16, which every helper expects on entry.
         */
   2564 	movl	$16+32,%eax
   2565 	leaq	32(%r11,%r10,1),%rcx
   2566 	movups	16(%r11),%xmm1
   2567 	subq	%r10,%rax
   2568 	movq	%rax,%r10
   2569 
        /* %xmm10 = table entry 0, %xmm8 = accumulator. */
   2570 	movdqu	(%rbx),%xmm10
   2571 	movdqu	(%rbp),%xmm8
   2572 
        /* Even starting block number: process one block alone so the multi-block path starts on an odd number. */
   2573 	testq	$1,%r8
   2574 	jnz	L$ocb_enc_odd
   2575 
   2576 	bsfq	%r8,%r12
   2577 	addq	$1,%r8
   2578 	shlq	$4,%r12
   2579 	movdqu	(%rbx,%r12,1),%xmm7
   2580 	movdqu	(%rdi),%xmm2
   2581 	leaq	16(%rdi),%rdi
   2582 
   2583 	call	__ocb_encrypt1
   2584 
        /* The helper leaves the updated offset in %xmm7; make it the running offset. */
   2585 	movdqa	%xmm7,%xmm15
   2586 	movups	%xmm2,(%rsi)
   2587 	leaq	16(%rsi),%rsi
   2588 	subq	$1,%rdx
   2589 	jz	L$ocb_enc_done
   2590 
   2591 L$ocb_enc_odd:
        /*
         * %r8 is odd here. Table byte offsets are needed only for block numbers
         * %r8+1, %r8+3 and %r8+5 (trailing-zero count * 16); the helpers use
         * table entry 0 (%xmm10) for the other positions.
         */
   2592 	leaq	1(%r8),%r12
   2593 	leaq	3(%r8),%r13
   2594 	leaq	5(%r8),%r14
   2595 	leaq	6(%r8),%r8
   2596 	bsfq	%r12,%r12
   2597 	bsfq	%r13,%r13
   2598 	bsfq	%r14,%r14
   2599 	shlq	$4,%r12
   2600 	shlq	$4,%r13
   2601 	shlq	$4,%r14
   2602 
        /* Fewer than six blocks left: go straight to the tail dispatch. */
   2603 	subq	$6,%rdx
   2604 	jc	L$ocb_enc_short
   2605 	jmp	L$ocb_enc_grandloop
   2606 
   2607 .p2align	5
   2608 L$ocb_enc_grandloop:
        /* Main loop: six blocks per iteration. */
   2609 	movdqu	0(%rdi),%xmm2
   2610 	movdqu	16(%rdi),%xmm3
   2611 	movdqu	32(%rdi),%xmm4
   2612 	movdqu	48(%rdi),%xmm5
   2613 	movdqu	64(%rdi),%xmm6
   2614 	movdqu	80(%rdi),%xmm7
   2615 	leaq	96(%rdi),%rdi
   2616 
   2617 	call	__ocb_encrypt6
   2618 
   2619 	movups	%xmm2,0(%rsi)
   2620 	movups	%xmm3,16(%rsi)
   2621 	movups	%xmm4,32(%rsi)
   2622 	movups	%xmm5,48(%rsi)
   2623 	movups	%xmm6,64(%rsi)
   2624 	movups	%xmm7,80(%rsi)
   2625 	leaq	96(%rsi),%rsi
   2626 	subq	$6,%rdx
   2627 	jnc	L$ocb_enc_grandloop
   2628 
   2629 L$ocb_enc_short:
        /* Undo the speculative subtraction; %rdx is now the 0..5 remaining blocks. */
   2630 	addq	$6,%rdx
   2631 	jz	L$ocb_enc_done
   2632 
        /* The loads are interleaved with the compares; movdqu does not change the flags the je tests. */
   2633 	movdqu	0(%rdi),%xmm2
   2634 	cmpq	$2,%rdx
   2635 	jb	L$ocb_enc_one
   2636 	movdqu	16(%rdi),%xmm3
   2637 	je	L$ocb_enc_two
   2638 
   2639 	movdqu	32(%rdi),%xmm4
   2640 	cmpq	$4,%rdx
   2641 	jb	L$ocb_enc_three
   2642 	movdqu	48(%rdi),%xmm5
   2643 	je	L$ocb_enc_four
   2644 
        /* Five blocks: run the six-block helper with a zero sixth input (a zero block leaves the accumulator unchanged). */
   2645 	movdqu	64(%rdi),%xmm6
   2646 	pxor	%xmm7,%xmm7
   2647 
   2648 	call	__ocb_encrypt6
   2649 
        /* The running offset is the fifth block's offset (%xmm14); the sixth result is discarded. */
   2650 	movdqa	%xmm14,%xmm15
   2651 	movups	%xmm2,0(%rsi)
   2652 	movups	%xmm3,16(%rsi)
   2653 	movups	%xmm4,32(%rsi)
   2654 	movups	%xmm5,48(%rsi)
   2655 	movups	%xmm6,64(%rsi)
   2656 
   2657 	jmp	L$ocb_enc_done
   2658 
   2659 .p2align	4
   2660 L$ocb_enc_one:
        /* One block left: its mask is table entry 0 (%xmm10). */
   2661 	movdqa	%xmm10,%xmm7
   2662 
   2663 	call	__ocb_encrypt1
   2664 
   2665 	movdqa	%xmm7,%xmm15
   2666 	movups	%xmm2,0(%rsi)
   2667 	jmp	L$ocb_enc_done
   2668 
   2669 .p2align	4
   2670 L$ocb_enc_two:
        /* Two blocks: pad the four-block helper with zero inputs; the offset after block two is %xmm11. */
   2671 	pxor	%xmm4,%xmm4
   2672 	pxor	%xmm5,%xmm5
   2673 
   2674 	call	__ocb_encrypt4
   2675 
   2676 	movdqa	%xmm11,%xmm15
   2677 	movups	%xmm2,0(%rsi)
   2678 	movups	%xmm3,16(%rsi)
   2679 
   2680 	jmp	L$ocb_enc_done
   2681 
   2682 .p2align	4
   2683 L$ocb_enc_three:
        /* Three blocks: one zero pad block; the offset after block three is %xmm12. */
   2684 	pxor	%xmm5,%xmm5
   2685 
   2686 	call	__ocb_encrypt4
   2687 
   2688 	movdqa	%xmm12,%xmm15
   2689 	movups	%xmm2,0(%rsi)
   2690 	movups	%xmm3,16(%rsi)
   2691 	movups	%xmm4,32(%rsi)
   2692 
   2693 	jmp	L$ocb_enc_done
   2694 
   2695 .p2align	4
   2696 L$ocb_enc_four:
        /* Exactly four blocks; the offset after block four is %xmm13. */
   2697 	call	__ocb_encrypt4
   2698 
   2699 	movdqa	%xmm13,%xmm15
   2700 	movups	%xmm2,0(%rsi)
   2701 	movups	%xmm3,16(%rsi)
   2702 	movups	%xmm4,32(%rsi)
   2703 	movups	%xmm5,48(%rsi)
   2704 
   2705 L$ocb_enc_done:
        /*
         * After any helper call %xmm0 holds the last round key (final load of the
         * round loop), so this XOR removes the key that was folded into the offset
         * at entry. Then write back the accumulator and the offset.
         */
   2706 	pxor	%xmm0,%xmm15
   2707 	movdqu	%xmm8,(%rbp)
   2708 	movdqu	%xmm15,(%r9)
   2709 
        /* Clear every vector register so no key or data material is left behind. */
   2710 	xorps	%xmm0,%xmm0
   2711 	pxor	%xmm1,%xmm1
   2712 	pxor	%xmm2,%xmm2
   2713 	pxor	%xmm3,%xmm3
   2714 	pxor	%xmm4,%xmm4
   2715 	pxor	%xmm5,%xmm5
   2716 	pxor	%xmm6,%xmm6
   2717 	pxor	%xmm7,%xmm7
   2718 	pxor	%xmm8,%xmm8
   2719 	pxor	%xmm9,%xmm9
   2720 	pxor	%xmm10,%xmm10
   2721 	pxor	%xmm11,%xmm11
   2722 	pxor	%xmm12,%xmm12
   2723 	pxor	%xmm13,%xmm13
   2724 	pxor	%xmm14,%xmm14
   2725 	pxor	%xmm15,%xmm15
        /* Restore the five pushed registers (40 bytes) and the entry stack pointer. */
   2726 	leaq	40(%rsp),%rax
   2727 	movq	-40(%rax),%r14
   2728 	movq	-32(%rax),%r13
   2729 	movq	-24(%rax),%r12
   2730 	movq	-16(%rax),%rbp
   2731 	movq	-8(%rax),%rbx
   2732 	leaq	(%rax),%rsp
   2733 L$ocb_enc_epilogue:
        /* 0xf3,0xc3 encodes "rep ret". */
   2734 	.byte	0xf3,0xc3
   2735 
   2736 
   2737 
   2738 .p2align	5
        /*
         * __ocb_encrypt6: encrypt the six blocks in %xmm2-%xmm7 in place.
         *
         * Expects on entry: %xmm15 = running offset with the last round key
         * folded in, %xmm9 = first^last round key, %xmm10 = table entry 0,
         * %xmm8 = accumulator, %xmm1 = round key at 16(%r11),
         * %rax = negative round-loop index (also saved in %r10),
         * %rcx = base the round loop indexes from, %rbx = mask table,
         * %r12/%r13/%r14 = table byte offsets for the 2nd, 4th and 6th block.
         *
         * On return: %xmm2-%xmm7 hold the results; %xmm11-%xmm15 hold the offsets
         * of blocks 2-6 (%xmm15 is the new running offset); the six inputs have
         * been XORed into %xmm8; %r8 is advanced by 6 and %r12/%r13/%r14 are
         * recomputed for the next six blocks; %xmm10, %xmm1 and %rax are reloaded
         * to their entry values; %xmm0 holds the last round key.
         *
         * The .byte sequences 102,15,56,220,x encode aesenc and
         * 102,65,15,56,221,x encode aesenclast with a source in %xmm10-%xmm15.
         */
   2739 __ocb_encrypt6:
        /*
         * Build the chain of per-block offsets in "first round key" form, so the
         * XOR into each block also performs the initial round-key addition.
         * Each input block is XORed into the accumulator before it is masked.
         */
   2740 	pxor	%xmm9,%xmm15
   2741 	movdqu	(%rbx,%r12,1),%xmm11
   2742 	movdqa	%xmm10,%xmm12
   2743 	movdqu	(%rbx,%r13,1),%xmm13
   2744 	movdqa	%xmm10,%xmm14
   2745 	pxor	%xmm15,%xmm10
   2746 	movdqu	(%rbx,%r14,1),%xmm15
   2747 	pxor	%xmm10,%xmm11
   2748 	pxor	%xmm2,%xmm8
   2749 	pxor	%xmm10,%xmm2
   2750 	pxor	%xmm11,%xmm12
   2751 	pxor	%xmm3,%xmm8
   2752 	pxor	%xmm11,%xmm3
   2753 	pxor	%xmm12,%xmm13
   2754 	pxor	%xmm4,%xmm8
   2755 	pxor	%xmm12,%xmm4
   2756 	pxor	%xmm13,%xmm14
   2757 	pxor	%xmm5,%xmm8
   2758 	pxor	%xmm13,%xmm5
   2759 	pxor	%xmm14,%xmm15
   2760 	pxor	%xmm6,%xmm8
   2761 	pxor	%xmm14,%xmm6
   2762 	pxor	%xmm7,%xmm8
   2763 	pxor	%xmm15,%xmm7
   2764 	movups	32(%r11),%xmm0
   2765 
        /* Start computing the table indices for the next group of six while the rounds run. */
   2766 	leaq	1(%r8),%r12
   2767 	leaq	3(%r8),%r13
   2768 	leaq	5(%r8),%r14
   2769 	addq	$6,%r8
   2770 	pxor	%xmm9,%xmm10
   2771 	bsfq	%r12,%r12
   2772 	bsfq	%r13,%r13
   2773 	bsfq	%r14,%r14
   2774 
        /*
         * aesenc with %xmm1 on %xmm2-%xmm7, interleaved with XORs of %xmm9 that
         * turn each offset into "last round key" form for the final aesenclast.
         */
   2775 .byte	102,15,56,220,209
   2776 .byte	102,15,56,220,217
   2777 .byte	102,15,56,220,225
   2778 .byte	102,15,56,220,233
   2779 	pxor	%xmm9,%xmm11
   2780 	pxor	%xmm9,%xmm12
   2781 .byte	102,15,56,220,241
   2782 	pxor	%xmm9,%xmm13
   2783 	pxor	%xmm9,%xmm14
   2784 .byte	102,15,56,220,249
   2785 	movups	48(%r11),%xmm1
   2786 	pxor	%xmm9,%xmm15
   2787 
        /* aesenc with %xmm0 (round key at offset 32) on %xmm2-%xmm7. */
   2788 .byte	102,15,56,220,208
   2789 .byte	102,15,56,220,216
   2790 .byte	102,15,56,220,224
   2791 .byte	102,15,56,220,232
   2792 .byte	102,15,56,220,240
   2793 .byte	102,15,56,220,248
   2794 	movups	64(%r11),%xmm0
   2795 	shlq	$4,%r12
   2796 	shlq	$4,%r13
   2797 	jmp	L$ocb_enc_loop6
   2798 
   2799 .p2align	5
   2800 L$ocb_enc_loop6:
        /*
         * Two rounds per iteration, alternating %xmm1 and %xmm0. %rax counts up
         * by 32 toward zero; the jnz tests the flags of the addq, which the
         * intervening AES and load instructions do not modify.
         */
   2801 .byte	102,15,56,220,209
   2802 .byte	102,15,56,220,217
   2803 .byte	102,15,56,220,225
   2804 .byte	102,15,56,220,233
   2805 .byte	102,15,56,220,241
   2806 .byte	102,15,56,220,249
   2807 	movups	(%rcx,%rax,1),%xmm1
   2808 	addq	$32,%rax
   2809 
   2810 .byte	102,15,56,220,208
   2811 .byte	102,15,56,220,216
   2812 .byte	102,15,56,220,224
   2813 .byte	102,15,56,220,232
   2814 .byte	102,15,56,220,240
   2815 .byte	102,15,56,220,248
   2816 	movups	-16(%rcx,%rax,1),%xmm0
   2817 	jnz	L$ocb_enc_loop6
   2818 
        /* Last aesenc round, then reload %xmm1 with the round key at offset 16 for the next call. */
   2819 .byte	102,15,56,220,209
   2820 .byte	102,15,56,220,217
   2821 .byte	102,15,56,220,225
   2822 .byte	102,15,56,220,233
   2823 .byte	102,15,56,220,241
   2824 .byte	102,15,56,220,249
   2825 	movups	16(%r11),%xmm1
   2826 	shlq	$4,%r14
   2827 
        /*
         * aesenclast of %xmm2-%xmm7 with %xmm10-%xmm15 respectively: the final
         * round key and the per-block offset are applied in a single step.
         * %xmm10 is reloaded with table entry 0 once it has been consumed, and
         * %rax is reset to the saved loop index.
         */
   2828 .byte	102,65,15,56,221,210
   2829 	movdqu	(%rbx),%xmm10
   2830 	movq	%r10,%rax
   2831 .byte	102,65,15,56,221,219
   2832 .byte	102,65,15,56,221,228
   2833 .byte	102,65,15,56,221,237
   2834 .byte	102,65,15,56,221,246
   2835 .byte	102,65,15,56,221,255
        /* 0xf3,0xc3 encodes "rep ret". */
   2836 	.byte	0xf3,0xc3
   2837 
   2838 
   2839 
   2840 .p2align	5
        /*
         * __ocb_encrypt4: encrypt the four blocks in %xmm2-%xmm5 in place.
         *
         * Entry state is the same as for the six-block helper, but only %r12 and
         * %r13 are used as table byte offsets. Unlike the six-block helper this
         * routine does not advance %r8 or recompute %r12/%r13; within this file
         * it is called only from the tail paths (two, three or four blocks left).
         *
         * On return: %xmm2-%xmm5 hold the results, %xmm10-%xmm13 hold the offsets
         * of blocks 1-4 in "last round key" form (the caller picks the one that
         * matches the real block count), the four inputs have been XORed into
         * %xmm8, %xmm1 and %rax are reloaded, %xmm0 holds the last round key.
         */
   2841 __ocb_encrypt4:
        /* Offset chain in "first round key" form; accumulate each input, then mask it. */
   2842 	pxor	%xmm9,%xmm15
   2843 	movdqu	(%rbx,%r12,1),%xmm11
   2844 	movdqa	%xmm10,%xmm12
   2845 	movdqu	(%rbx,%r13,1),%xmm13
   2846 	pxor	%xmm15,%xmm10
   2847 	pxor	%xmm10,%xmm11
   2848 	pxor	%xmm2,%xmm8
   2849 	pxor	%xmm10,%xmm2
   2850 	pxor	%xmm11,%xmm12
   2851 	pxor	%xmm3,%xmm8
   2852 	pxor	%xmm11,%xmm3
   2853 	pxor	%xmm12,%xmm13
   2854 	pxor	%xmm4,%xmm8
   2855 	pxor	%xmm12,%xmm4
   2856 	pxor	%xmm5,%xmm8
   2857 	pxor	%xmm13,%xmm5
   2858 	movups	32(%r11),%xmm0
   2859 
        /* Switch the four offsets to "last round key" form for the final aesenclast. */
   2860 	pxor	%xmm9,%xmm10
   2861 	pxor	%xmm9,%xmm11
   2862 	pxor	%xmm9,%xmm12
   2863 	pxor	%xmm9,%xmm13
   2864 
        /* aesenc with %xmm1 on %xmm2-%xmm5. */
   2865 .byte	102,15,56,220,209
   2866 .byte	102,15,56,220,217
   2867 .byte	102,15,56,220,225
   2868 .byte	102,15,56,220,233
   2869 	movups	48(%r11),%xmm1
   2870 
        /* aesenc with %xmm0 on %xmm2-%xmm5. */
   2871 .byte	102,15,56,220,208
   2872 .byte	102,15,56,220,216
   2873 .byte	102,15,56,220,224
   2874 .byte	102,15,56,220,232
   2875 	movups	64(%r11),%xmm0
   2876 	jmp	L$ocb_enc_loop4
   2877 
   2878 .p2align	5
   2879 L$ocb_enc_loop4:
        /* Two rounds per iteration; %rax counts up by 32 and the loop ends when it reaches zero. */
   2880 .byte	102,15,56,220,209
   2881 .byte	102,15,56,220,217
   2882 .byte	102,15,56,220,225
   2883 .byte	102,15,56,220,233
   2884 	movups	(%rcx,%rax,1),%xmm1
   2885 	addq	$32,%rax
   2886 
   2887 .byte	102,15,56,220,208
   2888 .byte	102,15,56,220,216
   2889 .byte	102,15,56,220,224
   2890 .byte	102,15,56,220,232
   2891 	movups	-16(%rcx,%rax,1),%xmm0
   2892 	jnz	L$ocb_enc_loop4
   2893 
        /* Last aesenc round; restore %xmm1 and %rax to their entry values for the next helper call. */
   2894 .byte	102,15,56,220,209
   2895 .byte	102,15,56,220,217
   2896 .byte	102,15,56,220,225
   2897 .byte	102,15,56,220,233
   2898 	movups	16(%r11),%xmm1
   2899 	movq	%r10,%rax
   2900 
        /* aesenclast of %xmm2-%xmm5 with %xmm10-%xmm13 respectively. */
   2901 .byte	102,65,15,56,221,210
   2902 .byte	102,65,15,56,221,219
   2903 .byte	102,65,15,56,221,228
   2904 .byte	102,65,15,56,221,237
        /* 0xf3,0xc3 encodes "rep ret". */
   2905 	.byte	0xf3,0xc3
   2906 
   2907 
   2908 
   2909 .p2align	5
        /*
         * __ocb_encrypt1: encrypt the single block in %xmm2 in place.
         *
         * Expects on entry: %xmm7 = table mask for this block, %xmm15 = running
         * offset with the last round key folded in, %xmm9 = first^last round key,
         * %xmm8 = accumulator, %xmm1 = round key at 16(%r11),
         * %rax = negative round-loop index (also saved in %r10).
         *
         * On return: %xmm2 = result, %xmm7 = updated offset in "last round key"
         * form (callers copy it to %xmm15), the input has been XORed into %xmm8,
         * %xmm1 and %rax are reloaded, %xmm0 holds the last round key.
         */
   2910 __ocb_encrypt1:
        /* %xmm7 = mask ^ offset, moved to "first round key" form; accumulate the input, then mask it. */
   2911 	pxor	%xmm15,%xmm7
   2912 	pxor	%xmm9,%xmm7
   2913 	pxor	%xmm2,%xmm8
   2914 	pxor	%xmm7,%xmm2
   2915 	movups	32(%r11),%xmm0
   2916 
        /* aesenc %xmm1,%xmm2; then put the offset back into "last round key" form. */
   2917 .byte	102,15,56,220,209
   2918 	movups	48(%r11),%xmm1
   2919 	pxor	%xmm9,%xmm7
   2920 
        /* aesenc %xmm0,%xmm2 */
   2921 .byte	102,15,56,220,208
   2922 	movups	64(%r11),%xmm0
   2923 	jmp	L$ocb_enc_loop1
   2924 
   2925 .p2align	5
   2926 L$ocb_enc_loop1:
        /* Two rounds per iteration; %rax counts up by 32 and the loop ends when it reaches zero. */
   2927 .byte	102,15,56,220,209
   2928 	movups	(%rcx,%rax,1),%xmm1
   2929 	addq	$32,%rax
   2930 
   2931 .byte	102,15,56,220,208
   2932 	movups	-16(%rcx,%rax,1),%xmm0
   2933 	jnz	L$ocb_enc_loop1
   2934 
        /* Last aesenc round; restore %xmm1 and %rax to their entry values. */
   2935 .byte	102,15,56,220,209
   2936 	movups	16(%r11),%xmm1
   2937 	movq	%r10,%rax
   2938 
        /* aesenclast %xmm7,%xmm2: final round key and output offset applied together. */
   2939 .byte	102,15,56,221,215
        /* 0xf3,0xc3 encodes "rep ret". */
   2940 	.byte	0xf3,0xc3
   2941 
   2942 
   2943 .globl	_aesni_ocb_decrypt
   2944 .private_extern _aesni_ocb_decrypt
   2945 
   2946 .p2align	5
        /*
         * _aesni_ocb_decrypt: OCB-style bulk decryption of whole 16-byte blocks
         * using AES-NI.
         *
         * Inputs, as read by the code below:
         *   %rdi      input pointer, advanced 16 bytes per block
         *   %rsi      output pointer, advanced 16 bytes per block
         *   %rdx      number of 16-byte blocks
         *   %rcx      key schedule; the 32-bit round counter is at offset 240
         *             (presumably a decryption schedule, since the helpers use
         *             aesdec - confirm against the caller)
         *   %r8       block number of the first block
         *   %r9       16-byte running offset, read here and written back at exit
         *   8(%rsp)   pointer (kept in %rbx) to a table of 16-byte masks, indexed
         *             by the trailing-zero count of a block number times 16
         *   16(%rsp)  pointer (kept in %rbp) to a 16-byte accumulator; every
         *             stored output block is XORed into it and it is written back
         *
         * Unlike the encrypt path, the accumulator is updated here in the caller,
         * after each helper returns, because it covers the decrypted output.
         *
         * Saves and restores %rbx, %rbp, %r12, %r13, %r14 and zeroes %xmm0-%xmm15
         * before returning.
         *
         * NOTE(review): %rdx == 0 is not handled. With an even %r8 one block is
         * still processed and the count wraps; with an odd %r8 the exit code XORs
         * %xmm0 into the offset although nothing in this function has loaded it.
         * Presumably callers never pass zero blocks - confirm.
         * NOTE(review): on the even-start path bsfq is applied to %r8 itself; its
         * result is undefined for a zero source, so this assumes %r8 != 0 - confirm.
         */
   2947 _aesni_ocb_decrypt:
        /* Keep the entry %rsp in %rax so the stack arguments can be read after the pushes. */
   2948 	leaq	(%rsp),%rax
   2949 	pushq	%rbx
   2950 	pushq	%rbp
   2951 	pushq	%r12
   2952 	pushq	%r13
   2953 	pushq	%r14
   2954 	movq	8(%rax),%rbx
   2955 	movq	8+8(%rax),%rbp
   2956 
        /* %r10 = round counter * 16; %xmm9 = first round key; %xmm1 = round key at 16 + %r10 (the last one used). */
   2957 	movl	240(%rcx),%r10d
   2958 	movq	%rcx,%r11
   2959 	shll	$4,%r10d
   2960 	movups	(%rcx),%xmm9
   2961 	movups	16(%rcx,%r10,1),%xmm1
   2962 
        /*
         * Fold the last round key into the running offset (%xmm15) and keep
         * first^last round key in %xmm9, so one XOR with %xmm9 switches an offset
         * between its "first round key" and "last round key" forms.
         */
   2963 	movdqu	(%r9),%xmm15
   2964 	pxor	%xmm1,%xmm9
   2965 	pxor	%xmm1,%xmm15
   2966 
        /*
         * %rcx = key + 32 + 16*rounds and %rax = %r10 = 48 - 16*rounds: the helper
         * round loops index (%rcx,%rax) with %rax counting up to zero.
         * %xmm1 = round key at offset 16, which every helper expects on entry.
         */
   2967 	movl	$16+32,%eax
   2968 	leaq	32(%r11,%r10,1),%rcx
   2969 	movups	16(%r11),%xmm1
   2970 	subq	%r10,%rax
   2971 	movq	%rax,%r10
   2972 
        /* %xmm10 = table entry 0, %xmm8 = accumulator. */
   2973 	movdqu	(%rbx),%xmm10
   2974 	movdqu	(%rbp),%xmm8
   2975 
        /* Even starting block number: process one block alone so the multi-block path starts on an odd number. */
   2976 	testq	$1,%r8
   2977 	jnz	L$ocb_dec_odd
   2978 
   2979 	bsfq	%r8,%r12
   2980 	addq	$1,%r8
   2981 	shlq	$4,%r12
   2982 	movdqu	(%rbx,%r12,1),%xmm7
   2983 	movdqu	(%rdi),%xmm2
   2984 	leaq	16(%rdi),%rdi
   2985 
   2986 	call	__ocb_decrypt1
   2987 
        /* The helper leaves the updated offset in %xmm7; store the output and add it to the accumulator. */
   2988 	movdqa	%xmm7,%xmm15
   2989 	movups	%xmm2,(%rsi)
   2990 	xorps	%xmm2,%xmm8
   2991 	leaq	16(%rsi),%rsi
   2992 	subq	$1,%rdx
   2993 	jz	L$ocb_dec_done
   2994 
   2995 L$ocb_dec_odd:
        /*
         * %r8 is odd here. Table byte offsets are needed only for block numbers
         * %r8+1, %r8+3 and %r8+5 (trailing-zero count * 16); the helpers use
         * table entry 0 (%xmm10) for the other positions.
         */
   2996 	leaq	1(%r8),%r12
   2997 	leaq	3(%r8),%r13
   2998 	leaq	5(%r8),%r14
   2999 	leaq	6(%r8),%r8
   3000 	bsfq	%r12,%r12
   3001 	bsfq	%r13,%r13
   3002 	bsfq	%r14,%r14
   3003 	shlq	$4,%r12
   3004 	shlq	$4,%r13
   3005 	shlq	$4,%r14
   3006 
        /* Fewer than six blocks left: go straight to the tail dispatch. */
   3007 	subq	$6,%rdx
   3008 	jc	L$ocb_dec_short
   3009 	jmp	L$ocb_dec_grandloop
   3010 
   3011 .p2align	5
   3012 L$ocb_dec_grandloop:
        /* Main loop: six blocks per iteration. */
   3013 	movdqu	0(%rdi),%xmm2
   3014 	movdqu	16(%rdi),%xmm3
   3015 	movdqu	32(%rdi),%xmm4
   3016 	movdqu	48(%rdi),%xmm5
   3017 	movdqu	64(%rdi),%xmm6
   3018 	movdqu	80(%rdi),%xmm7
   3019 	leaq	96(%rdi),%rdi
   3020 
   3021 	call	__ocb_decrypt6
   3022 
        /* Store each output block and XOR it into the accumulator. */
   3023 	movups	%xmm2,0(%rsi)
   3024 	pxor	%xmm2,%xmm8
   3025 	movups	%xmm3,16(%rsi)
   3026 	pxor	%xmm3,%xmm8
   3027 	movups	%xmm4,32(%rsi)
   3028 	pxor	%xmm4,%xmm8
   3029 	movups	%xmm5,48(%rsi)
   3030 	pxor	%xmm5,%xmm8
   3031 	movups	%xmm6,64(%rsi)
   3032 	pxor	%xmm6,%xmm8
   3033 	movups	%xmm7,80(%rsi)
   3034 	pxor	%xmm7,%xmm8
   3035 	leaq	96(%rsi),%rsi
   3036 	subq	$6,%rdx
   3037 	jnc	L$ocb_dec_grandloop
   3038 
   3039 L$ocb_dec_short:
        /* Undo the speculative subtraction; %rdx is now the 0..5 remaining blocks. */
   3040 	addq	$6,%rdx
   3041 	jz	L$ocb_dec_done
   3042 
        /* The loads are interleaved with the compares; movdqu does not change the flags the je tests. */
   3043 	movdqu	0(%rdi),%xmm2
   3044 	cmpq	$2,%rdx
   3045 	jb	L$ocb_dec_one
   3046 	movdqu	16(%rdi),%xmm3
   3047 	je	L$ocb_dec_two
   3048 
   3049 	movdqu	32(%rdi),%xmm4
   3050 	cmpq	$4,%rdx
   3051 	jb	L$ocb_dec_three
   3052 	movdqu	48(%rdi),%xmm5
   3053 	je	L$ocb_dec_four
   3054 
        /* Five blocks: run the six-block helper with a zero sixth input; its result is neither stored nor accumulated. */
   3055 	movdqu	64(%rdi),%xmm6
   3056 	pxor	%xmm7,%xmm7
   3057 
   3058 	call	__ocb_decrypt6
   3059 
        /* The running offset is the fifth block's offset (%xmm14). */
   3060 	movdqa	%xmm14,%xmm15
   3061 	movups	%xmm2,0(%rsi)
   3062 	pxor	%xmm2,%xmm8
   3063 	movups	%xmm3,16(%rsi)
   3064 	pxor	%xmm3,%xmm8
   3065 	movups	%xmm4,32(%rsi)
   3066 	pxor	%xmm4,%xmm8
   3067 	movups	%xmm5,48(%rsi)
   3068 	pxor	%xmm5,%xmm8
   3069 	movups	%xmm6,64(%rsi)
   3070 	pxor	%xmm6,%xmm8
   3071 
   3072 	jmp	L$ocb_dec_done
   3073 
   3074 .p2align	4
   3075 L$ocb_dec_one:
        /* One block left: its mask is table entry 0 (%xmm10). */
   3076 	movdqa	%xmm10,%xmm7
   3077 
   3078 	call	__ocb_decrypt1
   3079 
   3080 	movdqa	%xmm7,%xmm15
   3081 	movups	%xmm2,0(%rsi)
   3082 	xorps	%xmm2,%xmm8
   3083 	jmp	L$ocb_dec_done
   3084 
   3085 .p2align	4
   3086 L$ocb_dec_two:
        /* Two blocks: pad the four-block helper with zero inputs; the offset after block two is %xmm11. */
   3087 	pxor	%xmm4,%xmm4
   3088 	pxor	%xmm5,%xmm5
   3089 
   3090 	call	__ocb_decrypt4
   3091 
   3092 	movdqa	%xmm11,%xmm15
   3093 	movups	%xmm2,0(%rsi)
   3094 	xorps	%xmm2,%xmm8
   3095 	movups	%xmm3,16(%rsi)
   3096 	xorps	%xmm3,%xmm8
   3097 
   3098 	jmp	L$ocb_dec_done
   3099 
   3100 .p2align	4
   3101 L$ocb_dec_three:
        /* Three blocks: one zero pad block; the offset after block three is %xmm12. */
   3102 	pxor	%xmm5,%xmm5
   3103 
   3104 	call	__ocb_decrypt4
   3105 
   3106 	movdqa	%xmm12,%xmm15
   3107 	movups	%xmm2,0(%rsi)
   3108 	xorps	%xmm2,%xmm8
   3109 	movups	%xmm3,16(%rsi)
   3110 	xorps	%xmm3,%xmm8
   3111 	movups	%xmm4,32(%rsi)
   3112 	xorps	%xmm4,%xmm8
   3113 
   3114 	jmp	L$ocb_dec_done
   3115 
   3116 .p2align	4
   3117 L$ocb_dec_four:
        /* Exactly four blocks; the offset after block four is %xmm13. */
   3118 	call	__ocb_decrypt4
   3119 
   3120 	movdqa	%xmm13,%xmm15
   3121 	movups	%xmm2,0(%rsi)
   3122 	pxor	%xmm2,%xmm8
   3123 	movups	%xmm3,16(%rsi)
   3124 	pxor	%xmm3,%xmm8
   3125 	movups	%xmm4,32(%rsi)
   3126 	pxor	%xmm4,%xmm8
   3127 	movups	%xmm5,48(%rsi)
   3128 	pxor	%xmm5,%xmm8
   3129 
   3130 L$ocb_dec_done:
        /*
         * After any helper call %xmm0 holds the last round key (final load of the
         * round loop), so this XOR removes the key that was folded into the offset
         * at entry. Then write back the accumulator and the offset.
         */
   3131 	pxor	%xmm0,%xmm15
   3132 	movdqu	%xmm8,(%rbp)
   3133 	movdqu	%xmm15,(%r9)
   3134 
        /* Clear every vector register so no key or data material is left behind. */
   3135 	xorps	%xmm0,%xmm0
   3136 	pxor	%xmm1,%xmm1
   3137 	pxor	%xmm2,%xmm2
   3138 	pxor	%xmm3,%xmm3
   3139 	pxor	%xmm4,%xmm4
   3140 	pxor	%xmm5,%xmm5
   3141 	pxor	%xmm6,%xmm6
   3142 	pxor	%xmm7,%xmm7
   3143 	pxor	%xmm8,%xmm8
   3144 	pxor	%xmm9,%xmm9
   3145 	pxor	%xmm10,%xmm10
   3146 	pxor	%xmm11,%xmm11
   3147 	pxor	%xmm12,%xmm12
   3148 	pxor	%xmm13,%xmm13
   3149 	pxor	%xmm14,%xmm14
   3150 	pxor	%xmm15,%xmm15
        /* Restore the five pushed registers (40 bytes) and the entry stack pointer. */
   3151 	leaq	40(%rsp),%rax
   3152 	movq	-40(%rax),%r14
   3153 	movq	-32(%rax),%r13
   3154 	movq	-24(%rax),%r12
   3155 	movq	-16(%rax),%rbp
   3156 	movq	-8(%rax),%rbx
   3157 	leaq	(%rax),%rsp
   3158 L$ocb_dec_epilogue:
        /* 0xf3,0xc3 encodes "rep ret". */
   3159 	.byte	0xf3,0xc3
   3160 
   3161 
   3162 
   3163 .p2align	5
        /*
         * __ocb_decrypt6: decrypt the six blocks in %xmm2-%xmm7 in place.
         *
         * Expects on entry: %xmm15 = running offset with the last round key
         * folded in, %xmm9 = first^last round key, %xmm10 = table entry 0,
         * %xmm1 = round key at 16(%r11), %rax = negative round-loop index (also
         * saved in %r10), %rcx = base the round loop indexes from,
         * %rbx = mask table, %r12/%r13/%r14 = table byte offsets for the 2nd,
         * 4th and 6th block.
         *
         * On return: %xmm2-%xmm7 hold the results; %xmm11-%xmm15 hold the offsets
         * of blocks 2-6 (%xmm15 is the new running offset); %r8 is advanced by 6
         * and %r12/%r13/%r14 are recomputed for the next six blocks; %xmm10,
         * %xmm1 and %rax are reloaded to their entry values; %xmm0 holds the last
         * round key. The accumulator %xmm8 is not touched here; the caller
         * updates it from the outputs.
         *
         * The .byte sequences 102,15,56,222,x encode aesdec and
         * 102,65,15,56,223,x encode aesdeclast with a source in %xmm10-%xmm15.
         */
   3164 __ocb_decrypt6:
        /*
         * Build the chain of per-block offsets in "first round key" form, so the
         * XOR into each block also performs the initial round-key addition.
         */
   3165 	pxor	%xmm9,%xmm15
   3166 	movdqu	(%rbx,%r12,1),%xmm11
   3167 	movdqa	%xmm10,%xmm12
   3168 	movdqu	(%rbx,%r13,1),%xmm13
   3169 	movdqa	%xmm10,%xmm14
   3170 	pxor	%xmm15,%xmm10
   3171 	movdqu	(%rbx,%r14,1),%xmm15
   3172 	pxor	%xmm10,%xmm11
   3173 	pxor	%xmm10,%xmm2
   3174 	pxor	%xmm11,%xmm12
   3175 	pxor	%xmm11,%xmm3
   3176 	pxor	%xmm12,%xmm13
   3177 	pxor	%xmm12,%xmm4
   3178 	pxor	%xmm13,%xmm14
   3179 	pxor	%xmm13,%xmm5
   3180 	pxor	%xmm14,%xmm15
   3181 	pxor	%xmm14,%xmm6
   3182 	pxor	%xmm15,%xmm7
   3183 	movups	32(%r11),%xmm0
   3184 
        /* Start computing the table indices for the next group of six while the rounds run. */
   3185 	leaq	1(%r8),%r12
   3186 	leaq	3(%r8),%r13
   3187 	leaq	5(%r8),%r14
   3188 	addq	$6,%r8
   3189 	pxor	%xmm9,%xmm10
   3190 	bsfq	%r12,%r12
   3191 	bsfq	%r13,%r13
   3192 	bsfq	%r14,%r14
   3193 
        /*
         * aesdec with %xmm1 on %xmm2-%xmm7, interleaved with XORs of %xmm9 that
         * turn each offset into "last round key" form for the final aesdeclast.
         */
   3194 .byte	102,15,56,222,209
   3195 .byte	102,15,56,222,217
   3196 .byte	102,15,56,222,225
   3197 .byte	102,15,56,222,233
   3198 	pxor	%xmm9,%xmm11
   3199 	pxor	%xmm9,%xmm12
   3200 .byte	102,15,56,222,241
   3201 	pxor	%xmm9,%xmm13
   3202 	pxor	%xmm9,%xmm14
   3203 .byte	102,15,56,222,249
   3204 	movups	48(%r11),%xmm1
   3205 	pxor	%xmm9,%xmm15
   3206 
        /* aesdec with %xmm0 (round key at offset 32) on %xmm2-%xmm7. */
   3207 .byte	102,15,56,222,208
   3208 .byte	102,15,56,222,216
   3209 .byte	102,15,56,222,224
   3210 .byte	102,15,56,222,232
   3211 .byte	102,15,56,222,240
   3212 .byte	102,15,56,222,248
   3213 	movups	64(%r11),%xmm0
   3214 	shlq	$4,%r12
   3215 	shlq	$4,%r13
   3216 	jmp	L$ocb_dec_loop6
   3217 
   3218 .p2align	5
   3219 L$ocb_dec_loop6:
        /*
         * Two rounds per iteration, alternating %xmm1 and %xmm0. %rax counts up
         * by 32 toward zero; the jnz tests the flags of the addq, which the
         * intervening AES and load instructions do not modify.
         */
   3220 .byte	102,15,56,222,209
   3221 .byte	102,15,56,222,217
   3222 .byte	102,15,56,222,225
   3223 .byte	102,15,56,222,233
   3224 .byte	102,15,56,222,241
   3225 .byte	102,15,56,222,249
   3226 	movups	(%rcx,%rax,1),%xmm1
   3227 	addq	$32,%rax
   3228 
   3229 .byte	102,15,56,222,208
   3230 .byte	102,15,56,222,216
   3231 .byte	102,15,56,222,224
   3232 .byte	102,15,56,222,232
   3233 .byte	102,15,56,222,240
   3234 .byte	102,15,56,222,248
   3235 	movups	-16(%rcx,%rax,1),%xmm0
   3236 	jnz	L$ocb_dec_loop6
   3237 
        /* Last aesdec round, then reload %xmm1 with the round key at offset 16 for the next call. */
   3238 .byte	102,15,56,222,209
   3239 .byte	102,15,56,222,217
   3240 .byte	102,15,56,222,225
   3241 .byte	102,15,56,222,233
   3242 .byte	102,15,56,222,241
   3243 .byte	102,15,56,222,249
   3244 	movups	16(%r11),%xmm1
   3245 	shlq	$4,%r14
   3246 
        /*
         * aesdeclast of %xmm2-%xmm7 with %xmm10-%xmm15 respectively: the final
         * round key and the per-block offset are applied in a single step.
         * %xmm10 is reloaded with table entry 0 once it has been consumed, and
         * %rax is reset to the saved loop index.
         */
   3247 .byte	102,65,15,56,223,210
   3248 	movdqu	(%rbx),%xmm10
   3249 	movq	%r10,%rax
   3250 .byte	102,65,15,56,223,219
   3251 .byte	102,65,15,56,223,228
   3252 .byte	102,65,15,56,223,237
   3253 .byte	102,65,15,56,223,246
   3254 .byte	102,65,15,56,223,255
        /* 0xf3,0xc3 encodes "rep ret". */
   3255 	.byte	0xf3,0xc3
   3256 
   3257 
   3258 
   3259 .p2align	5
        /*
         * __ocb_decrypt4: decrypt the four blocks in %xmm2-%xmm5 in place.
         *
         * Entry state is the same as for the six-block helper, but only %r12 and
         * %r13 are used as table byte offsets. Unlike the six-block helper this
         * routine does not advance %r8 or recompute %r12/%r13; within this file
         * it is called only from the tail paths (two, three or four blocks left).
         *
         * On return: %xmm2-%xmm5 hold the results, %xmm10-%xmm13 hold the offsets
         * of blocks 1-4 in "last round key" form (the caller picks the one that
         * matches the real block count), %xmm1 and %rax are reloaded, %xmm0 holds
         * the last round key. The accumulator %xmm8 is left to the caller.
         */
   3260 __ocb_decrypt4:
        /* Offset chain in "first round key" form, XORed into each input block. */
   3261 	pxor	%xmm9,%xmm15
   3262 	movdqu	(%rbx,%r12,1),%xmm11
   3263 	movdqa	%xmm10,%xmm12
   3264 	movdqu	(%rbx,%r13,1),%xmm13
   3265 	pxor	%xmm15,%xmm10
   3266 	pxor	%xmm10,%xmm11
   3267 	pxor	%xmm10,%xmm2
   3268 	pxor	%xmm11,%xmm12
   3269 	pxor	%xmm11,%xmm3
   3270 	pxor	%xmm12,%xmm13
   3271 	pxor	%xmm12,%xmm4
   3272 	pxor	%xmm13,%xmm5
   3273 	movups	32(%r11),%xmm0
   3274 
        /* Switch the four offsets to "last round key" form for the final aesdeclast. */
   3275 	pxor	%xmm9,%xmm10
   3276 	pxor	%xmm9,%xmm11
   3277 	pxor	%xmm9,%xmm12
   3278 	pxor	%xmm9,%xmm13
   3279 
        /* aesdec with %xmm1 on %xmm2-%xmm5. */
   3280 .byte	102,15,56,222,209
   3281 .byte	102,15,56,222,217
   3282 .byte	102,15,56,222,225
   3283 .byte	102,15,56,222,233
   3284 	movups	48(%r11),%xmm1
   3285 
        /* aesdec with %xmm0 on %xmm2-%xmm5. */
   3286 .byte	102,15,56,222,208
   3287 .byte	102,15,56,222,216
   3288 .byte	102,15,56,222,224
   3289 .byte	102,15,56,222,232
   3290 	movups	64(%r11),%xmm0
   3291 	jmp	L$ocb_dec_loop4
   3292 
   3293 .p2align	5
   3294 L$ocb_dec_loop4:
        /* Two rounds per iteration; %rax counts up by 32 and the loop ends when it reaches zero. */
   3295 .byte	102,15,56,222,209
   3296 .byte	102,15,56,222,217
   3297 .byte	102,15,56,222,225
   3298 .byte	102,15,56,222,233
   3299 	movups	(%rcx,%rax,1),%xmm1
   3300 	addq	$32,%rax
   3301 
   3302 .byte	102,15,56,222,208
   3303 .byte	102,15,56,222,216
   3304 .byte	102,15,56,222,224
   3305 .byte	102,15,56,222,232
   3306 	movups	-16(%rcx,%rax,1),%xmm0
   3307 	jnz	L$ocb_dec_loop4
   3308 
        /* Last aesdec round; restore %xmm1 and %rax to their entry values for the next helper call. */
   3309 .byte	102,15,56,222,209
   3310 .byte	102,15,56,222,217
   3311 .byte	102,15,56,222,225
   3312 .byte	102,15,56,222,233
   3313 	movups	16(%r11),%xmm1
   3314 	movq	%r10,%rax
   3315 
        /* aesdeclast of %xmm2-%xmm5 with %xmm10-%xmm13 respectively. */
   3316 .byte	102,65,15,56,223,210
   3317 .byte	102,65,15,56,223,219
   3318 .byte	102,65,15,56,223,228
   3319 .byte	102,65,15,56,223,237
        /* 0xf3,0xc3 encodes "rep ret". */
   3320 	.byte	0xf3,0xc3
   3321 
   3322 
   3323 
   3324 .p2align	5
        /*
         * __ocb_decrypt1: decrypt the single block in %xmm2 in place.
         *
         * Expects on entry: %xmm7 = table mask for this block, %xmm15 = running
         * offset with the last round key folded in, %xmm9 = first^last round key,
         * %xmm1 = round key at 16(%r11), %rax = negative round-loop index (also
         * saved in %r10).
         *
         * On return: %xmm2 = result, %xmm7 = updated offset in "last round key"
         * form (callers copy it to %xmm15), %xmm1 and %rax are reloaded, %xmm0
         * holds the last round key. The accumulator %xmm8 is left to the caller.
         */
   3325 __ocb_decrypt1:
        /* %xmm7 = mask ^ offset, moved to "first round key" form, then XORed into the input. */
   3326 	pxor	%xmm15,%xmm7
   3327 	pxor	%xmm9,%xmm7
   3328 	pxor	%xmm7,%xmm2
   3329 	movups	32(%r11),%xmm0
   3330 
        /* aesdec %xmm1,%xmm2; then put the offset back into "last round key" form. */
   3331 .byte	102,15,56,222,209
   3332 	movups	48(%r11),%xmm1
   3333 	pxor	%xmm9,%xmm7
   3334 
        /* aesdec %xmm0,%xmm2 */
   3335 .byte	102,15,56,222,208
   3336 	movups	64(%r11),%xmm0
   3337 	jmp	L$ocb_dec_loop1
   3338 
   3339 .p2align	5
   3340 L$ocb_dec_loop1:
        /* Two rounds per iteration; %rax counts up by 32 and the loop ends when it reaches zero. */
   3341 .byte	102,15,56,222,209
   3342 	movups	(%rcx,%rax,1),%xmm1
   3343 	addq	$32,%rax
   3344 
   3345 .byte	102,15,56,222,208
   3346 	movups	-16(%rcx,%rax,1),%xmm0
   3347 	jnz	L$ocb_dec_loop1
   3348 
        /* Last aesdec round; restore %xmm1 and %rax to their entry values. */
   3349 .byte	102,15,56,222,209
   3350 	movups	16(%r11),%xmm1
   3351 	movq	%r10,%rax
   3352 
        /* aesdeclast %xmm7,%xmm2: final round key and output offset applied together. */
   3353 .byte	102,15,56,223,215
        /* 0xf3,0xc3 encodes "rep ret". */
   3354 	.byte	0xf3,0xc3
   3355 
   3356 .globl	_aesni_cbc_encrypt
   3357 .private_extern _aesni_cbc_encrypt
   3358 
   3359 .p2align	4
   3360 _aesni_cbc_encrypt:
   3361 	testq	%rdx,%rdx
   3362 	jz	L$cbc_ret
   3363 
   3364 	movl	240(%rcx),%r10d
   3365 	movq	%rcx,%r11
   3366 	testl	%r9d,%r9d
   3367 	jz	L$cbc_decrypt
   3368 
   3369 	movups	(%r8),%xmm2
   3370 	movl	%r10d,%eax
   3371 	cmpq	$16,%rdx
   3372 	jb	L$cbc_enc_tail
   3373 	subq	$16,%rdx
   3374 	jmp	L$cbc_enc_loop
   3375 .p2align	4
   3376 L$cbc_enc_loop:
   3377 	movups	(%rdi),%xmm3
   3378 	leaq	16(%rdi),%rdi
   3379 
   3380 	movups	(%rcx),%xmm0
   3381 	movups	16(%rcx),%xmm1
   3382 	xorps	%xmm0,%xmm3
   3383 	leaq	32(%rcx),%rcx
   3384 	xorps	%xmm3,%xmm2
   3385 L$oop_enc1_15:
   3386 .byte	102,15,56,220,209
   3387 	decl	%eax
   3388 	movups	(%rcx),%xmm1
   3389 	leaq	16(%rcx),%rcx
   3390 	jnz	L$oop_enc1_15
   3391 .byte	102,15,56,221,209
   3392 	movl	%r10d,%eax
   3393 	movq	%r11,%rcx
   3394 	movups	%xmm2,0(%rsi)
   3395 	leaq	16(%rsi),%rsi
   3396 	subq	$16,%rdx
   3397 	jnc	L$cbc_enc_loop
   3398 	addq	$16,%rdx
   3399 	jnz	L$cbc_enc_tail
   3400 	pxor	%xmm0,%xmm0
   3401 	pxor	%xmm1,%xmm1
   3402 	movups	%xmm2,(%r8)
   3403 	pxor	%xmm2,%xmm2
   3404 	pxor	%xmm3,%xmm3
   3405 	jmp	L$cbc_ret
   3406 
   3407 L$cbc_enc_tail:
   3408 	movq	%rdx,%rcx
   3409 	xchgq	%rdi,%rsi
   3410 .long	0x9066A4F3
   3411 	movl	$16,%ecx
   3412 	subq	%rdx,%rcx
   3413 	xorl	%eax,%eax
   3414 .long	0x9066AAF3
   3415 	leaq	-16(%rdi),%rdi
   3416 	movl	%r10d,%eax
   3417 	movq	%rdi,%rsi
   3418 	movq	%r11,%rcx
   3419 	xorq	%rdx,%rdx
   3420 	jmp	L$cbc_enc_loop
   3421 
   3422 .p2align	4
   3423 L$cbc_decrypt:
   3424 	cmpq	$16,%rdx
   3425 	jne	L$cbc_decrypt_bulk
   3426 
   3427 
   3428 
   3429 	movdqu	(%rdi),%xmm2
   3430 	movdqu	(%r8),%xmm3
   3431 	movdqa	%xmm2,%xmm4
   3432 	movups	(%rcx),%xmm0
   3433 	movups	16(%rcx),%xmm1
   3434 	leaq	32(%rcx),%rcx
   3435 	xorps	%xmm0,%xmm2
   3436 L$oop_dec1_16:
   3437 .byte	102,15,56,222,209
   3438 	decl	%r10d
   3439 	movups	(%rcx),%xmm1
   3440 	leaq	16(%rcx),%rcx
   3441 	jnz	L$oop_dec1_16
   3442 .byte	102,15,56,223,209
   3443 	pxor	%xmm0,%xmm0
   3444 	pxor	%xmm1,%xmm1
   3445 	movdqu	%xmm4,(%r8)
   3446 	xorps	%xmm3,%xmm2
   3447 	pxor	%xmm3,%xmm3
   3448 	movups	%xmm2,(%rsi)
   3449 	pxor	%xmm2,%xmm2
   3450 	jmp	L$cbc_ret
   3451 .p2align	4
   3452 L$cbc_decrypt_bulk:
   3453 	leaq	(%rsp),%r11
   3454 	pushq	%rbp
   3455 	subq	$16,%rsp
   3456 	andq	$-16,%rsp
   3457 	movq	%rcx,%rbp
   3458 	movups	(%r8),%xmm10
   3459 	movl	%r10d,%eax
   3460 	cmpq	$0x50,%rdx
   3461 	jbe	L$cbc_dec_tail
   3462 
   3463 	movups	(%rcx),%xmm0
   3464 	movdqu	0(%rdi),%xmm2
   3465 	movdqu	16(%rdi),%xmm3
   3466 	movdqa	%xmm2,%xmm11
   3467 	movdqu	32(%rdi),%xmm4
   3468 	movdqa	%xmm3,%xmm12
   3469 	movdqu	48(%rdi),%xmm5
   3470 	movdqa	%xmm4,%xmm13
   3471 	movdqu	64(%rdi),%xmm6
   3472 	movdqa	%xmm5,%xmm14
   3473 	movdqu	80(%rdi),%xmm7
   3474 	movdqa	%xmm6,%xmm15
   3475 	movl	_OPENSSL_ia32cap_P+4(%rip),%r9d
   3476 	cmpq	$0x70,%rdx
   3477 	jbe	L$cbc_dec_six_or_seven
   3478 
   3479 	andl	$71303168,%r9d
   3480 	subq	$0x50,%rdx
   3481 	cmpl	$4194304,%r9d
   3482 	je	L$cbc_dec_loop6_enter
   3483 	subq	$0x20,%rdx
   3484 	leaq	112(%rcx),%rcx
   3485 	jmp	L$cbc_dec_loop8_enter
   3486 .p2align	4
   3487 L$cbc_dec_loop8:
   3488 	movups	%xmm9,(%rsi)
   3489 	leaq	16(%rsi),%rsi
   3490 L$cbc_dec_loop8_enter:
   3491 	movdqu	96(%rdi),%xmm8
   3492 	pxor	%xmm0,%xmm2
   3493 	movdqu	112(%rdi),%xmm9
   3494 	pxor	%xmm0,%xmm3
   3495 	movups	16-112(%rcx),%xmm1
   3496 	pxor	%xmm0,%xmm4
   3497 	movq	$-1,%rbp
   3498 	cmpq	$0x70,%rdx
   3499 	pxor	%xmm0,%xmm5
   3500 	pxor	%xmm0,%xmm6
   3501 	pxor	%xmm0,%xmm7
   3502 	pxor	%xmm0,%xmm8
   3503 
   3504 .byte	102,15,56,222,209
   3505 	pxor	%xmm0,%xmm9
   3506 	movups	32-112(%rcx),%xmm0
   3507 .byte	102,15,56,222,217
   3508 .byte	102,15,56,222,225
   3509 .byte	102,15,56,222,233
   3510 .byte	102,15,56,222,241
   3511 .byte	102,15,56,222,249
   3512 .byte	102,68,15,56,222,193
   3513 	adcq	$0,%rbp
   3514 	andq	$128,%rbp
   3515 .byte	102,68,15,56,222,201
   3516 	addq	%rdi,%rbp
   3517 	movups	48-112(%rcx),%xmm1
   3518 .byte	102,15,56,222,208
   3519 .byte	102,15,56,222,216
   3520 .byte	102,15,56,222,224
   3521 .byte	102,15,56,222,232
   3522 .byte	102,15,56,222,240
   3523 .byte	102,15,56,222,248
   3524 .byte	102,68,15,56,222,192
   3525 .byte	102,68,15,56,222,200
   3526 	movups	64-112(%rcx),%xmm0
   3527 	nop
   3528 .byte	102,15,56,222,209
   3529 .byte	102,15,56,222,217
   3530 .byte	102,15,56,222,225
   3531 .byte	102,15,56,222,233
   3532 .byte	102,15,56,222,241
   3533 .byte	102,15,56,222,249
   3534 .byte	102,68,15,56,222,193
   3535 .byte	102,68,15,56,222,201
   3536 	movups	80-112(%rcx),%xmm1
   3537 	nop
   3538 .byte	102,15,56,222,208
   3539 .byte	102,15,56,222,216
   3540 .byte	102,15,56,222,224
   3541 .byte	102,15,56,222,232
   3542 .byte	102,15,56,222,240
   3543 .byte	102,15,56,222,248
   3544 .byte	102,68,15,56,222,192
   3545 .byte	102,68,15,56,222,200
   3546 	movups	96-112(%rcx),%xmm0
   3547 	nop
   3548 .byte	102,15,56,222,209
   3549 .byte	102,15,56,222,217
   3550 .byte	102,15,56,222,225
   3551 .byte	102,15,56,222,233
   3552 .byte	102,15,56,222,241
   3553 .byte	102,15,56,222,249
   3554 .byte	102,68,15,56,222,193
   3555 .byte	102,68,15,56,222,201
   3556 	movups	112-112(%rcx),%xmm1
   3557 	nop
   3558 .byte	102,15,56,222,208
   3559 .byte	102,15,56,222,216
   3560 .byte	102,15,56,222,224
   3561 .byte	102,15,56,222,232
   3562 .byte	102,15,56,222,240
   3563 .byte	102,15,56,222,248
   3564 .byte	102,68,15,56,222,192
   3565 .byte	102,68,15,56,222,200
   3566 	movups	128-112(%rcx),%xmm0
   3567 	nop
   3568 .byte	102,15,56,222,209
   3569 .byte	102,15,56,222,217
   3570 .byte	102,15,56,222,225
   3571 .byte	102,15,56,222,233
   3572 .byte	102,15,56,222,241
   3573 .byte	102,15,56,222,249
   3574 .byte	102,68,15,56,222,193
   3575 .byte	102,68,15,56,222,201
   3576 	movups	144-112(%rcx),%xmm1
   3577 	cmpl	$11,%eax
   3578 .byte	102,15,56,222,208
   3579 .byte	102,15,56,222,216
   3580 .byte	102,15,56,222,224
   3581 .byte	102,15,56,222,232
   3582 .byte	102,15,56,222,240
   3583 .byte	102,15,56,222,248
   3584 .byte	102,68,15,56,222,192
   3585 .byte	102,68,15,56,222,200
   3586 	movups	160-112(%rcx),%xmm0
   3587 	jb	L$cbc_dec_done
   3588 .byte	102,15,56,222,209
   3589 .byte	102,15,56,222,217
   3590 .byte	102,15,56,222,225
   3591 .byte	102,15,56,222,233
   3592 .byte	102,15,56,222,241
   3593 .byte	102,15,56,222,249
   3594 .byte	102,68,15,56,222,193
   3595 .byte	102,68,15,56,222,201
   3596 	movups	176-112(%rcx),%xmm1
   3597 	nop
   3598 .byte	102,15,56,222,208
   3599 .byte	102,15,56,222,216
   3600 .byte	102,15,56,222,224
   3601 .byte	102,15,56,222,232
   3602 .byte	102,15,56,222,240
   3603 .byte	102,15,56,222,248
   3604 .byte	102,68,15,56,222,192
   3605 .byte	102,68,15,56,222,200
   3606 	movups	192-112(%rcx),%xmm0
   3607 	je	L$cbc_dec_done
   3608 .byte	102,15,56,222,209
   3609 .byte	102,15,56,222,217
   3610 .byte	102,15,56,222,225
   3611 .byte	102,15,56,222,233
   3612 .byte	102,15,56,222,241
   3613 .byte	102,15,56,222,249
   3614 .byte	102,68,15,56,222,193
   3615 .byte	102,68,15,56,222,201
   3616 	movups	208-112(%rcx),%xmm1
   3617 	nop
   3618 .byte	102,15,56,222,208
   3619 .byte	102,15,56,222,216
   3620 .byte	102,15,56,222,224
   3621 .byte	102,15,56,222,232
   3622 .byte	102,15,56,222,240
   3623 .byte	102,15,56,222,248
   3624 .byte	102,68,15,56,222,192
   3625 .byte	102,68,15,56,222,200
   3626 	movups	224-112(%rcx),%xmm0
   3627 	jmp	L$cbc_dec_done
   3628 .p2align	4
   3629 L$cbc_dec_done:
   3630 .byte	102,15,56,222,209
   3631 .byte	102,15,56,222,217
   3632 	pxor	%xmm0,%xmm10
   3633 	pxor	%xmm0,%xmm11
   3634 .byte	102,15,56,222,225
   3635 .byte	102,15,56,222,233
   3636 	pxor	%xmm0,%xmm12
   3637 	pxor	%xmm0,%xmm13
   3638 .byte	102,15,56,222,241
   3639 .byte	102,15,56,222,249
   3640 	pxor	%xmm0,%xmm14
   3641 	pxor	%xmm0,%xmm15
   3642 .byte	102,68,15,56,222,193
   3643 .byte	102,68,15,56,222,201
   3644 	movdqu	80(%rdi),%xmm1
   3645 
   3646 .byte	102,65,15,56,223,210
   3647 	movdqu	96(%rdi),%xmm10
   3648 	pxor	%xmm0,%xmm1
   3649 .byte	102,65,15,56,223,219
   3650 	pxor	%xmm0,%xmm10
   3651 	movdqu	112(%rdi),%xmm0
   3652 .byte	102,65,15,56,223,228
   3653 	leaq	128(%rdi),%rdi
   3654 	movdqu	0(%rbp),%xmm11
   3655 .byte	102,65,15,56,223,237
   3656 .byte	102,65,15,56,223,246
   3657 	movdqu	16(%rbp),%xmm12
   3658 	movdqu	32(%rbp),%xmm13
   3659 .byte	102,65,15,56,223,255
   3660 .byte	102,68,15,56,223,193
   3661 	movdqu	48(%rbp),%xmm14
   3662 	movdqu	64(%rbp),%xmm15
   3663 .byte	102,69,15,56,223,202
   3664 	movdqa	%xmm0,%xmm10
   3665 	movdqu	80(%rbp),%xmm1
   3666 	movups	-112(%rcx),%xmm0
   3667 
   3668 	movups	%xmm2,(%rsi)
   3669 	movdqa	%xmm11,%xmm2
   3670 	movups	%xmm3,16(%rsi)
   3671 	movdqa	%xmm12,%xmm3
   3672 	movups	%xmm4,32(%rsi)
   3673 	movdqa	%xmm13,%xmm4
   3674 	movups	%xmm5,48(%rsi)
   3675 	movdqa	%xmm14,%xmm5
   3676 	movups	%xmm6,64(%rsi)
   3677 	movdqa	%xmm15,%xmm6
   3678 	movups	%xmm7,80(%rsi)
   3679 	movdqa	%xmm1,%xmm7
   3680 	movups	%xmm8,96(%rsi)
   3681 	leaq	112(%rsi),%rsi
   3682 
   3683 	subq	$0x80,%rdx
   3684 	ja	L$cbc_dec_loop8
   3685 
   3686 	movaps	%xmm9,%xmm2
   3687 	leaq	-112(%rcx),%rcx
   3688 	addq	$0x70,%rdx
   3689 	jle	L$cbc_dec_clear_tail_collected
   3690 	movups	%xmm9,(%rsi)
   3691 	leaq	16(%rsi),%rsi
   3692 	cmpq	$0x50,%rdx
   3693 	jbe	L$cbc_dec_tail
   3694 
   3695 	movaps	%xmm11,%xmm2
   3696 L$cbc_dec_six_or_seven:
   3697 	cmpq	$0x60,%rdx
   3698 	ja	L$cbc_dec_seven
   3699 
   3700 	movaps	%xmm7,%xmm8
   3701 	call	_aesni_decrypt6
   3702 	pxor	%xmm10,%xmm2
   3703 	movaps	%xmm8,%xmm10
   3704 	pxor	%xmm11,%xmm3
   3705 	movdqu	%xmm2,(%rsi)
   3706 	pxor	%xmm12,%xmm4
   3707 	movdqu	%xmm3,16(%rsi)
   3708 	pxor	%xmm3,%xmm3
   3709 	pxor	%xmm13,%xmm5
   3710 	movdqu	%xmm4,32(%rsi)
   3711 	pxor	%xmm4,%xmm4
   3712 	pxor	%xmm14,%xmm6
   3713 	movdqu	%xmm5,48(%rsi)
   3714 	pxor	%xmm5,%xmm5
   3715 	pxor	%xmm15,%xmm7
   3716 	movdqu	%xmm6,64(%rsi)
   3717 	pxor	%xmm6,%xmm6
   3718 	leaq	80(%rsi),%rsi
   3719 	movdqa	%xmm7,%xmm2
   3720 	pxor	%xmm7,%xmm7
   3721 	jmp	L$cbc_dec_tail_collected
   3722 
   3723 .p2align	4
   3724 L$cbc_dec_seven:
   3725 	movups	96(%rdi),%xmm8
   3726 	xorps	%xmm9,%xmm9
   3727 	call	_aesni_decrypt8
   3728 	movups	80(%rdi),%xmm9
   3729 	pxor	%xmm10,%xmm2
   3730 	movups	96(%rdi),%xmm10
   3731 	pxor	%xmm11,%xmm3
   3732 	movdqu	%xmm2,(%rsi)
   3733 	pxor	%xmm12,%xmm4
   3734 	movdqu	%xmm3,16(%rsi)
   3735 	pxor	%xmm3,%xmm3
   3736 	pxor	%xmm13,%xmm5
   3737 	movdqu	%xmm4,32(%rsi)
   3738 	pxor	%xmm4,%xmm4
   3739 	pxor	%xmm14,%xmm6
   3740 	movdqu	%xmm5,48(%rsi)
   3741 	pxor	%xmm5,%xmm5
   3742 	pxor	%xmm15,%xmm7
   3743 	movdqu	%xmm6,64(%rsi)
   3744 	pxor	%xmm6,%xmm6
   3745 	pxor	%xmm9,%xmm8
   3746 	movdqu	%xmm7,80(%rsi)
   3747 	pxor	%xmm7,%xmm7
   3748 	leaq	96(%rsi),%rsi
   3749 	movdqa	%xmm8,%xmm2
   3750 	pxor	%xmm8,%xmm8
   3751 	pxor	%xmm9,%xmm9
   3752 	jmp	L$cbc_dec_tail_collected
   3753 
   3754 .p2align	4
   3755 L$cbc_dec_loop6:
   3756 	movups	%xmm7,(%rsi)
   3757 	leaq	16(%rsi),%rsi
   3758 	movdqu	0(%rdi),%xmm2
   3759 	movdqu	16(%rdi),%xmm3
   3760 	movdqa	%xmm2,%xmm11
   3761 	movdqu	32(%rdi),%xmm4
   3762 	movdqa	%xmm3,%xmm12
   3763 	movdqu	48(%rdi),%xmm5
   3764 	movdqa	%xmm4,%xmm13
   3765 	movdqu	64(%rdi),%xmm6
   3766 	movdqa	%xmm5,%xmm14
   3767 	movdqu	80(%rdi),%xmm7
   3768 	movdqa	%xmm6,%xmm15
   3769 L$cbc_dec_loop6_enter:
   3770 	leaq	96(%rdi),%rdi
   3771 	movdqa	%xmm7,%xmm8
   3772 
   3773 	call	_aesni_decrypt6
   3774 
   3775 	pxor	%xmm10,%xmm2
   3776 	movdqa	%xmm8,%xmm10
   3777 	pxor	%xmm11,%xmm3
   3778 	movdqu	%xmm2,(%rsi)
   3779 	pxor	%xmm12,%xmm4
   3780 	movdqu	%xmm3,16(%rsi)
   3781 	pxor	%xmm13,%xmm5
   3782 	movdqu	%xmm4,32(%rsi)
   3783 	pxor	%xmm14,%xmm6
   3784 	movq	%rbp,%rcx
   3785 	movdqu	%xmm5,48(%rsi)
   3786 	pxor	%xmm15,%xmm7
   3787 	movl	%r10d,%eax
   3788 	movdqu	%xmm6,64(%rsi)
   3789 	leaq	80(%rsi),%rsi
   3790 	subq	$0x60,%rdx
   3791 	ja	L$cbc_dec_loop6
   3792 
   3793 	movdqa	%xmm7,%xmm2
   3794 	addq	$0x50,%rdx
   3795 	jle	L$cbc_dec_clear_tail_collected
   3796 	movups	%xmm7,(%rsi)
   3797 	leaq	16(%rsi),%rsi
   3798 
   3799 L$cbc_dec_tail:
   3800 	movups	(%rdi),%xmm2
   3801 	subq	$0x10,%rdx
   3802 	jbe	L$cbc_dec_one
   3803 
   3804 	movups	16(%rdi),%xmm3
   3805 	movaps	%xmm2,%xmm11
   3806 	subq	$0x10,%rdx
   3807 	jbe	L$cbc_dec_two
   3808 
   3809 	movups	32(%rdi),%xmm4
   3810 	movaps	%xmm3,%xmm12
   3811 	subq	$0x10,%rdx
   3812 	jbe	L$cbc_dec_three
   3813 
   3814 	movups	48(%rdi),%xmm5
   3815 	movaps	%xmm4,%xmm13
   3816 	subq	$0x10,%rdx
   3817 	jbe	L$cbc_dec_four
   3818 
   3819 	movups	64(%rdi),%xmm6
   3820 	movaps	%xmm5,%xmm14
   3821 	movaps	%xmm6,%xmm15
   3822 	xorps	%xmm7,%xmm7
   3823 	call	_aesni_decrypt6
   3824 	pxor	%xmm10,%xmm2
   3825 	movaps	%xmm15,%xmm10
   3826 	pxor	%xmm11,%xmm3
   3827 	movdqu	%xmm2,(%rsi)
   3828 	pxor	%xmm12,%xmm4
   3829 	movdqu	%xmm3,16(%rsi)
   3830 	pxor	%xmm3,%xmm3
   3831 	pxor	%xmm13,%xmm5
   3832 	movdqu	%xmm4,32(%rsi)
   3833 	pxor	%xmm4,%xmm4
   3834 	pxor	%xmm14,%xmm6
   3835 	movdqu	%xmm5,48(%rsi)
   3836 	pxor	%xmm5,%xmm5
   3837 	leaq	64(%rsi),%rsi
   3838 	movdqa	%xmm6,%xmm2
   3839 	pxor	%xmm6,%xmm6
   3840 	pxor	%xmm7,%xmm7
   3841 	subq	$0x10,%rdx
   3842 	jmp	L$cbc_dec_tail_collected
   3843 
   3844 .p2align	4
   3845 L$cbc_dec_one:
   3846 	movaps	%xmm2,%xmm11
   3847 	movups	(%rcx),%xmm0
   3848 	movups	16(%rcx),%xmm1
   3849 	leaq	32(%rcx),%rcx
   3850 	xorps	%xmm0,%xmm2
   3851 L$oop_dec1_17:
   3852 .byte	102,15,56,222,209
   3853 	decl	%eax
   3854 	movups	(%rcx),%xmm1
   3855 	leaq	16(%rcx),%rcx
   3856 	jnz	L$oop_dec1_17
   3857 .byte	102,15,56,223,209
   3858 	xorps	%xmm10,%xmm2
   3859 	movaps	%xmm11,%xmm10
   3860 	jmp	L$cbc_dec_tail_collected
   3861 .p2align	4
   3862 L$cbc_dec_two:
   3863 	movaps	%xmm3,%xmm12
   3864 	call	_aesni_decrypt2
   3865 	pxor	%xmm10,%xmm2
   3866 	movaps	%xmm12,%xmm10
   3867 	pxor	%xmm11,%xmm3
   3868 	movdqu	%xmm2,(%rsi)
   3869 	movdqa	%xmm3,%xmm2
   3870 	pxor	%xmm3,%xmm3
   3871 	leaq	16(%rsi),%rsi
   3872 	jmp	L$cbc_dec_tail_collected
   3873 .p2align	4
   3874 L$cbc_dec_three:
   3875 	movaps	%xmm4,%xmm13
   3876 	call	_aesni_decrypt3
   3877 	pxor	%xmm10,%xmm2
   3878 	movaps	%xmm13,%xmm10
   3879 	pxor	%xmm11,%xmm3
   3880 	movdqu	%xmm2,(%rsi)
   3881 	pxor	%xmm12,%xmm4
   3882 	movdqu	%xmm3,16(%rsi)
   3883 	pxor	%xmm3,%xmm3
   3884 	movdqa	%xmm4,%xmm2
   3885 	pxor	%xmm4,%xmm4
   3886 	leaq	32(%rsi),%rsi
   3887 	jmp	L$cbc_dec_tail_collected
   3888 .p2align	4
   3889 L$cbc_dec_four:
   3890 	movaps	%xmm5,%xmm14
   3891 	call	_aesni_decrypt4
   3892 	pxor	%xmm10,%xmm2
   3893 	movaps	%xmm14,%xmm10
   3894 	pxor	%xmm11,%xmm3
   3895 	movdqu	%xmm2,(%rsi)
   3896 	pxor	%xmm12,%xmm4
   3897 	movdqu	%xmm3,16(%rsi)
   3898 	pxor	%xmm3,%xmm3
   3899 	pxor	%xmm13,%xmm5
   3900 	movdqu	%xmm4,32(%rsi)
   3901 	pxor	%xmm4,%xmm4
   3902 	movdqa	%xmm5,%xmm2
   3903 	pxor	%xmm5,%xmm5
   3904 	leaq	48(%rsi),%rsi
   3905 	jmp	L$cbc_dec_tail_collected
   3906 
   3907 .p2align	4
   3908 L$cbc_dec_clear_tail_collected:
   3909 	pxor	%xmm3,%xmm3
   3910 	pxor	%xmm4,%xmm4
   3911 	pxor	%xmm5,%xmm5
   3912 	pxor	%xmm6,%xmm6
   3913 	pxor	%xmm7,%xmm7
   3914 	pxor	%xmm8,%xmm8
   3915 	pxor	%xmm9,%xmm9
   3916 L$cbc_dec_tail_collected:
   3917 	movups	%xmm10,(%r8)
   3918 	andq	$15,%rdx
   3919 	jnz	L$cbc_dec_tail_partial
   3920 	movups	%xmm2,(%rsi)
   3921 	pxor	%xmm2,%xmm2
   3922 	jmp	L$cbc_dec_ret
   3923 .p2align	4
   3924 L$cbc_dec_tail_partial:
   3925 	movaps	%xmm2,(%rsp)
   3926 	pxor	%xmm2,%xmm2
   3927 	movq	$16,%rcx
   3928 	movq	%rsi,%rdi
   3929 	subq	%rdx,%rcx
   3930 	leaq	(%rsp),%rsi
   3931 .long	0x9066A4F3
   3932 	movdqa	%xmm2,(%rsp)
   3933 
   3934 L$cbc_dec_ret:
   3935 	xorps	%xmm0,%xmm0
   3936 	pxor	%xmm1,%xmm1
   3937 	movq	-8(%r11),%rbp
   3938 	leaq	(%r11),%rsp
   3939 L$cbc_ret:
   3940 	.byte	0xf3,0xc3
   3941 
   3942 .globl	_aesni_set_decrypt_key
   3943 .private_extern _aesni_set_decrypt_key
   3944 
   3945 .p2align	4
        // _aesni_set_decrypt_key: build a decryption key schedule in place.
        //
        // The incoming argument registers are handed unchanged to
        // __aesni_set_encrypt_key. If that call returns %eax == 0, the schedule
        // it wrote at (%rdx) is reversed end-for-end, and every round key except
        // the first and the last is additionally passed through AESIMC.
        //
        // Out:   %eax = the value returned by __aesni_set_encrypt_key.
        // Clobb: %rdx, %rsi, %rdi, %xmm0, %xmm1, flags (plus whatever the callee
        //        clobbers). %xmm0/%xmm1 are zeroed before a successful return.
   3946 _aesni_set_decrypt_key:
        	// Hand-encoded "subq $8,%rsp"; undone by the addq at L$dec_key_ret.
   3947 .byte	0x48,0x83,0xEC,0x08
   3948 	call	__aesni_set_encrypt_key
        	// On return %esi holds the count (9, 11 or 13) that the callee stored in
        	// the schedule; scale it by 16 bytes per round key.
   3949 	shll	$4,%esi
   3950 	testl	%eax,%eax
   3951 	jnz	L$dec_key_ret
        	// %rdi = address of the last round key, %rdx = address of the first.
   3952 	leaq	16(%rdx,%rsi,1),%rdi
   3953 
        	// Exchange the first and last round keys without transforming them.
   3954 	movups	(%rdx),%xmm0
   3955 	movups	(%rdi),%xmm1
   3956 	movups	%xmm0,(%rdi)
   3957 	movups	%xmm1,(%rdx)
   3958 	leaq	16(%rdx),%rdx
   3959 	leaq	-16(%rdi),%rdi
   3960 
        	// Walk %rdx upward and %rdi downward: load one key from each end, apply
        	// AESIMC to both, and store each at the other's position.
   3961 L$dec_key_inverse:
   3962 	movups	(%rdx),%xmm0
   3963 	movups	(%rdi),%xmm1
        	// .byte sequences below encode: aesimc %xmm0,%xmm0 ; aesimc %xmm1,%xmm1
   3964 .byte	102,15,56,219,192
   3965 .byte	102,15,56,219,201
   3966 	leaq	16(%rdx),%rdx
   3967 	leaq	-16(%rdi),%rdi
   3968 	movups	%xmm0,16(%rdi)
   3969 	movups	%xmm1,-16(%rdx)
        	// Unsigned compare: keep going while the pointers have not met.
   3970 	cmpq	%rdx,%rdi
   3971 	ja	L$dec_key_inverse
   3972 
        	// Remaining middle round key: AESIMC it and write it back, then wipe the
        	// two scratch vector registers.
   3973 	movups	(%rdx),%xmm0
        	// aesimc %xmm0,%xmm0
   3974 .byte	102,15,56,219,192
   3975 	pxor	%xmm1,%xmm1
   3976 	movups	%xmm0,(%rdi)
   3977 	pxor	%xmm0,%xmm0
   3978 L$dec_key_ret:
   3979 	addq	$8,%rsp
        	// Bytes 0xf3,0xc3 encode "rep ret".
   3980 	.byte	0xf3,0xc3
   3981 L$SEH_end_set_decrypt_key:
   3982 
   3983 .globl	_aesni_set_encrypt_key
   3984 .private_extern _aesni_set_encrypt_key
   3985 
   3986 .p2align	4
        // _aesni_set_encrypt_key / __aesni_set_encrypt_key: expand a user key
        // into an AES encryption key schedule.
        //
        // In:    %rdi = user key bytes
        //        %esi = key length in bits (128, 192 or 256)
        //        %rdx = output key schedule
        // Out:   %rax = 0 on success,
        //              -1 if %rdi or %rdx is null,
        //              -2 if %esi is none of 128/192/256.
        //        Round keys are written from (%rdx) upward and the value 9, 11 or
        //        13 (for 128/192/256 bits) is stored as a dword at 240(%rdx).
        //        %esi still holds that value on return and %rdx is left
        //        unmodified; _aesni_set_decrypt_key depends on both.
        // Clobb: %rax, %rsi, %r10, %xmm0-%xmm5 (all six zeroed before return),
        //        flags.
        //
        // Each key size has two implementations: one built on AESKEYGENASSIST
        // plus the L$key_expansion_* helpers, and an "_alt" one built on
        // PSHUFB + AESENCLAST. The choice is made from _OPENSSL_ia32cap_P+4.
   3987 _aesni_set_encrypt_key:
   3988 __aesni_set_encrypt_key:
        	// Hand-encoded "subq $8,%rsp"; undone by the addq at L$enc_key_ret.
   3989 .byte	0x48,0x83,0xEC,0x08
        	// Pre-load the null-pointer error code, then validate both pointers.
   3990 	movq	$-1,%rax
   3991 	testq	%rdi,%rdi
   3992 	jz	L$enc_key_ret
   3993 	testq	%rdx,%rdx
   3994 	jz	L$enc_key_ret
   3995 
        	// 268437504 = 0x10000800 (bits 28 and 11). After the AND below, the _alt
        	// paths are taken only when %r10d == 0x10000000, i.e. bit 28 set and
        	// bit 11 clear in the dword at _OPENSSL_ia32cap_P+4.
        	// NOTE(review): presumably these are the AVX and XOP capability bits;
        	// confirm against the OPENSSL_ia32cap documentation.
   3996 	movl	$268437504,%r10d
        	// %xmm0 = first 16 bytes of the user key; %xmm4 = 0 (scratch used by the
        	// L$key_expansion_* helpers); %rax = write cursor at the second round key.
   3997 	movups	(%rdi),%xmm0
   3998 	xorps	%xmm4,%xmm4
   3999 	andl	_OPENSSL_ia32cap_P+4(%rip),%r10d
   4000 	leaq	16(%rdx),%rax
   4001 	cmpl	$256,%esi
   4002 	je	L$14rounds
   4003 	cmpl	$192,%esi
   4004 	je	L$12rounds
   4005 	cmpl	$128,%esi
   4006 	jne	L$bad_keybits
   4007 
        	// ---- 128-bit key ----
   4008 L$10rounds:
   4009 	movl	$9,%esi
   4010 	cmpl	$268435456,%r10d
   4011 	je	L$10rounds_alt
   4012 
        	// Round key 0 is the user key itself. Each ".byte 102,15,58,223,200,N"
        	// encodes "aeskeygenassist $N,%xmm0,%xmm1"; the helper then derives the
        	// next round key into %xmm0. All calls but the first enter at
        	// L$key_expansion_128, which first stores the previous %xmm0 at (%rax)
        	// and advances %rax by 16.
   4013 	movups	%xmm0,(%rdx)
   4014 .byte	102,15,58,223,200,1
   4015 	call	L$key_expansion_128_cold
   4016 .byte	102,15,58,223,200,2
   4017 	call	L$key_expansion_128
   4018 .byte	102,15,58,223,200,4
   4019 	call	L$key_expansion_128
   4020 .byte	102,15,58,223,200,8
   4021 	call	L$key_expansion_128
   4022 .byte	102,15,58,223,200,16
   4023 	call	L$key_expansion_128
   4024 .byte	102,15,58,223,200,32
   4025 	call	L$key_expansion_128
   4026 .byte	102,15,58,223,200,64
   4027 	call	L$key_expansion_128
   4028 .byte	102,15,58,223,200,128
   4029 	call	L$key_expansion_128
   4030 .byte	102,15,58,223,200,27
   4031 	call	L$key_expansion_128
   4032 .byte	102,15,58,223,200,54
   4033 	call	L$key_expansion_128
        	// Store the last round key. %rax is now %rdx+160, so 80(%rax) is
        	// 240(%rdx).
   4034 	movups	%xmm0,(%rax)
   4035 	movl	%esi,80(%rax)
   4036 	xorl	%eax,%eax
   4037 	jmp	L$enc_key_ret
   4038 
   4039 .p2align	4
        	// 128-bit key, PSHUFB/AESENCLAST variant.
        	// %xmm5 = L$key_rotate shuffle mask, %xmm4 = round constant (starts at 1
        	// in every dword), %xmm2 = copy of the previous round key, %r10d = loop
        	// counter for the first 8 derived keys.
   4040 L$10rounds_alt:
   4041 	movdqa	L$key_rotate(%rip),%xmm5
   4042 	movl	$8,%r10d
   4043 	movdqa	L$key_rcon1(%rip),%xmm4
   4044 	movdqa	%xmm0,%xmm2
   4045 	movdqu	%xmm0,(%rdx)
   4046 	jmp	L$oop_key128
   4047 
   4048 .p2align	4
   4049 L$oop_key128:
        	// pshufb %xmm5,%xmm0 ; aesenclast %xmm4,%xmm0
   4050 .byte	102,15,56,0,197
   4051 .byte	102,15,56,221,196
        	// Double the round constant for the next iteration.
   4052 	pslld	$1,%xmm4
   4053 	leaq	16(%rax),%rax
   4054 
        	// %xmm2 = k ^ (k<<32) ^ (k<<64) ^ (k<<96), k being the previous round key
        	// (pslldq shifts by bytes, so $4 is one 32-bit word).
   4055 	movdqa	%xmm2,%xmm3
   4056 	pslldq	$4,%xmm2
   4057 	pxor	%xmm2,%xmm3
   4058 	pslldq	$4,%xmm2
   4059 	pxor	%xmm2,%xmm3
   4060 	pslldq	$4,%xmm2
   4061 	pxor	%xmm3,%xmm2
   4062 
        	// Combine into the new round key, store it in the slot just skipped by
        	// the leaq above, and keep a copy in %xmm2 for the next iteration.
   4063 	pxor	%xmm2,%xmm0
   4064 	movdqu	%xmm0,-16(%rax)
   4065 	movdqa	%xmm0,%xmm2
   4066 
   4067 	decl	%r10d
   4068 	jnz	L$oop_key128
   4069 
        	// The last two round keys are unrolled. The round constant is reloaded as
        	// 0x1b and doubled once between them by the pslld.
   4070 	movdqa	L$key_rcon1b(%rip),%xmm4
   4071 
        	// pshufb %xmm5,%xmm0 ; aesenclast %xmm4,%xmm0
   4072 .byte	102,15,56,0,197
   4073 .byte	102,15,56,221,196
   4074 	pslld	$1,%xmm4
   4075 
   4076 	movdqa	%xmm2,%xmm3
   4077 	pslldq	$4,%xmm2
   4078 	pxor	%xmm2,%xmm3
   4079 	pslldq	$4,%xmm2
   4080 	pxor	%xmm2,%xmm3
   4081 	pslldq	$4,%xmm2
   4082 	pxor	%xmm3,%xmm2
   4083 
   4084 	pxor	%xmm2,%xmm0
   4085 	movdqu	%xmm0,(%rax)
   4086 
   4087 	movdqa	%xmm0,%xmm2
        	// pshufb %xmm5,%xmm0 ; aesenclast %xmm4,%xmm0
   4088 .byte	102,15,56,0,197
   4089 .byte	102,15,56,221,196
   4090 
   4091 	movdqa	%xmm2,%xmm3
   4092 	pslldq	$4,%xmm2
   4093 	pxor	%xmm2,%xmm3
   4094 	pslldq	$4,%xmm2
   4095 	pxor	%xmm2,%xmm3
   4096 	pslldq	$4,%xmm2
   4097 	pxor	%xmm3,%xmm2
   4098 
   4099 	pxor	%xmm2,%xmm0
   4100 	movdqu	%xmm0,16(%rax)
   4101 
        	// %rax is %rdx+144 here, so 96(%rax) is 240(%rdx).
   4102 	movl	%esi,96(%rax)
   4103 	xorl	%eax,%eax
   4104 	jmp	L$enc_key_ret
   4105 
   4106 .p2align	4
        	// ---- 192-bit key ----
        	// %xmm2 = the 8 key bytes at 16(%rdi) in its low half (movq zeroes the
        	// upper half).
   4107 L$12rounds:
   4108 	movq	16(%rdi),%xmm2
   4109 	movl	$11,%esi
   4110 	cmpl	$268435456,%r10d
   4111 	je	L$12rounds_alt
   4112 
        	// Each ".byte 102,15,58,223,202,N" encodes
        	// "aeskeygenassist $N,%xmm2,%xmm1". The 192a/192b helpers alternate;
        	// 192a stores 16 bytes and 192b stores 32 bytes of schedule per call
        	// (the first call enters at the _cold label and stores nothing).
   4113 	movups	%xmm0,(%rdx)
   4114 .byte	102,15,58,223,202,1
   4115 	call	L$key_expansion_192a_cold
   4116 .byte	102,15,58,223,202,2
   4117 	call	L$key_expansion_192b
   4118 .byte	102,15,58,223,202,4
   4119 	call	L$key_expansion_192a
   4120 .byte	102,15,58,223,202,8
   4121 	call	L$key_expansion_192b
   4122 .byte	102,15,58,223,202,16
   4123 	call	L$key_expansion_192a
   4124 .byte	102,15,58,223,202,32
   4125 	call	L$key_expansion_192b
   4126 .byte	102,15,58,223,202,64
   4127 	call	L$key_expansion_192a
   4128 .byte	102,15,58,223,202,128
   4129 	call	L$key_expansion_192b
        	// %rax is %rdx+192 here, so 48(%rax) is 240(%rdx).
   4130 	movups	%xmm0,(%rax)
   4131 	movl	%esi,48(%rax)
   4132 	xorq	%rax,%rax
   4133 	jmp	L$enc_key_ret
   4134 
   4135 .p2align	4
        	// 192-bit key, PSHUFB/AESENCLAST variant. Each of the 8 loop iterations
        	// emits 24 bytes of schedule: 8 from %xmm2, then 16 from %xmm0.
   4136 L$12rounds_alt:
   4137 	movdqa	L$key_rotate192(%rip),%xmm5
   4138 	movdqa	L$key_rcon1(%rip),%xmm4
   4139 	movl	$8,%r10d
   4140 	movdqu	%xmm0,(%rdx)
   4141 	jmp	L$oop_key192
   4142 
   4143 .p2align	4
   4144 L$oop_key192:
        	// Store the low 8 bytes of %xmm2 and keep an unmodified copy in %xmm1.
   4145 	movq	%xmm2,0(%rax)
   4146 	movdqa	%xmm2,%xmm1
        	// pshufb %xmm5,%xmm2 ; aesenclast %xmm4,%xmm2
   4147 .byte	102,15,56,0,213
   4148 .byte	102,15,56,221,212
   4149 	pslld	$1,%xmm4
   4150 	leaq	24(%rax),%rax
   4151 
        	// %xmm0 = x ^ (x<<32) ^ (x<<64) ^ (x<<96), x being the old %xmm0.
   4152 	movdqa	%xmm0,%xmm3
   4153 	pslldq	$4,%xmm0
   4154 	pxor	%xmm0,%xmm3
   4155 	pslldq	$4,%xmm0
   4156 	pxor	%xmm0,%xmm3
   4157 	pslldq	$4,%xmm0
   4158 	pxor	%xmm3,%xmm0
   4159 
        	// %xmm3 = broadcast of dword 3 of %xmm0, XORed with %xmm1 and (%xmm1<<32).
   4160 	pshufd	$0xff,%xmm0,%xmm3
   4161 	pxor	%xmm1,%xmm3
   4162 	pslldq	$4,%xmm1
   4163 	pxor	%xmm1,%xmm3
   4164 
        	// Fold the AESENCLAST result into both halves of the running state and
        	// store the 16-byte part just behind the advanced cursor.
   4165 	pxor	%xmm2,%xmm0
   4166 	pxor	%xmm3,%xmm2
   4167 	movdqu	%xmm0,-16(%rax)
   4168 
   4169 	decl	%r10d
   4170 	jnz	L$oop_key192
   4171 
        	// %rax is %rdx+208 here, so 32(%rax) is 240(%rdx).
   4172 	movl	%esi,32(%rax)
   4173 	xorl	%eax,%eax
   4174 	jmp	L$enc_key_ret
   4175 
   4176 .p2align	4
        	// ---- 256-bit key ----
        	// %xmm2 = second 16 bytes of the user key; the write cursor skips past the
        	// two round keys that are copied straight from the user key.
   4177 L$14rounds:
   4178 	movups	16(%rdi),%xmm2
   4179 	movl	$13,%esi
   4180 	leaq	16(%rax),%rax
   4181 	cmpl	$268435456,%r10d
   4182 	je	L$14rounds_alt
   4183 
        	// ".byte ...,202,N" encodes "aeskeygenassist $N,%xmm2,%xmm1" (followed
        	// by 256a, which updates %xmm0); ".byte ...,200,N" encodes
        	// "aeskeygenassist $N,%xmm0,%xmm1" (followed by 256b, which updates
        	// %xmm2).
   4184 	movups	%xmm0,(%rdx)
   4185 	movups	%xmm2,16(%rdx)
   4186 .byte	102,15,58,223,202,1
   4187 	call	L$key_expansion_256a_cold
   4188 .byte	102,15,58,223,200,1
   4189 	call	L$key_expansion_256b
   4190 .byte	102,15,58,223,202,2
   4191 	call	L$key_expansion_256a
   4192 .byte	102,15,58,223,200,2
   4193 	call	L$key_expansion_256b
   4194 .byte	102,15,58,223,202,4
   4195 	call	L$key_expansion_256a
   4196 .byte	102,15,58,223,200,4
   4197 	call	L$key_expansion_256b
   4198 .byte	102,15,58,223,202,8
   4199 	call	L$key_expansion_256a
   4200 .byte	102,15,58,223,200,8
   4201 	call	L$key_expansion_256b
   4202 .byte	102,15,58,223,202,16
   4203 	call	L$key_expansion_256a
   4204 .byte	102,15,58,223,200,16
   4205 	call	L$key_expansion_256b
   4206 .byte	102,15,58,223,202,32
   4207 	call	L$key_expansion_256a
   4208 .byte	102,15,58,223,200,32
   4209 	call	L$key_expansion_256b
   4210 .byte	102,15,58,223,202,64
   4211 	call	L$key_expansion_256a
        	// %rax is %rdx+224 here, so 16(%rax) is 240(%rdx).
   4212 	movups	%xmm0,(%rax)
   4213 	movl	%esi,16(%rax)
   4214 	xorq	%rax,%rax
   4215 	jmp	L$enc_key_ret
   4216 
   4217 .p2align	4
        	// 256-bit key, PSHUFB/AESENCLAST variant. %xmm0/%xmm2 hold the two most
        	// recent round keys, %xmm1 mirrors %xmm2; %r10d counts 7 passes, the last
        	// of which emits only one round key.
   4218 L$14rounds_alt:
   4219 	movdqa	L$key_rotate(%rip),%xmm5
   4220 	movdqa	L$key_rcon1(%rip),%xmm4
   4221 	movl	$7,%r10d
   4222 	movdqu	%xmm0,0(%rdx)
   4223 	movdqa	%xmm2,%xmm1
   4224 	movdqu	%xmm2,16(%rdx)
   4225 	jmp	L$oop_key256
   4226 
   4227 .p2align	4
   4228 L$oop_key256:
        	// pshufb %xmm5,%xmm2 ; aesenclast %xmm4,%xmm2
   4229 .byte	102,15,56,0,213
   4230 .byte	102,15,56,221,212
   4231 
        	// %xmm0 = x ^ (x<<32) ^ (x<<64) ^ (x<<96), x being the old %xmm0; the
        	// round constant is doubled for the next pass.
   4232 	movdqa	%xmm0,%xmm3
   4233 	pslldq	$4,%xmm0
   4234 	pxor	%xmm0,%xmm3
   4235 	pslldq	$4,%xmm0
   4236 	pxor	%xmm0,%xmm3
   4237 	pslldq	$4,%xmm0
   4238 	pxor	%xmm3,%xmm0
   4239 	pslld	$1,%xmm4
   4240 
   4241 	pxor	%xmm2,%xmm0
   4242 	movdqu	%xmm0,(%rax)
   4243 
   4244 	decl	%r10d
   4245 	jz	L$done_key256
   4246 
        	// Second half of the pass: broadcast dword 3 of the key just produced
        	// and run it through AESENCLAST with an all-zero round key (no shuffle,
        	// no round constant).
   4247 	pshufd	$0xff,%xmm0,%xmm2
   4248 	pxor	%xmm3,%xmm3
        	// aesenclast %xmm3,%xmm2
   4249 .byte	102,15,56,221,211
   4250 
        	// %xmm1 = y ^ (y<<32) ^ (y<<64) ^ (y<<96), y being the old %xmm1.
   4251 	movdqa	%xmm1,%xmm3
   4252 	pslldq	$4,%xmm1
   4253 	pxor	%xmm1,%xmm3
   4254 	pslldq	$4,%xmm1
   4255 	pxor	%xmm1,%xmm3
   4256 	pslldq	$4,%xmm1
   4257 	pxor	%xmm3,%xmm1
   4258 
   4259 	pxor	%xmm1,%xmm2
   4260 	movdqu	%xmm2,16(%rax)
   4261 	leaq	32(%rax),%rax
   4262 	movdqa	%xmm2,%xmm1
   4263 
   4264 	jmp	L$oop_key256
   4265 
        	// %rax is %rdx+224 here, so 16(%rax) is 240(%rdx).
   4266 L$done_key256:
   4267 	movl	%esi,16(%rax)
   4268 	xorl	%eax,%eax
   4269 	jmp	L$enc_key_ret
   4270 
   4271 .p2align	4
        	// Unsupported key length: return -2.
   4272 L$bad_keybits:
   4273 	movq	$-2,%rax
        	// Common exit: wipe every vector register this routine used, release the
        	// 8 bytes of stack taken at entry, and return ("rep ret" as raw bytes).
   4274 L$enc_key_ret:
   4275 	pxor	%xmm0,%xmm0
   4276 	pxor	%xmm1,%xmm1
   4277 	pxor	%xmm2,%xmm2
   4278 	pxor	%xmm3,%xmm3
   4279 	pxor	%xmm4,%xmm4
   4280 	pxor	%xmm5,%xmm5
   4281 	addq	$8,%rsp
   4282 	.byte	0xf3,0xc3
   4283 L$SEH_end_set_encrypt_key:
   4284 
   4285 .p2align	4
        // L$key_expansion_128 / L$key_expansion_128_cold: one step of the
        // AESKEYGENASSIST-based 128-bit schedule.
        // In:    %xmm0 = previous round key, %xmm1 = AESKEYGENASSIST result,
        //        %xmm4 = scratch carried between calls (zeroed by the caller at
        //        function entry), %rax = write cursor.
        // Out:   %xmm0 = next round key. The non-cold entry first stores the
        //        incoming %xmm0 at (%rax) and advances %rax by 16.
        // Clobb: %xmm1, %xmm4.
   4286 L$key_expansion_128:
   4287 	movups	%xmm0,(%rax)
   4288 	leaq	16(%rax),%rax
   4289 L$key_expansion_128_cold:
        	// Two shufps/xorps pairs fold shifted copies of %xmm0 into itself via
        	// %xmm4.
   4290 	shufps	$16,%xmm0,%xmm4
   4291 	xorps	%xmm4,%xmm0
   4292 	shufps	$140,%xmm0,%xmm4
   4293 	xorps	%xmm4,%xmm0
        	// Broadcast dword 3 of the AESKEYGENASSIST result and XOR it in.
   4294 	shufps	$255,%xmm1,%xmm1
   4295 	xorps	%xmm1,%xmm0
        	// "rep ret" as raw bytes.
   4296 	.byte	0xf3,0xc3
   4297 
   4298 .p2align	4
        // L$key_expansion_192a / _192a_cold / _192b_warm / L$key_expansion_192b:
        // steps of the AESKEYGENASSIST-based 192-bit schedule.
        // State: %xmm0 = 16 bytes and %xmm2 = low 8 bytes of the running key
        //        material, %xmm1 = AESKEYGENASSIST result, %xmm4 = scratch carried
        //        between calls, %xmm5 = copy of %xmm2 saved by 192a for the
        //        following 192b call, %rax = write cursor.
        // 192a stores %xmm0 (16 bytes) and advances %rax by 16 before updating;
        // 192a_cold skips that store. 192b stores 32 bytes assembled from
        // %xmm5, %xmm0 and %xmm2, advances %rax by 32, and then shares the update
        // code at L$key_expansion_192b_warm.
        // Clobb: %xmm1, %xmm3, %xmm4 (and %xmm5 as described).
   4299 L$key_expansion_192a:
   4300 	movups	%xmm0,(%rax)
   4301 	leaq	16(%rax),%rax
   4302 L$key_expansion_192a_cold:
        	// Remember the current %xmm2 for the next 192b call.
   4303 	movaps	%xmm2,%xmm5
   4304 L$key_expansion_192b_warm:
        	// Same shufps/xorps folding of %xmm0 as the 128-bit helper, interleaved
        	// with building %xmm3 = %xmm2 << 32.
   4305 	shufps	$16,%xmm0,%xmm4
   4306 	movdqa	%xmm2,%xmm3
   4307 	xorps	%xmm4,%xmm0
   4308 	shufps	$140,%xmm0,%xmm4
   4309 	pslldq	$4,%xmm3
   4310 	xorps	%xmm4,%xmm0
        	// Broadcast dword 1 of the AESKEYGENASSIST result and XOR it into %xmm0.
   4311 	pshufd	$85,%xmm1,%xmm1
   4312 	pxor	%xmm3,%xmm2
   4313 	pxor	%xmm1,%xmm0
        	// Propagate dword 3 of the new %xmm0 into %xmm2.
   4314 	pshufd	$255,%xmm0,%xmm3
   4315 	pxor	%xmm3,%xmm2
        	// "rep ret" as raw bytes.
   4316 	.byte	0xf3,0xc3
   4317 
   4318 .p2align	4
   4319 L$key_expansion_192b:
   4320 	movaps	%xmm0,%xmm3
        	// %xmm5 = { low 64 bits of %xmm5, low 64 bits of %xmm0 }
   4321 	shufps	$68,%xmm0,%xmm5
   4322 	movups	%xmm5,(%rax)
        	// %xmm3 = { high 64 bits of %xmm0, low 64 bits of %xmm2 }
   4323 	shufps	$78,%xmm2,%xmm3
   4324 	movups	%xmm3,16(%rax)
   4325 	leaq	32(%rax),%rax
   4326 	jmp	L$key_expansion_192b_warm
   4327 
   4328 .p2align	4
        // L$key_expansion_256a / L$key_expansion_256a_cold: 256-bit schedule
        // step that updates %xmm0.
        // In:    %xmm0, %xmm2 = the two most recent round keys, %xmm1 =
        //        AESKEYGENASSIST result, %xmm4 = scratch carried between calls,
        //        %rax = write cursor.
        // Out:   %xmm0 = next round key. The non-cold entry first stores %xmm2 at
        //        (%rax) and advances %rax by 16.
        // Clobb: %xmm1, %xmm4.
   4329 L$key_expansion_256a:
   4330 	movups	%xmm2,(%rax)
   4331 	leaq	16(%rax),%rax
   4332 L$key_expansion_256a_cold:
        	// Fold shifted copies of %xmm0 into itself via %xmm4.
   4333 	shufps	$16,%xmm0,%xmm4
   4334 	xorps	%xmm4,%xmm0
   4335 	shufps	$140,%xmm0,%xmm4
   4336 	xorps	%xmm4,%xmm0
        	// Broadcast dword 3 of the AESKEYGENASSIST result and XOR it in.
   4337 	shufps	$255,%xmm1,%xmm1
   4338 	xorps	%xmm1,%xmm0
        	// "rep ret" as raw bytes.
   4339 	.byte	0xf3,0xc3
   4340 
   4341 .p2align	4
        // L$key_expansion_256b: 256-bit schedule step that updates %xmm2.
        // In:    %xmm0, %xmm2 = the two most recent round keys, %xmm1 =
        //        AESKEYGENASSIST result, %xmm4 = scratch carried between calls,
        //        %rax = write cursor.
        // Out:   %xmm0 stored at (%rax), %rax advanced by 16, %xmm2 = next round
        //        key.
        // Clobb: %xmm1, %xmm4.
   4342 L$key_expansion_256b:
   4343 	movups	%xmm0,(%rax)
   4344 	leaq	16(%rax),%rax
   4345 
        	// Fold shifted copies of %xmm2 into itself via %xmm4.
   4346 	shufps	$16,%xmm2,%xmm4
   4347 	xorps	%xmm4,%xmm2
   4348 	shufps	$140,%xmm2,%xmm4
   4349 	xorps	%xmm4,%xmm2
        	// Broadcast dword 2 (not dword 3 as in 256a) of the AESKEYGENASSIST
        	// result and XOR it in.
   4350 	shufps	$170,%xmm1,%xmm1
   4351 	xorps	%xmm1,%xmm2
        	// "rep ret" as raw bytes.
   4352 	.byte	0xf3,0xc3
   4353 
   4354 
        // Constant pool, aligned to 64 bytes. Only the four L$key_* entries are
        // referenced by the code visible in this part of the file
        // (_aesni_set_encrypt_key); the users of the other entries are outside
        // this view.
   4355 .p2align	6
        // Byte indices 15 down to 0.
   4356 L$bswap_mask:
   4357 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
        // Four dwords: 6, 6, 6, 0.
   4358 L$increment32:
   4359 .long	6,6,6,0
        // Four dwords: 1, 0, 0, 0.
   4360 L$increment64:
   4361 .long	1,0,0,0
        // Four dwords: 0x87, 0, 1, 0.
   4362 L$xts_magic:
   4363 .long	0x87,0,1,0
        // Fifteen zero bytes followed by a single 1.
   4364 L$increment1:
   4365 .byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
        // PSHUFB mask loaded into %xmm5 by L$10rounds_alt and L$14rounds_alt:
        // every dword selects source bytes 13,14,15,12.
   4366 L$key_rotate:
   4367 .long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
        // PSHUFB mask loaded into %xmm5 by L$12rounds_alt: every dword selects
        // source bytes 5,6,7,4.
   4368 L$key_rotate192:
   4369 .long	0x04070605,0x04070605,0x04070605,0x04070605
        // Initial round constant (1 in every dword) for the _alt key schedules;
        // the code doubles it with pslld after each use.
   4370 L$key_rcon1:
   4371 .long	1,1,1,1
        // Round constant 0x1b in every dword, loaded by L$10rounds_alt once its
        // 8-iteration loop has finished.
   4372 L$key_rcon1b:
   4373 .long	0x1b,0x1b,0x1b,0x1b
   4374 
        // NUL-terminated ASCII identification string: "AES for Intel AES-NI,
        // CRYPTOGAMS by" followed by the author e-mail address in angle brackets.
   4375 .byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
   4376 .p2align	6
   4377 #endif
   4378