Home | History | Annotate | Download | only in asm
      1 .text
      2 .globl	aesni_encrypt
      3 .type	aesni_encrypt,@function
      4 .align	16
        # aesni_encrypt -- run one 16-byte block through the AES encryption rounds.
        #
        # In:    rdi = address of the 16-byte input block
        #        rsi = address of the 16-byte output block
        #        rdx = key schedule: consecutive 16-byte round keys from offset 0,
        #              32-bit loop count at offset 240
        # Out:   16 bytes stored at (rsi)
        # Clobb: eax, rdx (advanced through the key schedule), xmm0-xmm2, flags
        #
        # NOTE(review): the C prototype is not visible in this file; presumably
        # (in, out, key) in SysV AMD64 argument order -- confirm against the header.
        #
        # AES-NI opcodes are emitted as raw bytes:
        #   102,15,56,220,209 = aesenc     %xmm1,%xmm2
        #   102,15,56,221,209 = aesenclast %xmm1,%xmm2
        #   0xf3,0xc3         = rep ret
      5 aesni_encrypt:
      6 	movups	(%rdi),%xmm2
      7 	movl	240(%rdx),%eax
      8 	movups	(%rdx),%xmm0
      9 	movups	16(%rdx),%xmm1
     10 	leaq	32(%rdx),%rdx
        # Whitening step: block ^= round key 0.
     11 	xorps	%xmm0,%xmm2
        # One aesenc per pass, eax passes in total. decl sets ZF and neither movups
        # nor leaq modifies flags, so the jnz below tests the decl result. Each
        # pass also preloads the next round key, so the key loaded on the final
        # pass is the one consumed by aesenclast.
     12 .Loop_enc1_1:
     13 .byte	102,15,56,220,209
     14 	decl	%eax
     15 	movups	(%rdx),%xmm1
     16 	leaq	16(%rdx),%rdx
     17 	jnz	.Loop_enc1_1
     18 .byte	102,15,56,221,209
     19 	movups	%xmm2,(%rsi)
     20 	.byte	0xf3,0xc3
     21 .size	aesni_encrypt,.-aesni_encrypt
     22 
     23 .globl	aesni_decrypt
     24 .type	aesni_decrypt,@function
     25 .align	16
        # aesni_decrypt -- run one 16-byte block through the AES decryption rounds.
        #
        # In:    rdi = address of the 16-byte input block
        #        rsi = address of the 16-byte output block
        #        rdx = key schedule: consecutive 16-byte round keys from offset 0,
        #              32-bit loop count at offset 240
        # Out:   16 bytes stored at (rsi)
        # Clobb: eax, rdx (advanced through the key schedule), xmm0-xmm2, flags
        #
        # NOTE(review): presumably the schedule passed here was prepared for
        # decryption (aesdec expects inverse-cipher round keys) -- confirm with
        # the key-setup code, which is not visible in this file.
        #
        # AES-NI opcodes are emitted as raw bytes:
        #   102,15,56,222,209 = aesdec     %xmm1,%xmm2
        #   102,15,56,223,209 = aesdeclast %xmm1,%xmm2
        #   0xf3,0xc3         = rep ret
     26 aesni_decrypt:
     27 	movups	(%rdi),%xmm2
     28 	movl	240(%rdx),%eax
     29 	movups	(%rdx),%xmm0
     30 	movups	16(%rdx),%xmm1
     31 	leaq	32(%rdx),%rdx
        # Whitening step: block ^= round key 0.
     32 	xorps	%xmm0,%xmm2
        # One aesdec per pass, eax passes in total; the jnz consumes the flags
        # set by decl (movups and leaq leave flags untouched).
     33 .Loop_dec1_2:
     34 .byte	102,15,56,222,209
     35 	decl	%eax
     36 	movups	(%rdx),%xmm1
     37 	leaq	16(%rdx),%rdx
     38 	jnz	.Loop_dec1_2
     39 .byte	102,15,56,223,209
     40 	movups	%xmm2,(%rsi)
     41 	.byte	0xf3,0xc3
     42 .size	aesni_decrypt, .-aesni_decrypt
     43 .type	_aesni_encrypt3,@function
     44 .align	16
        # _aesni_encrypt3 -- encrypt three blocks held in registers, interleaved.
        # Internal helper: no .globl is emitted for it and it uses a private
        # register convention instead of the C ABI.
        #
        # In:    rcx = key schedule (16-byte round keys from offset 0)
        #        eax = loop count as read from key+240 by the callers; halved
        #              below because every loop pass applies two round keys
        #        xmm2, xmm3, xmm4 = the three blocks
        # Out:   xmm2, xmm3, xmm4 encrypted in place
        # Clobb: eax, rcx, xmm0, xmm1, flags
        #
        # With count N the code issues 2*(N>>1)+1 aesenc rounds plus aesenclast,
        # which equals the N rounds of the single-block loop only when N is odd.
        #
        # .byte legend (102,15,56,OP,M):
        #   OP 220 = aesenc, OP 221 = aesenclast
        #   M 209/217/225 = key in xmm1 applied to xmm2/xmm3/xmm4
        #   M 208/216/224 = key in xmm0 applied to xmm2/xmm3/xmm4
     45 _aesni_encrypt3:
     46 	movups	(%rcx),%xmm0
     47 	shrl	$1,%eax
     48 	movups	16(%rcx),%xmm1
     49 	leaq	32(%rcx),%rcx
        # Whitening step: every block ^= round key 0.
     50 	xorps	%xmm0,%xmm2
     51 	xorps	%xmm0,%xmm3
     52 	xorps	%xmm0,%xmm4
     53 	movups	(%rcx),%xmm0
     54 
        # Two rounds per pass: xmm1 then xmm0, each reloaded for the next pass.
        # The decl result is consumed by the jnz at the bottom; nothing in
        # between modifies flags.
     55 .Lenc_loop3:
     56 .byte	102,15,56,220,209
     57 .byte	102,15,56,220,217
     58 	decl	%eax
     59 .byte	102,15,56,220,225
     60 	movups	16(%rcx),%xmm1
     61 .byte	102,15,56,220,208
     62 .byte	102,15,56,220,216
     63 	leaq	32(%rcx),%rcx
     64 .byte	102,15,56,220,224
     65 	movups	(%rcx),%xmm0
     66 	jnz	.Lenc_loop3
     67 
        # Final aesenc with xmm1, then aesenclast with xmm0.
     68 .byte	102,15,56,220,209
     69 .byte	102,15,56,220,217
     70 .byte	102,15,56,220,225
     71 .byte	102,15,56,221,208
     72 .byte	102,15,56,221,216
     73 .byte	102,15,56,221,224
     74 	.byte	0xf3,0xc3
     75 .size	_aesni_encrypt3,.-_aesni_encrypt3
     76 .type	_aesni_decrypt3,@function
     77 .align	16
        # _aesni_decrypt3 -- decrypt three blocks held in registers, interleaved.
        # Internal helper: no .globl is emitted for it and it uses a private
        # register convention instead of the C ABI.
        #
        # In:    rcx = key schedule (16-byte round keys from offset 0)
        #        eax = loop count as read from key+240 by the callers; halved
        #              below because every loop pass applies two round keys
        #        xmm2, xmm3, xmm4 = the three blocks
        # Out:   xmm2, xmm3, xmm4 decrypted in place
        # Clobb: eax, rcx, xmm0, xmm1, flags
        #
        # .byte legend (102,15,56,OP,M):
        #   OP 222 = aesdec, OP 223 = aesdeclast
        #   M 209/217/225 = key in xmm1 applied to xmm2/xmm3/xmm4
        #   M 208/216/224 = key in xmm0 applied to xmm2/xmm3/xmm4
     78 _aesni_decrypt3:
     79 	movups	(%rcx),%xmm0
     80 	shrl	$1,%eax
     81 	movups	16(%rcx),%xmm1
     82 	leaq	32(%rcx),%rcx
        # Whitening step: every block ^= round key 0.
     83 	xorps	%xmm0,%xmm2
     84 	xorps	%xmm0,%xmm3
     85 	xorps	%xmm0,%xmm4
     86 	movups	(%rcx),%xmm0
     87 
        # Two rounds per pass: xmm1 then xmm0, each reloaded for the next pass.
        # The decl result is consumed by the jnz at the bottom; nothing in
        # between modifies flags.
     88 .Ldec_loop3:
     89 .byte	102,15,56,222,209
     90 .byte	102,15,56,222,217
     91 	decl	%eax
     92 .byte	102,15,56,222,225
     93 	movups	16(%rcx),%xmm1
     94 .byte	102,15,56,222,208
     95 .byte	102,15,56,222,216
     96 	leaq	32(%rcx),%rcx
     97 .byte	102,15,56,222,224
     98 	movups	(%rcx),%xmm0
     99 	jnz	.Ldec_loop3
    100 
        # Final aesdec with xmm1, then aesdeclast with xmm0.
    101 .byte	102,15,56,222,209
    102 .byte	102,15,56,222,217
    103 .byte	102,15,56,222,225
    104 .byte	102,15,56,223,208
    105 .byte	102,15,56,223,216
    106 .byte	102,15,56,223,224
    107 	.byte	0xf3,0xc3
    108 .size	_aesni_decrypt3,.-_aesni_decrypt3
    109 .type	_aesni_encrypt4,@function
    110 .align	16
        # _aesni_encrypt4 -- encrypt four blocks held in registers, interleaved.
        # Internal helper: no .globl is emitted for it and it uses a private
        # register convention instead of the C ABI.
        #
        # In:    rcx = key schedule (16-byte round keys from offset 0)
        #        eax = loop count as read from key+240 by the callers; halved
        #              below because every loop pass applies two round keys
        #        xmm2..xmm5 = the four blocks
        # Out:   xmm2..xmm5 encrypted in place
        # Clobb: eax, rcx, xmm0, xmm1, flags
        #
        # .byte legend (102,15,56,OP,M):
        #   OP 220 = aesenc, OP 221 = aesenclast
        #   M 209/217/225/233 = key in xmm1 applied to xmm2/xmm3/xmm4/xmm5
        #   M 208/216/224/232 = key in xmm0 applied to xmm2/xmm3/xmm4/xmm5
    111 _aesni_encrypt4:
    112 	movups	(%rcx),%xmm0
    113 	shrl	$1,%eax
    114 	movups	16(%rcx),%xmm1
    115 	leaq	32(%rcx),%rcx
        # Whitening step: every block ^= round key 0.
    116 	xorps	%xmm0,%xmm2
    117 	xorps	%xmm0,%xmm3
    118 	xorps	%xmm0,%xmm4
    119 	xorps	%xmm0,%xmm5
    120 	movups	(%rcx),%xmm0
    121 
        # Two rounds per pass (xmm1, then xmm0); jnz uses the flags from decl.
    122 .Lenc_loop4:
    123 .byte	102,15,56,220,209
    124 .byte	102,15,56,220,217
    125 	decl	%eax
    126 .byte	102,15,56,220,225
    127 .byte	102,15,56,220,233
    128 	movups	16(%rcx),%xmm1
    129 .byte	102,15,56,220,208
    130 .byte	102,15,56,220,216
    131 	leaq	32(%rcx),%rcx
    132 .byte	102,15,56,220,224
    133 .byte	102,15,56,220,232
    134 	movups	(%rcx),%xmm0
    135 	jnz	.Lenc_loop4
    136 
        # Final aesenc with xmm1, then aesenclast with xmm0.
    137 .byte	102,15,56,220,209
    138 .byte	102,15,56,220,217
    139 .byte	102,15,56,220,225
    140 .byte	102,15,56,220,233
    141 .byte	102,15,56,221,208
    142 .byte	102,15,56,221,216
    143 .byte	102,15,56,221,224
    144 .byte	102,15,56,221,232
    145 	.byte	0xf3,0xc3
    146 .size	_aesni_encrypt4,.-_aesni_encrypt4
    147 .type	_aesni_decrypt4,@function
    148 .align	16
        # _aesni_decrypt4 -- decrypt four blocks held in registers, interleaved.
        # Internal helper: no .globl is emitted for it and it uses a private
        # register convention instead of the C ABI.
        #
        # In:    rcx = key schedule (16-byte round keys from offset 0)
        #        eax = loop count as read from key+240 by the callers; halved
        #              below because every loop pass applies two round keys
        #        xmm2..xmm5 = the four blocks
        # Out:   xmm2..xmm5 decrypted in place
        # Clobb: eax, rcx, xmm0, xmm1, flags
        #
        # .byte legend (102,15,56,OP,M):
        #   OP 222 = aesdec, OP 223 = aesdeclast
        #   M 209/217/225/233 = key in xmm1 applied to xmm2/xmm3/xmm4/xmm5
        #   M 208/216/224/232 = key in xmm0 applied to xmm2/xmm3/xmm4/xmm5
    149 _aesni_decrypt4:
    150 	movups	(%rcx),%xmm0
    151 	shrl	$1,%eax
    152 	movups	16(%rcx),%xmm1
    153 	leaq	32(%rcx),%rcx
        # Whitening step: every block ^= round key 0.
    154 	xorps	%xmm0,%xmm2
    155 	xorps	%xmm0,%xmm3
    156 	xorps	%xmm0,%xmm4
    157 	xorps	%xmm0,%xmm5
    158 	movups	(%rcx),%xmm0
    159 
        # Two rounds per pass (xmm1, then xmm0); jnz uses the flags from decl.
    160 .Ldec_loop4:
    161 .byte	102,15,56,222,209
    162 .byte	102,15,56,222,217
    163 	decl	%eax
    164 .byte	102,15,56,222,225
    165 .byte	102,15,56,222,233
    166 	movups	16(%rcx),%xmm1
    167 .byte	102,15,56,222,208
    168 .byte	102,15,56,222,216
    169 	leaq	32(%rcx),%rcx
    170 .byte	102,15,56,222,224
    171 .byte	102,15,56,222,232
    172 	movups	(%rcx),%xmm0
    173 	jnz	.Ldec_loop4
    174 
        # Final aesdec with xmm1, then aesdeclast with xmm0.
    175 .byte	102,15,56,222,209
    176 .byte	102,15,56,222,217
    177 .byte	102,15,56,222,225
    178 .byte	102,15,56,222,233
    179 .byte	102,15,56,223,208
    180 .byte	102,15,56,223,216
    181 .byte	102,15,56,223,224
    182 .byte	102,15,56,223,232
    183 	.byte	0xf3,0xc3
    184 .size	_aesni_decrypt4,.-_aesni_decrypt4
    185 .type	_aesni_encrypt6,@function
    186 .align	16
        # _aesni_encrypt6 -- encrypt six blocks held in registers, interleaved.
        # Internal helper: no .globl is emitted for it and it uses a private
        # register convention instead of the C ABI.
        #
        # In:    rcx = key schedule (16-byte round keys from offset 0)
        #        eax = loop count as read from key+240 by the callers; halved
        #              below because every loop pass applies two round keys
        #        xmm2..xmm7 = the six blocks
        # Out:   xmm2..xmm7 encrypted in place
        # Clobb: eax, rcx, xmm0, xmm1, flags
        #
        # .byte legend (102,15,56,OP,M):
        #   OP 220 = aesenc, OP 221 = aesenclast
        #   M 209/217/225/233/241/249 = key in xmm1 applied to xmm2..xmm7
        #   M 208/216/224/232/240/248 = key in xmm0 applied to xmm2..xmm7
    187 _aesni_encrypt6:
    188 	movups	(%rcx),%xmm0
    189 	shrl	$1,%eax
    190 	movups	16(%rcx),%xmm1
    191 	leaq	32(%rcx),%rcx
        # Prologue: the whitening XOR of each block (round key 0 in xmm0) is
        # interleaved with the first aesenc (xmm1) of the block before it.
    192 	xorps	%xmm0,%xmm2
    193 	pxor	%xmm0,%xmm3
    194 .byte	102,15,56,220,209
    195 	pxor	%xmm0,%xmm4
    196 .byte	102,15,56,220,217
    197 	pxor	%xmm0,%xmm5
    198 .byte	102,15,56,220,225
    199 	pxor	%xmm0,%xmm6
    200 .byte	102,15,56,220,233
    201 	pxor	%xmm0,%xmm7
        # This decl accounts for the xmm1 round issued in the prologue; its flags
        # survive until the jnz at the bottom of the loop.
    202 	decl	%eax
    203 .byte	102,15,56,220,241
    204 	movups	(%rcx),%xmm0
    205 .byte	102,15,56,220,249
    206 	jmp	.Lenc_loop6_enter
    207 .align	16
    208 .Lenc_loop6:
    209 .byte	102,15,56,220,209
    210 .byte	102,15,56,220,217
    211 	decl	%eax
    212 .byte	102,15,56,220,225
    213 .byte	102,15,56,220,233
    214 .byte	102,15,56,220,241
    215 .byte	102,15,56,220,249
        # Mid-loop entry point: second half of a pass (round with xmm0).
    216 .Lenc_loop6_enter:
    217 	movups	16(%rcx),%xmm1
    218 .byte	102,15,56,220,208
    219 .byte	102,15,56,220,216
    220 	leaq	32(%rcx),%rcx
    221 .byte	102,15,56,220,224
    222 .byte	102,15,56,220,232
    223 .byte	102,15,56,220,240
    224 .byte	102,15,56,220,248
    225 	movups	(%rcx),%xmm0
    226 	jnz	.Lenc_loop6
    227 
        # Final aesenc with xmm1, then aesenclast with xmm0.
    228 .byte	102,15,56,220,209
    229 .byte	102,15,56,220,217
    230 .byte	102,15,56,220,225
    231 .byte	102,15,56,220,233
    232 .byte	102,15,56,220,241
    233 .byte	102,15,56,220,249
    234 .byte	102,15,56,221,208
    235 .byte	102,15,56,221,216
    236 .byte	102,15,56,221,224
    237 .byte	102,15,56,221,232
    238 .byte	102,15,56,221,240
    239 .byte	102,15,56,221,248
    240 	.byte	0xf3,0xc3
    241 .size	_aesni_encrypt6,.-_aesni_encrypt6
    242 .type	_aesni_decrypt6,@function
    243 .align	16
        # _aesni_decrypt6 -- decrypt six blocks held in registers, interleaved.
        # Internal helper: no .globl is emitted for it and it uses a private
        # register convention instead of the C ABI.
        #
        # In:    rcx = key schedule (16-byte round keys from offset 0)
        #        eax = loop count as read from key+240 by the callers; halved
        #              below because every loop pass applies two round keys
        #        xmm2..xmm7 = the six blocks
        # Out:   xmm2..xmm7 decrypted in place
        # Clobb: eax, rcx, xmm0, xmm1, flags
        #
        # .byte legend (102,15,56,OP,M):
        #   OP 222 = aesdec, OP 223 = aesdeclast
        #   M 209/217/225/233/241/249 = key in xmm1 applied to xmm2..xmm7
        #   M 208/216/224/232/240/248 = key in xmm0 applied to xmm2..xmm7
    244 _aesni_decrypt6:
    245 	movups	(%rcx),%xmm0
    246 	shrl	$1,%eax
    247 	movups	16(%rcx),%xmm1
    248 	leaq	32(%rcx),%rcx
        # Prologue: the whitening XOR of each block (round key 0 in xmm0) is
        # interleaved with the first aesdec (xmm1) of the block before it.
    249 	xorps	%xmm0,%xmm2
    250 	pxor	%xmm0,%xmm3
    251 .byte	102,15,56,222,209
    252 	pxor	%xmm0,%xmm4
    253 .byte	102,15,56,222,217
    254 	pxor	%xmm0,%xmm5
    255 .byte	102,15,56,222,225
    256 	pxor	%xmm0,%xmm6
    257 .byte	102,15,56,222,233
    258 	pxor	%xmm0,%xmm7
        # This decl accounts for the xmm1 round issued in the prologue; its flags
        # survive until the jnz at the bottom of the loop.
    259 	decl	%eax
    260 .byte	102,15,56,222,241
    261 	movups	(%rcx),%xmm0
    262 .byte	102,15,56,222,249
    263 	jmp	.Ldec_loop6_enter
    264 .align	16
    265 .Ldec_loop6:
    266 .byte	102,15,56,222,209
    267 .byte	102,15,56,222,217
    268 	decl	%eax
    269 .byte	102,15,56,222,225
    270 .byte	102,15,56,222,233
    271 .byte	102,15,56,222,241
    272 .byte	102,15,56,222,249
        # Mid-loop entry point: second half of a pass (round with xmm0).
    273 .Ldec_loop6_enter:
    274 	movups	16(%rcx),%xmm1
    275 .byte	102,15,56,222,208
    276 .byte	102,15,56,222,216
    277 	leaq	32(%rcx),%rcx
    278 .byte	102,15,56,222,224
    279 .byte	102,15,56,222,232
    280 .byte	102,15,56,222,240
    281 .byte	102,15,56,222,248
    282 	movups	(%rcx),%xmm0
    283 	jnz	.Ldec_loop6
    284 
        # Final aesdec with xmm1, then aesdeclast with xmm0.
    285 .byte	102,15,56,222,209
    286 .byte	102,15,56,222,217
    287 .byte	102,15,56,222,225
    288 .byte	102,15,56,222,233
    289 .byte	102,15,56,222,241
    290 .byte	102,15,56,222,249
    291 .byte	102,15,56,223,208
    292 .byte	102,15,56,223,216
    293 .byte	102,15,56,223,224
    294 .byte	102,15,56,223,232
    295 .byte	102,15,56,223,240
    296 .byte	102,15,56,223,248
    297 	.byte	0xf3,0xc3
    298 .size	_aesni_decrypt6,.-_aesni_decrypt6
    299 .type	_aesni_encrypt8,@function
    300 .align	16
        # _aesni_encrypt8 -- encrypt eight blocks held in registers, interleaved.
        # Internal helper: no .globl is emitted for it and it uses a private
        # register convention instead of the C ABI.
        #
        # In:    rcx = key schedule (16-byte round keys from offset 0)
        #        eax = loop count as read from key+240 by the callers; halved
        #              below because every loop pass applies two round keys
        #        xmm2..xmm9 = the eight blocks
        # Out:   xmm2..xmm9 encrypted in place
        # Clobb: eax, rcx, xmm0, xmm1, flags
        #
        # .byte legend:
        #   102,15,56,OP,M     OP 220 = aesenc, OP 221 = aesenclast
        #     M 209/217/225/233/241/249 = key in xmm1 applied to xmm2..xmm7
        #     M 208/216/224/232/240/248 = key in xmm0 applied to xmm2..xmm7
        #   102,68,15,56,OP,M  same opcodes with a REX prefix selecting xmm8/xmm9
        #     M 193/201 = key in xmm1 applied to xmm8/xmm9
        #     M 192/200 = key in xmm0 applied to xmm8/xmm9
    301 _aesni_encrypt8:
    302 	movups	(%rcx),%xmm0
    303 	shrl	$1,%eax
    304 	movups	16(%rcx),%xmm1
    305 	leaq	32(%rcx),%rcx
        # Prologue: whitening XORs (round key 0 in xmm0) interleaved with the
        # first aesenc (xmm1) of the blocks already whitened.
    306 	xorps	%xmm0,%xmm2
    307 	xorps	%xmm0,%xmm3
    308 .byte	102,15,56,220,209
    309 	pxor	%xmm0,%xmm4
    310 .byte	102,15,56,220,217
    311 	pxor	%xmm0,%xmm5
    312 .byte	102,15,56,220,225
    313 	pxor	%xmm0,%xmm6
    314 .byte	102,15,56,220,233
    315 	pxor	%xmm0,%xmm7
        # This decl accounts for the xmm1 round issued in the prologue; pxor and
        # movups leave flags untouched, so it is still live at the loop jnz.
    316 	decl	%eax
    317 .byte	102,15,56,220,241
    318 	pxor	%xmm0,%xmm8
    319 .byte	102,15,56,220,249
    320 	pxor	%xmm0,%xmm9
    321 	movups	(%rcx),%xmm0
    322 .byte	102,68,15,56,220,193
    323 .byte	102,68,15,56,220,201
    324 	movups	16(%rcx),%xmm1
    325 	jmp	.Lenc_loop8_enter
    326 .align	16
    327 .Lenc_loop8:
    328 .byte	102,15,56,220,209
    329 .byte	102,15,56,220,217
    330 	decl	%eax
    331 .byte	102,15,56,220,225
    332 .byte	102,15,56,220,233
    333 .byte	102,15,56,220,241
    334 .byte	102,15,56,220,249
    335 .byte	102,68,15,56,220,193
    336 .byte	102,68,15,56,220,201
    337 	movups	16(%rcx),%xmm1
        # Mid-loop entry point: second half of a pass (round with xmm0). The
        # next xmm1 key is already loaded on both paths into this label.
    338 .Lenc_loop8_enter:
    339 .byte	102,15,56,220,208
    340 .byte	102,15,56,220,216
    341 	leaq	32(%rcx),%rcx
    342 .byte	102,15,56,220,224
    343 .byte	102,15,56,220,232
    344 .byte	102,15,56,220,240
    345 .byte	102,15,56,220,248
    346 .byte	102,68,15,56,220,192
    347 .byte	102,68,15,56,220,200
    348 	movups	(%rcx),%xmm0
    349 	jnz	.Lenc_loop8
    350 
        # Final aesenc with xmm1, then aesenclast with xmm0.
    351 .byte	102,15,56,220,209
    352 .byte	102,15,56,220,217
    353 .byte	102,15,56,220,225
    354 .byte	102,15,56,220,233
    355 .byte	102,15,56,220,241
    356 .byte	102,15,56,220,249
    357 .byte	102,68,15,56,220,193
    358 .byte	102,68,15,56,220,201
    359 .byte	102,15,56,221,208
    360 .byte	102,15,56,221,216
    361 .byte	102,15,56,221,224
    362 .byte	102,15,56,221,232
    363 .byte	102,15,56,221,240
    364 .byte	102,15,56,221,248
    365 .byte	102,68,15,56,221,192
    366 .byte	102,68,15,56,221,200
    367 	.byte	0xf3,0xc3
    368 .size	_aesni_encrypt8,.-_aesni_encrypt8
    369 .type	_aesni_decrypt8,@function
    370 .align	16
        # _aesni_decrypt8 -- decrypt eight blocks held in registers, interleaved.
        # Internal helper: no .globl is emitted for it and it uses a private
        # register convention instead of the C ABI.
        #
        # In:    rcx = key schedule (16-byte round keys from offset 0)
        #        eax = loop count as read from key+240 by the callers; halved
        #              below because every loop pass applies two round keys
        #        xmm2..xmm9 = the eight blocks
        # Out:   xmm2..xmm9 decrypted in place
        # Clobb: eax, rcx, xmm0, xmm1, flags
        #
        # .byte legend:
        #   102,15,56,OP,M     OP 222 = aesdec, OP 223 = aesdeclast
        #     M 209/217/225/233/241/249 = key in xmm1 applied to xmm2..xmm7
        #     M 208/216/224/232/240/248 = key in xmm0 applied to xmm2..xmm7
        #   102,68,15,56,OP,M  same opcodes with a REX prefix selecting xmm8/xmm9
        #     M 193/201 = key in xmm1 applied to xmm8/xmm9
        #     M 192/200 = key in xmm0 applied to xmm8/xmm9
    371 _aesni_decrypt8:
    372 	movups	(%rcx),%xmm0
    373 	shrl	$1,%eax
    374 	movups	16(%rcx),%xmm1
    375 	leaq	32(%rcx),%rcx
        # Prologue: whitening XORs (round key 0 in xmm0) interleaved with the
        # first aesdec (xmm1) of the blocks already whitened.
    376 	xorps	%xmm0,%xmm2
    377 	xorps	%xmm0,%xmm3
    378 .byte	102,15,56,222,209
    379 	pxor	%xmm0,%xmm4
    380 .byte	102,15,56,222,217
    381 	pxor	%xmm0,%xmm5
    382 .byte	102,15,56,222,225
    383 	pxor	%xmm0,%xmm6
    384 .byte	102,15,56,222,233
    385 	pxor	%xmm0,%xmm7
        # This decl accounts for the xmm1 round issued in the prologue; pxor and
        # movups leave flags untouched, so it is still live at the loop jnz.
    386 	decl	%eax
    387 .byte	102,15,56,222,241
    388 	pxor	%xmm0,%xmm8
    389 .byte	102,15,56,222,249
    390 	pxor	%xmm0,%xmm9
    391 	movups	(%rcx),%xmm0
    392 .byte	102,68,15,56,222,193
    393 .byte	102,68,15,56,222,201
    394 	movups	16(%rcx),%xmm1
    395 	jmp	.Ldec_loop8_enter
    396 .align	16
    397 .Ldec_loop8:
    398 .byte	102,15,56,222,209
    399 .byte	102,15,56,222,217
    400 	decl	%eax
    401 .byte	102,15,56,222,225
    402 .byte	102,15,56,222,233
    403 .byte	102,15,56,222,241
    404 .byte	102,15,56,222,249
    405 .byte	102,68,15,56,222,193
    406 .byte	102,68,15,56,222,201
    407 	movups	16(%rcx),%xmm1
        # Mid-loop entry point: second half of a pass (round with xmm0). The
        # next xmm1 key is already loaded on both paths into this label.
    408 .Ldec_loop8_enter:
    409 .byte	102,15,56,222,208
    410 .byte	102,15,56,222,216
    411 	leaq	32(%rcx),%rcx
    412 .byte	102,15,56,222,224
    413 .byte	102,15,56,222,232
    414 .byte	102,15,56,222,240
    415 .byte	102,15,56,222,248
    416 .byte	102,68,15,56,222,192
    417 .byte	102,68,15,56,222,200
    418 	movups	(%rcx),%xmm0
    419 	jnz	.Ldec_loop8
    420 
        # Final aesdec with xmm1, then aesdeclast with xmm0.
    421 .byte	102,15,56,222,209
    422 .byte	102,15,56,222,217
    423 .byte	102,15,56,222,225
    424 .byte	102,15,56,222,233
    425 .byte	102,15,56,222,241
    426 .byte	102,15,56,222,249
    427 .byte	102,68,15,56,222,193
    428 .byte	102,68,15,56,222,201
    429 .byte	102,15,56,223,208
    430 .byte	102,15,56,223,216
    431 .byte	102,15,56,223,224
    432 .byte	102,15,56,223,232
    433 .byte	102,15,56,223,240
    434 .byte	102,15,56,223,248
    435 .byte	102,68,15,56,223,192
    436 .byte	102,68,15,56,223,200
    437 	.byte	0xf3,0xc3
    438 .size	_aesni_decrypt8,.-_aesni_decrypt8
    439 .globl	aesni_ecb_encrypt
    440 .type	aesni_ecb_encrypt,@function
    441 .align	16
        # aesni_ecb_encrypt -- ECB-mode encryption or decryption of a buffer.
        #
        # In:    rdi = input buffer
        #        rsi = output buffer
        #        rdx = length in bytes; rounded down to a multiple of 16 below,
        #              and the routine returns at once when that leaves zero
        #        rcx = key schedule (32-bit loop count at offset 240)
        #        r8d = direction: nonzero encrypts, zero decrypts
        # Clobb: rax, rcx, rdx, rsi, rdi, r10, r11, xmm0-xmm9, flags
        #
        # NOTE(review): the C prototype is not visible in this file; the register
        # roles above are read off the code in SysV AMD64 argument order.
        #
        # Structure, identical for both directions:
        #   1. while 128 or more bytes remain: 8 blocks per call to the 8-block
        #      helper, with the stores of one batch interleaved with the loads
        #      of the following batch;
        #   2. tail of 1..7 blocks: dispatched to the inline 1-block loop or to
        #      the 3/4/6/8-block helpers.
        # r11 and r10d keep pristine copies of the key pointer and loop count,
        # because every helper call advances rcx and consumes eax.
        #
        # .byte legend for the inline single-block loops:
        #   102,15,56,220,209 = aesenc %xmm1,%xmm2   221 = aesenclast
        #   102,15,56,222,209 = aesdec %xmm1,%xmm2   223 = aesdeclast
        #   0xf3,0xc3         = rep ret
    442 aesni_ecb_encrypt:
    443 	andq	$-16,%rdx
    444 	jz	.Lecb_ret
    445 
    446 	movl	240(%rcx),%eax
    447 	movups	(%rcx),%xmm0
    448 	movq	%rcx,%r11
    449 	movl	%eax,%r10d
    450 	testl	%r8d,%r8d
    451 	jz	.Lecb_decrypt
    452 
        # ---- encryption path ----
    453 	cmpq	$128,%rdx
    454 	jb	.Lecb_enc_tail
    455 
        # Load the first batch of 8 blocks and pre-subtract its size.
    456 	movdqu	(%rdi),%xmm2
    457 	movdqu	16(%rdi),%xmm3
    458 	movdqu	32(%rdi),%xmm4
    459 	movdqu	48(%rdi),%xmm5
    460 	movdqu	64(%rdi),%xmm6
    461 	movdqu	80(%rdi),%xmm7
    462 	movdqu	96(%rdi),%xmm8
    463 	movdqu	112(%rdi),%xmm9
    464 	leaq	128(%rdi),%rdi
    465 	subq	$128,%rdx
    466 	jmp	.Lecb_enc_loop8_enter
    467 .align	16
        # Store the 8 results of the previous batch while loading the next 8
        # inputs; rcx and eax are reset from r11/r10d for the next helper call.
    468 .Lecb_enc_loop8:
    469 	movups	%xmm2,(%rsi)
    470 	movq	%r11,%rcx
    471 	movdqu	(%rdi),%xmm2
    472 	movl	%r10d,%eax
    473 	movups	%xmm3,16(%rsi)
    474 	movdqu	16(%rdi),%xmm3
    475 	movups	%xmm4,32(%rsi)
    476 	movdqu	32(%rdi),%xmm4
    477 	movups	%xmm5,48(%rsi)
    478 	movdqu	48(%rdi),%xmm5
    479 	movups	%xmm6,64(%rsi)
    480 	movdqu	64(%rdi),%xmm6
    481 	movups	%xmm7,80(%rsi)
    482 	movdqu	80(%rdi),%xmm7
    483 	movups	%xmm8,96(%rsi)
    484 	movdqu	96(%rdi),%xmm8
    485 	movups	%xmm9,112(%rsi)
    486 	leaq	128(%rsi),%rsi
    487 	movdqu	112(%rdi),%xmm9
    488 	leaq	128(%rdi),%rdi
    489 .Lecb_enc_loop8_enter:
    490 
    491 	call	_aesni_encrypt8
    492 
        # Loop again while another full 128 bytes can be taken without borrow.
    493 	subq	$128,%rdx
    494 	jnc	.Lecb_enc_loop8
    495 
        # Flush the last full batch, restore key pointer / count for the tail,
        # and undo the over-subtraction to get the remaining byte count.
    496 	movups	%xmm2,(%rsi)
    497 	movq	%r11,%rcx
    498 	movups	%xmm3,16(%rsi)
    499 	movl	%r10d,%eax
    500 	movups	%xmm4,32(%rsi)
    501 	movups	%xmm5,48(%rsi)
    502 	movups	%xmm6,64(%rsi)
    503 	movups	%xmm7,80(%rsi)
    504 	movups	%xmm8,96(%rsi)
    505 	movups	%xmm9,112(%rsi)
    506 	leaq	128(%rsi),%rsi
    507 	addq	$128,%rdx
    508 	jz	.Lecb_ret
    509 
        # Tail dispatch on 16..112 remaining bytes. movups does not modify
        # flags, so each je reuses the result of the cmpq two lines above it.
    510 .Lecb_enc_tail:
    511 	movups	(%rdi),%xmm2
    512 	cmpq	$32,%rdx
    513 	jb	.Lecb_enc_one
    514 	movups	16(%rdi),%xmm3
    515 	je	.Lecb_enc_two
    516 	movups	32(%rdi),%xmm4
    517 	cmpq	$64,%rdx
    518 	jb	.Lecb_enc_three
    519 	movups	48(%rdi),%xmm5
    520 	je	.Lecb_enc_four
    521 	movups	64(%rdi),%xmm6
    522 	cmpq	$96,%rdx
    523 	jb	.Lecb_enc_five
    524 	movups	80(%rdi),%xmm7
    525 	je	.Lecb_enc_six
        # Seven blocks: the 8-block helper is used; xmm9 is not loaded here and
        # its result is not stored, so the eighth lane is processed and dropped.
    526 	movdqu	96(%rdi),%xmm8
    527 	call	_aesni_encrypt8
    528 	movups	%xmm2,(%rsi)
    529 	movups	%xmm3,16(%rsi)
    530 	movups	%xmm4,32(%rsi)
    531 	movups	%xmm5,48(%rsi)
    532 	movups	%xmm6,64(%rsi)
    533 	movups	%xmm7,80(%rsi)
    534 	movups	%xmm8,96(%rsi)
    535 	jmp	.Lecb_ret
    536 .align	16
        # One block: same inline loop as the standalone single-block routine,
        # walking the key schedule through rcx.
    537 .Lecb_enc_one:
    538 	movups	(%rcx),%xmm0
    539 	movups	16(%rcx),%xmm1
    540 	leaq	32(%rcx),%rcx
    541 	xorps	%xmm0,%xmm2
    542 .Loop_enc1_3:
    543 .byte	102,15,56,220,209
    544 	decl	%eax
    545 	movups	(%rcx),%xmm1
    546 	leaq	16(%rcx),%rcx
    547 	jnz	.Loop_enc1_3
    548 .byte	102,15,56,221,209
    549 	movups	%xmm2,(%rsi)
    550 	jmp	.Lecb_ret
    551 .align	16
        # Two blocks: the 3-block helper runs with a zeroed third lane whose
        # result is ignored.
    552 .Lecb_enc_two:
    553 	xorps	%xmm4,%xmm4
    554 	call	_aesni_encrypt3
    555 	movups	%xmm2,(%rsi)
    556 	movups	%xmm3,16(%rsi)
    557 	jmp	.Lecb_ret
    558 .align	16
    559 .Lecb_enc_three:
    560 	call	_aesni_encrypt3
    561 	movups	%xmm2,(%rsi)
    562 	movups	%xmm3,16(%rsi)
    563 	movups	%xmm4,32(%rsi)
    564 	jmp	.Lecb_ret
    565 .align	16
    566 .Lecb_enc_four:
    567 	call	_aesni_encrypt4
    568 	movups	%xmm2,(%rsi)
    569 	movups	%xmm3,16(%rsi)
    570 	movups	%xmm4,32(%rsi)
    571 	movups	%xmm5,48(%rsi)
    572 	jmp	.Lecb_ret
    573 .align	16
        # Five blocks: the 6-block helper runs with a zeroed sixth lane whose
        # result is ignored.
    574 .Lecb_enc_five:
    575 	xorps	%xmm7,%xmm7
    576 	call	_aesni_encrypt6
    577 	movups	%xmm2,(%rsi)
    578 	movups	%xmm3,16(%rsi)
    579 	movups	%xmm4,32(%rsi)
    580 	movups	%xmm5,48(%rsi)
    581 	movups	%xmm6,64(%rsi)
    582 	jmp	.Lecb_ret
    583 .align	16
    584 .Lecb_enc_six:
    585 	call	_aesni_encrypt6
    586 	movups	%xmm2,(%rsi)
    587 	movups	%xmm3,16(%rsi)
    588 	movups	%xmm4,32(%rsi)
    589 	movups	%xmm5,48(%rsi)
    590 	movups	%xmm6,64(%rsi)
    591 	movups	%xmm7,80(%rsi)
    592 	jmp	.Lecb_ret
    593 
    594 .align	16
        # ---- decryption path: mirrors the encryption path with the decrypt
        # helpers ----
    595 .Lecb_decrypt:
    596 	cmpq	$128,%rdx
    597 	jb	.Lecb_dec_tail
    598 
    599 	movdqu	(%rdi),%xmm2
    600 	movdqu	16(%rdi),%xmm3
    601 	movdqu	32(%rdi),%xmm4
    602 	movdqu	48(%rdi),%xmm5
    603 	movdqu	64(%rdi),%xmm6
    604 	movdqu	80(%rdi),%xmm7
    605 	movdqu	96(%rdi),%xmm8
    606 	movdqu	112(%rdi),%xmm9
    607 	leaq	128(%rdi),%rdi
    608 	subq	$128,%rdx
    609 	jmp	.Lecb_dec_loop8_enter
    610 .align	16
        # Store the previous batch while loading the next; reset rcx and eax.
    611 .Lecb_dec_loop8:
    612 	movups	%xmm2,(%rsi)
    613 	movq	%r11,%rcx
    614 	movdqu	(%rdi),%xmm2
    615 	movl	%r10d,%eax
    616 	movups	%xmm3,16(%rsi)
    617 	movdqu	16(%rdi),%xmm3
    618 	movups	%xmm4,32(%rsi)
    619 	movdqu	32(%rdi),%xmm4
    620 	movups	%xmm5,48(%rsi)
    621 	movdqu	48(%rdi),%xmm5
    622 	movups	%xmm6,64(%rsi)
    623 	movdqu	64(%rdi),%xmm6
    624 	movups	%xmm7,80(%rsi)
    625 	movdqu	80(%rdi),%xmm7
    626 	movups	%xmm8,96(%rsi)
    627 	movdqu	96(%rdi),%xmm8
    628 	movups	%xmm9,112(%rsi)
    629 	leaq	128(%rsi),%rsi
    630 	movdqu	112(%rdi),%xmm9
    631 	leaq	128(%rdi),%rdi
    632 .Lecb_dec_loop8_enter:
    633 
    634 	call	_aesni_decrypt8
    635 
        # Reload round key 0 from the pristine key pointer; movups leaves the
        # flags alone, so jnc still sees the borrow from subq.
    636 	movups	(%r11),%xmm0
    637 	subq	$128,%rdx
    638 	jnc	.Lecb_dec_loop8
    639 
        # Flush the last full batch and recover the remaining byte count.
    640 	movups	%xmm2,(%rsi)
    641 	movq	%r11,%rcx
    642 	movups	%xmm3,16(%rsi)
    643 	movl	%r10d,%eax
    644 	movups	%xmm4,32(%rsi)
    645 	movups	%xmm5,48(%rsi)
    646 	movups	%xmm6,64(%rsi)
    647 	movups	%xmm7,80(%rsi)
    648 	movups	%xmm8,96(%rsi)
    649 	movups	%xmm9,112(%rsi)
    650 	leaq	128(%rsi),%rsi
    651 	addq	$128,%rdx
    652 	jz	.Lecb_ret
    653 
        # Tail dispatch on 16..112 remaining bytes; each je reuses the flags of
        # the preceding cmpq because movups does not modify them.
    654 .Lecb_dec_tail:
    655 	movups	(%rdi),%xmm2
    656 	cmpq	$32,%rdx
    657 	jb	.Lecb_dec_one
    658 	movups	16(%rdi),%xmm3
    659 	je	.Lecb_dec_two
    660 	movups	32(%rdi),%xmm4
    661 	cmpq	$64,%rdx
    662 	jb	.Lecb_dec_three
    663 	movups	48(%rdi),%xmm5
    664 	je	.Lecb_dec_four
    665 	movups	64(%rdi),%xmm6
    666 	cmpq	$96,%rdx
    667 	jb	.Lecb_dec_five
    668 	movups	80(%rdi),%xmm7
    669 	je	.Lecb_dec_six
        # Seven blocks: 8-block helper; xmm9 is not loaded and its result is
        # not stored.
    670 	movups	96(%rdi),%xmm8
    671 	movups	(%rcx),%xmm0
    672 	call	_aesni_decrypt8
    673 	movups	%xmm2,(%rsi)
    674 	movups	%xmm3,16(%rsi)
    675 	movups	%xmm4,32(%rsi)
    676 	movups	%xmm5,48(%rsi)
    677 	movups	%xmm6,64(%rsi)
    678 	movups	%xmm7,80(%rsi)
    679 	movups	%xmm8,96(%rsi)
    680 	jmp	.Lecb_ret
    681 .align	16
        # One block: inline single-block decrypt loop.
    682 .Lecb_dec_one:
    683 	movups	(%rcx),%xmm0
    684 	movups	16(%rcx),%xmm1
    685 	leaq	32(%rcx),%rcx
    686 	xorps	%xmm0,%xmm2
    687 .Loop_dec1_4:
    688 .byte	102,15,56,222,209
    689 	decl	%eax
    690 	movups	(%rcx),%xmm1
    691 	leaq	16(%rcx),%rcx
    692 	jnz	.Loop_dec1_4
    693 .byte	102,15,56,223,209
    694 	movups	%xmm2,(%rsi)
    695 	jmp	.Lecb_ret
    696 .align	16
        # Two blocks: 3-block helper with a zeroed, ignored third lane.
    697 .Lecb_dec_two:
    698 	xorps	%xmm4,%xmm4
    699 	call	_aesni_decrypt3
    700 	movups	%xmm2,(%rsi)
    701 	movups	%xmm3,16(%rsi)
    702 	jmp	.Lecb_ret
    703 .align	16
    704 .Lecb_dec_three:
    705 	call	_aesni_decrypt3
    706 	movups	%xmm2,(%rsi)
    707 	movups	%xmm3,16(%rsi)
    708 	movups	%xmm4,32(%rsi)
    709 	jmp	.Lecb_ret
    710 .align	16
    711 .Lecb_dec_four:
    712 	call	_aesni_decrypt4
    713 	movups	%xmm2,(%rsi)
    714 	movups	%xmm3,16(%rsi)
    715 	movups	%xmm4,32(%rsi)
    716 	movups	%xmm5,48(%rsi)
    717 	jmp	.Lecb_ret
    718 .align	16
        # Five blocks: 6-block helper with a zeroed, ignored sixth lane.
    719 .Lecb_dec_five:
    720 	xorps	%xmm7,%xmm7
    721 	call	_aesni_decrypt6
    722 	movups	%xmm2,(%rsi)
    723 	movups	%xmm3,16(%rsi)
    724 	movups	%xmm4,32(%rsi)
    725 	movups	%xmm5,48(%rsi)
    726 	movups	%xmm6,64(%rsi)
    727 	jmp	.Lecb_ret
    728 .align	16
        # Six blocks: last case, falls through into the common return.
    729 .Lecb_dec_six:
    730 	call	_aesni_decrypt6
    731 	movups	%xmm2,(%rsi)
    732 	movups	%xmm3,16(%rsi)
    733 	movups	%xmm4,32(%rsi)
    734 	movups	%xmm5,48(%rsi)
    735 	movups	%xmm6,64(%rsi)
    736 	movups	%xmm7,80(%rsi)
    737 
    738 .Lecb_ret:
    739 	.byte	0xf3,0xc3
    740 .size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
    741 .globl	aesni_ccm64_encrypt_blocks
    742 .type	aesni_ccm64_encrypt_blocks,@function
    743 .align	16
        # aesni_ccm64_encrypt_blocks -- per 16-byte block, encrypt a counter block
        # and an accumulator block side by side, XOR the input with the
        # encrypted counter, and fold the input into the accumulator.
        #
        # In:    rdi = input, rsi = output
        #        rdx = number of 16-byte blocks
        #        rcx = key schedule (32-bit loop count at offset 240)
        #        r8  = 16-byte counter block (read only; never written back here)
        #        r9  = 16-byte accumulator, read at entry and stored at exit
        # Clobb: rax, rcx, rdx, rsi, rdi, r10, r11, xmm0-xmm3, xmm6-xmm9, flags
        #
        # NOTE(review): judging by the name, the accumulator is presumably the
        # CCM CBC-MAC state -- confirm against the C caller.
        # NOTE(review): there is no guard for rdx == 0; the first decq would
        # wrap around. Callers presumably guarantee a nonzero count -- confirm.
        # NOTE(review): .Lincrement64 and .Lbswap_mask are defined outside this
        # part of the file; presumably a 64-bit +1 constant and a byte-reversal
        # shuffle mask -- confirm. movdqa requires them to be 16-byte aligned.
        #
        # .byte legend:
        #   102,68,15,56,0,207 = pshufb %xmm7,%xmm9
        #   102,15,56,0,215    = pshufb %xmm7,%xmm2
        #   102,15,56,220,M    = aesenc      M 209/217 = xmm1 into xmm2/xmm3
        #   102,15,56,221,M    = aesenclast  M 208/216 = xmm0 into xmm2/xmm3
        #   0xf3,0xc3          = rep ret
    744 aesni_ccm64_encrypt_blocks:
    745 	movl	240(%rcx),%eax
    746 	movdqu	(%r8),%xmm9
    747 	movdqa	.Lincrement64(%rip),%xmm6
    748 	movdqa	.Lbswap_mask(%rip),%xmm7
    749 
        # r11 = key pointer, r10d = halved loop count (two round keys per pass
        # of the inner loop). xmm2 takes the counter block as loaded; xmm9 keeps
        # a shuffled copy on which the paddq increments are performed.
    750 	shrl	$1,%eax
    751 	leaq	0(%rcx),%r11
    752 	movdqu	(%r9),%xmm3
    753 	movdqa	%xmm9,%xmm2
    754 	movl	%eax,%r10d
    755 .byte	102,68,15,56,0,207
    756 	jmp	.Lccm64_enc_outer
    757 .align	16
        # Per block: xmm2 = counter ^ rk0, xmm3 = accumulator ^ input ^ rk0
        # (xmm0 temporarily holds rk0 ^ input), xmm8 keeps the raw input.
    758 .Lccm64_enc_outer:
    759 	movups	(%r11),%xmm0
    760 	movl	%r10d,%eax
    761 	movups	(%rdi),%xmm8
    762 
    763 	xorps	%xmm0,%xmm2
    764 	movups	16(%r11),%xmm1
    765 	xorps	%xmm8,%xmm0
    766 	leaq	32(%r11),%rcx
    767 	xorps	%xmm0,%xmm3
    768 	movups	(%rcx),%xmm0
    769 
        # Two-lane AES: two rounds per pass; jnz uses the flags from decl.
    770 .Lccm64_enc2_loop:
    771 .byte	102,15,56,220,209
    772 	decl	%eax
    773 .byte	102,15,56,220,217
    774 	movups	16(%rcx),%xmm1
    775 .byte	102,15,56,220,208
    776 	leaq	32(%rcx),%rcx
    777 .byte	102,15,56,220,216
    778 	movups	0(%rcx),%xmm0
    779 	jnz	.Lccm64_enc2_loop
    780 .byte	102,15,56,220,209
    781 .byte	102,15,56,220,217
        # Advance the shuffled counter for the next block.
    782 	paddq	%xmm6,%xmm9
    783 .byte	102,15,56,221,208
    784 .byte	102,15,56,221,216
    785 
        # decq sets ZF for the jnz at the bottom; lea, xorps, movdqa, movups and
        # pshufb in between do not modify flags. Output = input ^ encrypted
        # counter; the next counter block is the shuffled-back copy of xmm9.
    786 	decq	%rdx
    787 	leaq	16(%rdi),%rdi
    788 	xorps	%xmm2,%xmm8
    789 	movdqa	%xmm9,%xmm2
    790 	movups	%xmm8,(%rsi)
    791 	leaq	16(%rsi),%rsi
    792 .byte	102,15,56,0,215
    793 	jnz	.Lccm64_enc_outer
    794 
        # Write the final accumulator back.
    795 	movups	%xmm3,(%r9)
    796 	.byte	0xf3,0xc3
    797 .size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
    798 .globl	aesni_ccm64_decrypt_blocks
    799 .type	aesni_ccm64_decrypt_blocks,@function
    800 .align	16
        # aesni_ccm64_decrypt_blocks -- per 16-byte block, XOR the input with an
        # encrypted counter block and fold the resulting output block into an
        # accumulator that is itself encrypted once per block.
        #
        # In:    rdi = input, rsi = output
        #        rdx = number of 16-byte blocks
        #        rcx = key schedule (32-bit loop count at offset 240)
        #        r8  = 16-byte counter block (read only; never written back here)
        #        r9  = 16-byte accumulator, read at entry and stored at exit
        # Clobb: rax, rcx, rdx, rsi, rdi, r10, r11, xmm0-xmm3, xmm6-xmm9, flags
        #
        # Only aesenc/aesenclast are used: the counter and the accumulator are
        # both run through the encryption rounds.
        #
        # NOTE(review): judging by the name, the accumulator is presumably the
        # CCM CBC-MAC state -- confirm against the C caller.
        # NOTE(review): there is no guard for rdx == 0; one block is processed
        # before the count is first tested. Callers presumably guarantee a
        # nonzero count -- confirm.
        # NOTE(review): .Lincrement64 and .Lbswap_mask are defined outside this
        # part of the file; presumably a 64-bit +1 constant and a byte-reversal
        # shuffle mask -- confirm.
        #
        # .byte legend:
        #   102,68,15,56,0,207 = pshufb %xmm7,%xmm9
        #   102,15,56,0,215    = pshufb %xmm7,%xmm2
        #   102,15,56,220,M    = aesenc      M 209/217 = xmm1 into xmm2/xmm3
        #   102,15,56,221,M    = aesenclast  M 209/217 = xmm1 into xmm2/xmm3,
        #                                    M 208/216 = xmm0 into xmm2/xmm3
        #   0xf3,0xc3          = rep ret
    801 aesni_ccm64_decrypt_blocks:
    802 	movl	240(%rcx),%eax
    803 	movups	(%r8),%xmm9
    804 	movdqu	(%r9),%xmm3
    805 	movdqa	.Lincrement64(%rip),%xmm6
    806 	movdqa	.Lbswap_mask(%rip),%xmm7
    807 
        # r10d = full (not halved) loop count, r11 = pristine key pointer.
        # xmm2 takes the counter block as loaded; xmm9 keeps a shuffled copy
        # on which the paddq increments are performed.
    808 	movaps	%xmm9,%xmm2
    809 	movl	%eax,%r10d
    810 	movq	%rcx,%r11
    811 .byte	102,68,15,56,0,207
        # Encrypt the first counter block on its own (single-lane loop).
    812 	movups	(%rcx),%xmm0
    813 	movups	16(%rcx),%xmm1
    814 	leaq	32(%rcx),%rcx
    815 	xorps	%xmm0,%xmm2
    816 .Loop_enc1_5:
    817 .byte	102,15,56,220,209
    818 	decl	%eax
    819 	movups	(%rcx),%xmm1
    820 	leaq	16(%rcx),%rcx
    821 	jnz	.Loop_enc1_5
    822 .byte	102,15,56,221,209
    823 	movups	(%rdi),%xmm8
    824 	paddq	%xmm6,%xmm9
    825 	leaq	16(%rdi),%rdi
    826 	jmp	.Lccm64_dec_outer
    827 .align	16
        # xmm8 = input block, xmm2 = encrypted counter. Emit the output block,
        # then set up the next counter block in xmm2.
    828 .Lccm64_dec_outer:
    829 	xorps	%xmm2,%xmm8
    830 	movdqa	%xmm9,%xmm2
    831 	movl	%r10d,%eax
    832 	movups	%xmm8,(%rsi)
    833 	leaq	16(%rsi),%rsi
    834 .byte	102,15,56,0,215
    835 
    836 	subq	$1,%rdx
    837 	jz	.Lccm64_dec_break
    838 
        # More blocks follow: encrypt the next counter (xmm2) and the
        # accumulator folded with the block just produced (xmm3) in two lanes,
        # with the count halved for the two-rounds-per-pass loop.
    839 	movups	(%r11),%xmm0
    840 	shrl	$1,%eax
    841 	movups	16(%r11),%xmm1
    842 	xorps	%xmm0,%xmm8
    843 	leaq	32(%r11),%rcx
    844 	xorps	%xmm0,%xmm2
    845 	xorps	%xmm8,%xmm3
    846 	movups	(%rcx),%xmm0
    847 
    848 .Lccm64_dec2_loop:
    849 .byte	102,15,56,220,209
    850 	decl	%eax
    851 .byte	102,15,56,220,217
    852 	movups	16(%rcx),%xmm1
    853 .byte	102,15,56,220,208
    854 	leaq	32(%rcx),%rcx
    855 .byte	102,15,56,220,216
    856 	movups	0(%rcx),%xmm0
    857 	jnz	.Lccm64_dec2_loop
        # Fetch the next input block and bump the counter while the last two
        # rounds complete.
    858 	movups	(%rdi),%xmm8
    859 	paddq	%xmm6,%xmm9
    860 .byte	102,15,56,220,209
    861 .byte	102,15,56,220,217
    862 	leaq	16(%rdi),%rdi
    863 .byte	102,15,56,221,208
    864 .byte	102,15,56,221,216
    865 	jmp	.Lccm64_dec_outer
    866 
    867 .align	16
        # Last block: only the accumulator still needs encrypting. eax holds the
        # full count reloaded from r10d in the outer loop (the shrl is skipped
        # on this path), and r11 itself is walked through the key schedule.
    868 .Lccm64_dec_break:
    869 
    870 	movups	(%r11),%xmm0
    871 	movups	16(%r11),%xmm1
    872 	xorps	%xmm0,%xmm8
    873 	leaq	32(%r11),%r11
    874 	xorps	%xmm8,%xmm3
    875 .Loop_enc1_6:
    876 .byte	102,15,56,220,217
    877 	decl	%eax
    878 	movups	(%r11),%xmm1
    879 	leaq	16(%r11),%r11
    880 	jnz	.Loop_enc1_6
    881 .byte	102,15,56,221,217
        # Write the final accumulator back.
    882 	movups	%xmm3,(%r9)
    883 	.byte	0xf3,0xc3
    884 .size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
    885 .globl	aesni_ctr32_encrypt_blocks
    886 .type	aesni_ctr32_encrypt_blocks,@function
    887 .align	16
        # aesni_ctr32_encrypt_blocks -- counter-mode processing with a 32-bit
        # counter: each output block is the input block XORed with the
        # encryption of (counter block with its last dword replaced by a
        # running counter).
        #
        # In:    rdi = input, rsi = output
        #        rdx = number of 16-byte blocks
        #        rcx = key schedule (32-bit loop count at offset 240)
        #        r8  = 16-byte initial counter block (read only)
        # Clobb: rax, rcx, rdx, rsi, rdi, r10, r11, xmm0-xmm15, flags, and the
        #        32 bytes at -40(%rsp) .. -9(%rsp)
        #
        # Stack: two 16-byte scratch slots below rsp at -40 and -24, accessed
        # with movdqa and without moving rsp. Both addresses are 16-byte aligned
        # when rsp is 8 mod 16 at entry, as under the SysV AMD64 convention; the
        # return addresses pushed by the helper calls land at -8 and do not
        # overlap the slots.
        #
        # NOTE(review): a block count of 0 is not rejected; it falls through to
        # the tail code, which still processes one block. Callers presumably
        # never pass 0 -- confirm.
        # NOTE(review): .Lbswap_mask and .Lincrement32 are defined outside this
        # part of the file; presumably a byte-reversal shuffle mask and a
        # per-lane counter increment -- confirm. The lane comments below assume
        # the mask reverses all 16 bytes.
        #
        # .byte legend:
        #   102,69,15,58,22,242,3  = pextrd $3,%xmm14,%r10d
        #   102,68,15,58,34,240,3  = pinsrd $3,%eax,%xmm14
        #   102,69,15,58,34,226,N  = pinsrd $N,%r10d,%xmm12
        #   102,69,15,58,34,235,N  = pinsrd $N,%r11d,%xmm13
        #   102,69,15,56,0,231     = pshufb %xmm15,%xmm12
        #   102,69,15,56,0,239     = pshufb %xmm15,%xmm13
        #   102,15,56,220,M        = aesenc      M 209..249 = xmm1 into xmm2..xmm7
        #                                        M 208..248 = xmm0 into xmm2..xmm7
        #   102,15,56,221,M        = aesenclast  (same M encoding)
        #   0xf3,0xc3              = rep ret
    888 aesni_ctr32_encrypt_blocks:
    889 	cmpq	$1,%rdx
    890 	je	.Lctr32_one_shortcut
    891 
        # xmm14 = counter block with its last dword cleared (eax is zero when
        # inserted); r10d = that last dword, byte-swapped by bswapl below.
    892 	movdqu	(%r8),%xmm14
    893 	movdqa	.Lbswap_mask(%rip),%xmm15
    894 	xorl	%eax,%eax
    895 .byte	102,69,15,58,22,242,3
    896 .byte	102,68,15,58,34,240,3
    897 
        # Build two counter triples: xmm12 lanes 0..2 = ctr, ctr+1, ctr+2 and
        # xmm13 lanes 0..2 = ctr+3, ctr+4, ctr+5 (lane 3 of each stays zero).
        # The unshuffled triples are parked in the stack slots; the register
        # copies are then byte-shuffled with the mask in xmm15.
    898 	movl	240(%rcx),%eax
    899 	bswapl	%r10d
    900 	pxor	%xmm12,%xmm12
    901 	pxor	%xmm13,%xmm13
    902 .byte	102,69,15,58,34,226,0
    903 	leaq	3(%r10),%r11
    904 .byte	102,69,15,58,34,235,0
    905 	incl	%r10d
    906 .byte	102,69,15,58,34,226,1
    907 	incq	%r11
    908 .byte	102,69,15,58,34,235,1
    909 	incl	%r10d
    910 .byte	102,69,15,58,34,226,2
    911 	incq	%r11
    912 .byte	102,69,15,58,34,235,2
    913 	movdqa	%xmm12,-40(%rsp)
    914 .byte	102,69,15,56,0,231
    915 	movdqa	%xmm13,-24(%rsp)
    916 .byte	102,69,15,56,0,239
    917 
        # pshufd $192/$128/$64 copies source dword 3/2/1 into the top dword and
        # source dword 0 into the three lower dwords, giving one counter block
        # skeleton per lane set; the por with xmm14 merges in the fixed part.
    918 	pshufd	$192,%xmm12,%xmm2
    919 	pshufd	$128,%xmm12,%xmm3
    920 	pshufd	$64,%xmm12,%xmm4
    921 	cmpq	$6,%rdx
    922 	jb	.Lctr32_tail
        # From here on r11 = key pointer and r10d = halved loop count; the
        # running counters live only in xmm12/xmm13 and the stack slots.
    923 	shrl	$1,%eax
    924 	movq	%rcx,%r11
    925 	movl	%eax,%r10d
    926 	subq	$6,%rdx
    927 	jmp	.Lctr32_loop6
    928 
    929 .align	16
        # Main loop: six counter blocks per pass. Finish building xmm5..xmm7,
        # merge the fixed part, whiten with round key 0 and start the first
        # aesenc round, all interleaved.
    930 .Lctr32_loop6:
    931 	pshufd	$192,%xmm13,%xmm5
    932 	por	%xmm14,%xmm2
    933 	movups	(%r11),%xmm0
    934 	pshufd	$128,%xmm13,%xmm6
    935 	por	%xmm14,%xmm3
    936 	movups	16(%r11),%xmm1
    937 	pshufd	$64,%xmm13,%xmm7
    938 	por	%xmm14,%xmm4
    939 	por	%xmm14,%xmm5
    940 	xorps	%xmm0,%xmm2
    941 	por	%xmm14,%xmm6
    942 	por	%xmm14,%xmm7
    943 
    944 
    945 
    946 
    947 	pxor	%xmm0,%xmm3
    948 .byte	102,15,56,220,209
    949 	leaq	32(%r11),%rcx
    950 	pxor	%xmm0,%xmm4
    951 .byte	102,15,56,220,217
        # xmm13 now holds the increment constant and xmm12 the saved first
        # triple; both are consumed by the paddd pair after the round loop.
    952 	movdqa	.Lincrement32(%rip),%xmm13
    953 	pxor	%xmm0,%xmm5
    954 .byte	102,15,56,220,225
    955 	movdqa	-40(%rsp),%xmm12
    956 	pxor	%xmm0,%xmm6
    957 .byte	102,15,56,220,233
    958 	pxor	%xmm0,%xmm7
    959 	movups	(%rcx),%xmm0
        # decl accounts for the xmm1 round started above; its flags are consumed
        # by the jnz at the bottom of the round loop.
    960 	decl	%eax
    961 .byte	102,15,56,220,241
    962 .byte	102,15,56,220,249
    963 	jmp	.Lctr32_enc_loop6_enter
    964 .align	16
    965 .Lctr32_enc_loop6:
    966 .byte	102,15,56,220,209
    967 .byte	102,15,56,220,217
    968 	decl	%eax
    969 .byte	102,15,56,220,225
    970 .byte	102,15,56,220,233
    971 .byte	102,15,56,220,241
    972 .byte	102,15,56,220,249
    973 .Lctr32_enc_loop6_enter:
    974 	movups	16(%rcx),%xmm1
    975 .byte	102,15,56,220,208
    976 .byte	102,15,56,220,216
    977 	leaq	32(%rcx),%rcx
    978 .byte	102,15,56,220,224
    979 .byte	102,15,56,220,232
    980 .byte	102,15,56,220,240
    981 .byte	102,15,56,220,248
    982 	movups	(%rcx),%xmm0
    983 	jnz	.Lctr32_enc_loop6
    984 
        # Last aesenc round, interleaved with the counter update: both saved
        # triples are advanced by the increment, stored back to their slots and
        # byte-shuffled again for the next pass.
    985 .byte	102,15,56,220,209
    986 	paddd	%xmm13,%xmm12
    987 .byte	102,15,56,220,217
    988 	paddd	-24(%rsp),%xmm13
    989 .byte	102,15,56,220,225
    990 	movdqa	%xmm12,-40(%rsp)
    991 .byte	102,15,56,220,233
    992 	movdqa	%xmm13,-24(%rsp)
    993 .byte	102,15,56,220,241
    994 .byte	102,69,15,56,0,231
    995 .byte	102,15,56,220,249
    996 .byte	102,69,15,56,0,239
    997 
        # aesenclast per lane, interleaved with loading the six input blocks.
        # xmm1 and xmm0 are reused as data registers once the keys they held
        # are no longer needed.
    998 .byte	102,15,56,221,208
    999 	movups	(%rdi),%xmm8
   1000 .byte	102,15,56,221,216
   1001 	movups	16(%rdi),%xmm9
   1002 .byte	102,15,56,221,224
   1003 	movups	32(%rdi),%xmm10
   1004 .byte	102,15,56,221,232
   1005 	movups	48(%rdi),%xmm11
   1006 .byte	102,15,56,221,240
   1007 	movups	64(%rdi),%xmm1
   1008 .byte	102,15,56,221,248
   1009 	movups	80(%rdi),%xmm0
   1010 	leaq	96(%rdi),%rdi
   1011 
        # output = input ^ keystream; meanwhile rebuild xmm2..xmm4 from the
        # updated first triple for the next pass or for the tail.
   1012 	xorps	%xmm2,%xmm8
   1013 	pshufd	$192,%xmm12,%xmm2
   1014 	xorps	%xmm3,%xmm9
   1015 	pshufd	$128,%xmm12,%xmm3
   1016 	movups	%xmm8,(%rsi)
   1017 	xorps	%xmm4,%xmm10
   1018 	pshufd	$64,%xmm12,%xmm4
   1019 	movups	%xmm9,16(%rsi)
   1020 	xorps	%xmm5,%xmm11
   1021 	movups	%xmm10,32(%rsi)
   1022 	xorps	%xmm6,%xmm1
   1023 	movups	%xmm11,48(%rsi)
   1024 	xorps	%xmm7,%xmm0
   1025 	movups	%xmm1,64(%rsi)
   1026 	movups	%xmm0,80(%rsi)
   1027 	leaq	96(%rsi),%rsi
   1028 	movl	%r10d,%eax
   1029 	subq	$6,%rdx
   1030 	jnc	.Lctr32_loop6
   1031 
        # Fewer than six blocks remain: recover the remaining count, restore the
        # key pointer, and rebuild the loop count as 2*eax+1, which equals the
        # value before the shrl above when that value was odd.
   1032 	addq	$6,%rdx
   1033 	jz	.Lctr32_done
   1034 	movq	%r11,%rcx
   1035 	leal	1(%rax,%rax,1),%eax
   1036 
        # Tail dispatch. por, movups and pshufd do not modify flags, so each je
        # reuses the result of the cmpq above it.
   1037 .Lctr32_tail:
   1038 	por	%xmm14,%xmm2
   1039 	movups	(%rdi),%xmm8
   1040 	cmpq	$2,%rdx
   1041 	jb	.Lctr32_one
   1042 
   1043 	por	%xmm14,%xmm3
   1044 	movups	16(%rdi),%xmm9
   1045 	je	.Lctr32_two
   1046 
   1047 	pshufd	$192,%xmm13,%xmm5
   1048 	por	%xmm14,%xmm4
   1049 	movups	32(%rdi),%xmm10
   1050 	cmpq	$4,%rdx
   1051 	jb	.Lctr32_three
   1052 
   1053 	pshufd	$128,%xmm13,%xmm6
   1054 	por	%xmm14,%xmm5
   1055 	movups	48(%rdi),%xmm11
   1056 	je	.Lctr32_four
   1057 
        # Remaining case: the 6-block helper with a zeroed sixth lane; only five
        # results are stored.
   1058 	por	%xmm14,%xmm6
   1059 	xorps	%xmm7,%xmm7
   1060 
   1061 	call	_aesni_encrypt6
   1062 
   1063 	movups	64(%rdi),%xmm1
   1064 	xorps	%xmm2,%xmm8
   1065 	xorps	%xmm3,%xmm9
   1066 	movups	%xmm8,(%rsi)
   1067 	xorps	%xmm4,%xmm10
   1068 	movups	%xmm9,16(%rsi)
   1069 	xorps	%xmm5,%xmm11
   1070 	movups	%xmm10,32(%rsi)
   1071 	xorps	%xmm6,%xmm1
   1072 	movups	%xmm11,48(%rsi)
   1073 	movups	%xmm1,64(%rsi)
   1074 	jmp	.Lctr32_done
   1075 
   1076 .align	16
        # Exactly one block requested: use the counter block from (r8) as is,
        # skipping all of the counter-vector setup.
   1077 .Lctr32_one_shortcut:
   1078 	movups	(%r8),%xmm2
   1079 	movups	(%rdi),%xmm8
   1080 	movl	240(%rcx),%eax
        # Single-block encrypt of xmm2, then output = input ^ keystream.
   1081 .Lctr32_one:
   1082 	movups	(%rcx),%xmm0
   1083 	movups	16(%rcx),%xmm1
   1084 	leaq	32(%rcx),%rcx
   1085 	xorps	%xmm0,%xmm2
   1086 .Loop_enc1_7:
   1087 .byte	102,15,56,220,209
   1088 	decl	%eax
   1089 	movups	(%rcx),%xmm1
   1090 	leaq	16(%rcx),%rcx
   1091 	jnz	.Loop_enc1_7
   1092 .byte	102,15,56,221,209
   1093 	xorps	%xmm2,%xmm8
   1094 	movups	%xmm8,(%rsi)
   1095 	jmp	.Lctr32_done
   1096 
   1097 .align	16
        # Two blocks: the 3-block helper with a zeroed, ignored third lane.
   1098 .Lctr32_two:
   1099 	xorps	%xmm4,%xmm4
   1100 	call	_aesni_encrypt3
   1101 	xorps	%xmm2,%xmm8
   1102 	xorps	%xmm3,%xmm9
   1103 	movups	%xmm8,(%rsi)
   1104 	movups	%xmm9,16(%rsi)
   1105 	jmp	.Lctr32_done
   1106 
   1107 .align	16
   1108 .Lctr32_three:
   1109 	call	_aesni_encrypt3
   1110 	xorps	%xmm2,%xmm8
   1111 	xorps	%xmm3,%xmm9
   1112 	movups	%xmm8,(%rsi)
   1113 	xorps	%xmm4,%xmm10
   1114 	movups	%xmm9,16(%rsi)
   1115 	movups	%xmm10,32(%rsi)
   1116 	jmp	.Lctr32_done
   1117 
   1118 .align	16
        # Four blocks: last case, falls through into the common return.
   1119 .Lctr32_four:
   1120 	call	_aesni_encrypt4
   1121 	xorps	%xmm2,%xmm8
   1122 	xorps	%xmm3,%xmm9
   1123 	movups	%xmm8,(%rsi)
   1124 	xorps	%xmm4,%xmm10
   1125 	movups	%xmm9,16(%rsi)
   1126 	xorps	%xmm5,%xmm11
   1127 	movups	%xmm10,32(%rsi)
   1128 	movups	%xmm11,48(%rsi)
   1129 
   1130 .Lctr32_done:
   1131 	.byte	0xf3,0xc3
   1132 .size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
   1133 .globl	aesni_xts_encrypt
   1134 .type	aesni_xts_encrypt,@function
   1135 .align	16
        # aesni_xts_encrypt: tweaked (XTS-style) AES encryption of a buffer,
        # six blocks per main-loop iteration, with ciphertext stealing for a
        # trailing partial block.
        #
        # Register use as read from the code below:
        #   %rdi  input pointer, advanced as blocks are consumed
        #   %rsi  output pointer, advanced as blocks are produced
        #   %rdx  byte length; the unrounded value is copied to %r9 so that
        #         its low four bits can drive the stealing step at the end
        #   %rcx  key schedule for the data blocks (kept in %r11)
        #   %r8   key schedule used once, to encrypt the initial tweak
        #   %r9   on entry, pointer to the 16-byte initial tweak input
        #   240(schedule) is read as the single-block loop round counter
        # NOTE(review): presumably the C argument order is
        #   (inp, out, len, key1, key2, iv) -- confirm against the prototype.
        #
        # AES-NI instructions are emitted as raw bytes:
        #   102,15,56,220,modrm   aesenc      (66 0f 38 dc)
        #   102,15,56,221,modrm   aesenclast  (66 0f 38 dd)
        #   102,68,...            same, with a REX.R prefix: destination is
        #                         %xmm8-%xmm15 (249 then means %xmm1 -> %xmm15)
        #   modrm 209,217,225,233,241,249 = %xmm1 into %xmm2..%xmm7
        #   modrm 208,216,224,232,240,248 = %xmm0 into %xmm2..%xmm7
        #   0xf3,0xc3             rep ret
   1136 aesni_xts_encrypt:
        # Reserve 104 bytes; 0..95(%rsp) later hold six saved tweaks and are
        # accessed with aligned moves (movdqa).
   1137 	leaq	-104(%rsp),%rsp
   1138 	movups	(%r9),%xmm15
   1139 	movl	240(%r8),%eax
   1140 	movl	240(%rcx),%r10d
        # Encrypt the block loaded from (%r9) with the %r8 schedule; the
        # result, the first tweak, stays in %xmm15.
   1141 	movups	(%r8),%xmm0
   1142 	movups	16(%r8),%xmm1
   1143 	leaq	32(%r8),%r8
   1144 	xorps	%xmm0,%xmm15
   1145 .Loop_enc1_8:
   1146 .byte	102,68,15,56,220,249
   1147 	decl	%eax
   1148 	movups	(%r8),%xmm1
   1149 	leaq	16(%r8),%r8
   1150 	jnz	.Loop_enc1_8
   1151 .byte	102,68,15,56,221,249
        # %r11 = data key schedule, %eax = its round counter,
        # %r9 = original length, %rdx = length rounded down to whole blocks.
   1152 	movq	%rcx,%r11
   1153 	movl	%r10d,%eax
   1154 	movq	%rdx,%r9
   1155 	andq	$-16,%rdx
   1156 
        # Derive four consecutive tweaks into %xmm10..%xmm13, leaving the next
        # one in %xmm15 and its sign mask in %xmm14.  Each group saves the
        # current tweak, doubles both 64-bit halves (paddq) and XORs in the
        # sign mask of the previous value, rearranged by pshufd $19 and
        # masked with the constant in %xmm8.
        # NOTE(review): .Lxts_magic is defined outside this chunk; presumably
        # it carries the GF(2^128) reduction constant -- confirm.
   1157 	movdqa	.Lxts_magic(%rip),%xmm8
   1158 	pxor	%xmm14,%xmm14
   1159 	pcmpgtd	%xmm15,%xmm14
   1160 	pshufd	$19,%xmm14,%xmm9
   1161 	pxor	%xmm14,%xmm14
   1162 	movdqa	%xmm15,%xmm10
   1163 	paddq	%xmm15,%xmm15
   1164 	pand	%xmm8,%xmm9
   1165 	pcmpgtd	%xmm15,%xmm14
   1166 	pxor	%xmm9,%xmm15
   1167 	pshufd	$19,%xmm14,%xmm9
   1168 	pxor	%xmm14,%xmm14
   1169 	movdqa	%xmm15,%xmm11
   1170 	paddq	%xmm15,%xmm15
   1171 	pand	%xmm8,%xmm9
   1172 	pcmpgtd	%xmm15,%xmm14
   1173 	pxor	%xmm9,%xmm15
   1174 	pshufd	$19,%xmm14,%xmm9
   1175 	pxor	%xmm14,%xmm14
   1176 	movdqa	%xmm15,%xmm12
   1177 	paddq	%xmm15,%xmm15
   1178 	pand	%xmm8,%xmm9
   1179 	pcmpgtd	%xmm15,%xmm14
   1180 	pxor	%xmm9,%xmm15
   1181 	pshufd	$19,%xmm14,%xmm9
   1182 	pxor	%xmm14,%xmm14
   1183 	movdqa	%xmm15,%xmm13
   1184 	paddq	%xmm15,%xmm15
   1185 	pand	%xmm8,%xmm9
   1186 	pcmpgtd	%xmm15,%xmm14
   1187 	pxor	%xmm9,%xmm15
        # Fewer than six whole blocks (96 bytes): go to the short paths.
   1188 	subq	$96,%rdx
   1189 	jc	.Lxts_enc_short
   1190 
        # The six-block loop runs two rounds per pass, so its counter is
        # (rounds >> 1) - 1; %r10d keeps that value to reload on each pass.
   1191 	shrl	$1,%eax
   1192 	subl	$1,%eax
   1193 	movl	%eax,%r10d
   1194 	jmp	.Lxts_enc_grandloop
   1195 
   1196 .align	16
   1197 .Lxts_enc_grandloop:
        # Finish the fifth tweak (%xmm14) and sixth tweak (%xmm15) while
        # loading six input blocks into %xmm2..%xmm7 and XORing each with
        # its tweak.
   1198 	pshufd	$19,%xmm14,%xmm9
   1199 	movdqa	%xmm15,%xmm14
   1200 	paddq	%xmm15,%xmm15
   1201 	movdqu	0(%rdi),%xmm2
   1202 	pand	%xmm8,%xmm9
   1203 	movdqu	16(%rdi),%xmm3
   1204 	pxor	%xmm9,%xmm15
   1205 
   1206 	movdqu	32(%rdi),%xmm4
   1207 	pxor	%xmm10,%xmm2
   1208 	movdqu	48(%rdi),%xmm5
   1209 	pxor	%xmm11,%xmm3
   1210 	movdqu	64(%rdi),%xmm6
   1211 	pxor	%xmm12,%xmm4
   1212 	movdqu	80(%rdi),%xmm7
   1213 	leaq	96(%rdi),%rdi
   1214 	pxor	%xmm13,%xmm5
   1215 	movups	(%r11),%xmm0
   1216 	pxor	%xmm14,%xmm6
   1217 	pxor	%xmm15,%xmm7
   1218 
   1219 
   1220 
        # Round-0 key XOR and the first aesenc round, interleaved with
        # saving the six tweaks to 0..80(%rsp).  The decl result is consumed
        # by the jnz at the bottom of .Lxts_enc_loop6; the instructions
        # between them (pxor/pcmpgtd/movups/leaq/jmp) leave ZF alone.
   1221 	movups	16(%r11),%xmm1
   1222 	pxor	%xmm0,%xmm2
   1223 	pxor	%xmm0,%xmm3
   1224 	movdqa	%xmm10,0(%rsp)
   1225 .byte	102,15,56,220,209
   1226 	leaq	32(%r11),%rcx
   1227 	pxor	%xmm0,%xmm4
   1228 	movdqa	%xmm11,16(%rsp)
   1229 .byte	102,15,56,220,217
   1230 	pxor	%xmm0,%xmm5
   1231 	movdqa	%xmm12,32(%rsp)
   1232 .byte	102,15,56,220,225
   1233 	pxor	%xmm0,%xmm6
   1234 	movdqa	%xmm13,48(%rsp)
   1235 .byte	102,15,56,220,233
   1236 	pxor	%xmm0,%xmm7
   1237 	movups	(%rcx),%xmm0
   1238 	decl	%eax
   1239 	movdqa	%xmm14,64(%rsp)
   1240 .byte	102,15,56,220,241
   1241 	movdqa	%xmm15,80(%rsp)
   1242 .byte	102,15,56,220,249
   1243 	pxor	%xmm14,%xmm14
   1244 	pcmpgtd	%xmm15,%xmm14
   1245 	jmp	.Lxts_enc_loop6_enter
   1246 
   1247 .align	16
        # Two rounds per pass over all six blocks, alternating the round
        # keys held in %xmm1 and %xmm0.
   1248 .Lxts_enc_loop6:
   1249 .byte	102,15,56,220,209
   1250 .byte	102,15,56,220,217
   1251 	decl	%eax
   1252 .byte	102,15,56,220,225
   1253 .byte	102,15,56,220,233
   1254 .byte	102,15,56,220,241
   1255 .byte	102,15,56,220,249
   1256 .Lxts_enc_loop6_enter:
   1257 	movups	16(%rcx),%xmm1
   1258 .byte	102,15,56,220,208
   1259 .byte	102,15,56,220,216
   1260 	leaq	32(%rcx),%rcx
   1261 .byte	102,15,56,220,224
   1262 .byte	102,15,56,220,232
   1263 .byte	102,15,56,220,240
   1264 .byte	102,15,56,220,248
   1265 	movups	(%rcx),%xmm0
   1266 	jnz	.Lxts_enc_loop6
   1267 
        # Remaining rounds (three aesenc, then aesenclast), interleaved with
        # tweak doubling: the first step only advances %xmm15, the next four
        # store the tweaks for the following iteration in %xmm10..%xmm13.
   1268 	pshufd	$19,%xmm14,%xmm9
   1269 	pxor	%xmm14,%xmm14
   1270 	paddq	%xmm15,%xmm15
   1271 .byte	102,15,56,220,209
   1272 	pand	%xmm8,%xmm9
   1273 .byte	102,15,56,220,217
   1274 	pcmpgtd	%xmm15,%xmm14
   1275 .byte	102,15,56,220,225
   1276 	pxor	%xmm9,%xmm15
   1277 .byte	102,15,56,220,233
   1278 .byte	102,15,56,220,241
   1279 .byte	102,15,56,220,249
   1280 	movups	16(%rcx),%xmm1
   1281 
   1282 	pshufd	$19,%xmm14,%xmm9
   1283 	pxor	%xmm14,%xmm14
   1284 	movdqa	%xmm15,%xmm10
   1285 	paddq	%xmm15,%xmm15
   1286 .byte	102,15,56,220,208
   1287 	pand	%xmm8,%xmm9
   1288 .byte	102,15,56,220,216
   1289 	pcmpgtd	%xmm15,%xmm14
   1290 .byte	102,15,56,220,224
   1291 	pxor	%xmm9,%xmm15
   1292 .byte	102,15,56,220,232
   1293 .byte	102,15,56,220,240
   1294 .byte	102,15,56,220,248
   1295 	movups	32(%rcx),%xmm0
   1296 
   1297 	pshufd	$19,%xmm14,%xmm9
   1298 	pxor	%xmm14,%xmm14
   1299 	movdqa	%xmm15,%xmm11
   1300 	paddq	%xmm15,%xmm15
   1301 .byte	102,15,56,220,209
   1302 	pand	%xmm8,%xmm9
   1303 .byte	102,15,56,220,217
   1304 	pcmpgtd	%xmm15,%xmm14
   1305 .byte	102,15,56,220,225
   1306 	pxor	%xmm9,%xmm15
   1307 .byte	102,15,56,220,233
   1308 .byte	102,15,56,220,241
   1309 .byte	102,15,56,220,249
   1310 
   1311 	pshufd	$19,%xmm14,%xmm9
   1312 	pxor	%xmm14,%xmm14
   1313 	movdqa	%xmm15,%xmm12
   1314 	paddq	%xmm15,%xmm15
   1315 .byte	102,15,56,221,208
   1316 	pand	%xmm8,%xmm9
   1317 .byte	102,15,56,221,216
   1318 	pcmpgtd	%xmm15,%xmm14
   1319 .byte	102,15,56,221,224
   1320 	pxor	%xmm9,%xmm15
   1321 .byte	102,15,56,221,232
   1322 .byte	102,15,56,221,240
   1323 .byte	102,15,56,221,248
   1324 
        # XOR the six results with the tweaks saved on the stack, store
        # them, reload the loop counter from %r10d and continue while at
        # least 96 bytes remain.
   1325 	pshufd	$19,%xmm14,%xmm9
   1326 	pxor	%xmm14,%xmm14
   1327 	movdqa	%xmm15,%xmm13
   1328 	paddq	%xmm15,%xmm15
   1329 	xorps	0(%rsp),%xmm2
   1330 	pand	%xmm8,%xmm9
   1331 	xorps	16(%rsp),%xmm3
   1332 	pcmpgtd	%xmm15,%xmm14
   1333 	pxor	%xmm9,%xmm15
   1334 
   1335 	xorps	32(%rsp),%xmm4
   1336 	movups	%xmm2,0(%rsi)
   1337 	xorps	48(%rsp),%xmm5
   1338 	movups	%xmm3,16(%rsi)
   1339 	xorps	64(%rsp),%xmm6
   1340 	movups	%xmm4,32(%rsi)
   1341 	xorps	80(%rsp),%xmm7
   1342 	movups	%xmm5,48(%rsi)
   1343 	movl	%r10d,%eax
   1344 	movups	%xmm6,64(%rsi)
   1345 	movups	%xmm7,80(%rsi)
   1346 	leaq	96(%rsi),%rsi
   1347 	subq	$96,%rdx
   1348 	jnc	.Lxts_enc_grandloop
   1349 
        # Undo the counter transformation (%eax = 2*%eax + 3) so %eax and
        # %r10d hold the single-block counter again, and restore %rcx.
   1350 	leal	3(%rax,%rax,1),%eax
   1351 	movq	%r11,%rcx
   1352 	movl	%eax,%r10d
   1353 
   1354 .Lxts_enc_short:
        # %rdx + 96 = remaining whole-block bytes (0..80).  Dispatch on
        # one to four blocks; five blocks fall through.
   1355 	addq	$96,%rdx
   1356 	jz	.Lxts_enc_done
   1357 
   1358 	cmpq	$32,%rdx
   1359 	jb	.Lxts_enc_one
   1360 	je	.Lxts_enc_two
   1361 
   1362 	cmpq	$64,%rdx
   1363 	jb	.Lxts_enc_three
   1364 	je	.Lxts_enc_four
   1365 
        # Five blocks: %xmm14 becomes the fifth tweak and %xmm15 advances to
        # the one after it.  Every tail path leaves the next unused tweak in
        # %xmm10 for the stealing step.
   1366 	pshufd	$19,%xmm14,%xmm9
   1367 	movdqa	%xmm15,%xmm14
   1368 	paddq	%xmm15,%xmm15
   1369 	movdqu	(%rdi),%xmm2
   1370 	pand	%xmm8,%xmm9
   1371 	movdqu	16(%rdi),%xmm3
   1372 	pxor	%xmm9,%xmm15
   1373 
   1374 	movdqu	32(%rdi),%xmm4
   1375 	pxor	%xmm10,%xmm2
   1376 	movdqu	48(%rdi),%xmm5
   1377 	pxor	%xmm11,%xmm3
   1378 	movdqu	64(%rdi),%xmm6
   1379 	leaq	80(%rdi),%rdi
   1380 	pxor	%xmm12,%xmm4
   1381 	pxor	%xmm13,%xmm5
   1382 	pxor	%xmm14,%xmm6
   1383 
        # _aesni_encrypt6 is defined outside this chunk; %xmm7 is not loaded
        # here and its result is not used.
   1384 	call	_aesni_encrypt6
   1385 
   1386 	xorps	%xmm10,%xmm2
   1387 	movdqa	%xmm15,%xmm10
   1388 	xorps	%xmm11,%xmm3
   1389 	xorps	%xmm12,%xmm4
   1390 	movdqu	%xmm2,(%rsi)
   1391 	xorps	%xmm13,%xmm5
   1392 	movdqu	%xmm3,16(%rsi)
   1393 	xorps	%xmm14,%xmm6
   1394 	movdqu	%xmm4,32(%rsi)
   1395 	movdqu	%xmm5,48(%rsi)
   1396 	movdqu	%xmm6,64(%rsi)
   1397 	leaq	80(%rsi),%rsi
   1398 	jmp	.Lxts_enc_done
   1399 
   1400 .align	16
        # One block: inline single-block encryption with tweak %xmm10.
   1401 .Lxts_enc_one:
   1402 	movups	(%rdi),%xmm2
   1403 	leaq	16(%rdi),%rdi
   1404 	xorps	%xmm10,%xmm2
   1405 	movups	(%rcx),%xmm0
   1406 	movups	16(%rcx),%xmm1
   1407 	leaq	32(%rcx),%rcx
   1408 	xorps	%xmm0,%xmm2
   1409 .Loop_enc1_9:
   1410 .byte	102,15,56,220,209
   1411 	decl	%eax
   1412 	movups	(%rcx),%xmm1
   1413 	leaq	16(%rcx),%rcx
   1414 	jnz	.Loop_enc1_9
   1415 .byte	102,15,56,221,209
   1416 	xorps	%xmm10,%xmm2
   1417 	movdqa	%xmm11,%xmm10
   1418 	movups	%xmm2,(%rsi)
   1419 	leaq	16(%rsi),%rsi
   1420 	jmp	.Lxts_enc_done
   1421 
   1422 .align	16
        # Two blocks via _aesni_encrypt3; %xmm4 is not set up here and its
        # result is ignored.
   1423 .Lxts_enc_two:
   1424 	movups	(%rdi),%xmm2
   1425 	movups	16(%rdi),%xmm3
   1426 	leaq	32(%rdi),%rdi
   1427 	xorps	%xmm10,%xmm2
   1428 	xorps	%xmm11,%xmm3
   1429 
   1430 	call	_aesni_encrypt3
   1431 
   1432 	xorps	%xmm10,%xmm2
   1433 	movdqa	%xmm12,%xmm10
   1434 	xorps	%xmm11,%xmm3
   1435 	movups	%xmm2,(%rsi)
   1436 	movups	%xmm3,16(%rsi)
   1437 	leaq	32(%rsi),%rsi
   1438 	jmp	.Lxts_enc_done
   1439 
   1440 .align	16
        # Three blocks.
   1441 .Lxts_enc_three:
   1442 	movups	(%rdi),%xmm2
   1443 	movups	16(%rdi),%xmm3
   1444 	movups	32(%rdi),%xmm4
   1445 	leaq	48(%rdi),%rdi
   1446 	xorps	%xmm10,%xmm2
   1447 	xorps	%xmm11,%xmm3
   1448 	xorps	%xmm12,%xmm4
   1449 
   1450 	call	_aesni_encrypt3
   1451 
   1452 	xorps	%xmm10,%xmm2
   1453 	movdqa	%xmm13,%xmm10
   1454 	xorps	%xmm11,%xmm3
   1455 	xorps	%xmm12,%xmm4
   1456 	movups	%xmm2,(%rsi)
   1457 	movups	%xmm3,16(%rsi)
   1458 	movups	%xmm4,32(%rsi)
   1459 	leaq	48(%rsi),%rsi
   1460 	jmp	.Lxts_enc_done
   1461 
   1462 .align	16
        # Four blocks; _aesni_encrypt4 is defined outside this chunk.
   1463 .Lxts_enc_four:
   1464 	movups	(%rdi),%xmm2
   1465 	movups	16(%rdi),%xmm3
   1466 	movups	32(%rdi),%xmm4
   1467 	xorps	%xmm10,%xmm2
   1468 	movups	48(%rdi),%xmm5
   1469 	leaq	64(%rdi),%rdi
   1470 	xorps	%xmm11,%xmm3
   1471 	xorps	%xmm12,%xmm4
   1472 	xorps	%xmm13,%xmm5
   1473 
   1474 	call	_aesni_encrypt4
   1475 
   1476 	xorps	%xmm10,%xmm2
   1477 	movdqa	%xmm15,%xmm10
   1478 	xorps	%xmm11,%xmm3
   1479 	xorps	%xmm12,%xmm4
   1480 	movups	%xmm2,(%rsi)
   1481 	xorps	%xmm13,%xmm5
   1482 	movups	%xmm3,16(%rsi)
   1483 	movups	%xmm4,32(%rsi)
   1484 	movups	%xmm5,48(%rsi)
   1485 	leaq	64(%rsi),%rsi
   1486 	jmp	.Lxts_enc_done
   1487 
   1488 .align	16
   1489 .Lxts_enc_done:
        # %r9 & 15 = bytes in a trailing partial block; none means done.
   1490 	andq	$15,%r9
   1491 	jz	.Lxts_enc_ret
   1492 	movq	%r9,%rdx
   1493 
        # Ciphertext stealing: for each tail byte, move the byte already
        # written in the last full output block to the tail output and put
        # the corresponding input byte in its place.
   1494 .Lxts_enc_steal:
   1495 	movzbl	(%rdi),%eax
   1496 	movzbl	-16(%rsi),%ecx
   1497 	leaq	1(%rdi),%rdi
   1498 	movb	%al,-16(%rsi)
   1499 	movb	%cl,0(%rsi)
   1500 	leaq	1(%rsi),%rsi
   1501 	subq	$1,%rdx
   1502 	jnz	.Lxts_enc_steal
   1503 
        # Rewind %rsi, restore key pointer and round counter, then encrypt
        # the patched block at -16(%rsi) in place with tweak %xmm10.
   1504 	subq	%r9,%rsi
   1505 	movq	%r11,%rcx
   1506 	movl	%r10d,%eax
   1507 
   1508 	movups	-16(%rsi),%xmm2
   1509 	xorps	%xmm10,%xmm2
   1510 	movups	(%rcx),%xmm0
   1511 	movups	16(%rcx),%xmm1
   1512 	leaq	32(%rcx),%rcx
   1513 	xorps	%xmm0,%xmm2
   1514 .Loop_enc1_10:
   1515 .byte	102,15,56,220,209
   1516 	decl	%eax
   1517 	movups	(%rcx),%xmm1
   1518 	leaq	16(%rcx),%rcx
   1519 	jnz	.Loop_enc1_10
   1520 .byte	102,15,56,221,209
   1521 	xorps	%xmm10,%xmm2
   1522 	movups	%xmm2,-16(%rsi)
   1523 
   1524 .Lxts_enc_ret:
        # Release the 104-byte frame and return (rep ret).
   1525 	leaq	104(%rsp),%rsp
   1526 .Lxts_enc_epilogue:
   1527 	.byte	0xf3,0xc3
   1528 .size	aesni_xts_encrypt,.-aesni_xts_encrypt
   1529 .globl	aesni_xts_decrypt
   1530 .type	aesni_xts_decrypt,@function
   1531 .align	16
        # aesni_xts_decrypt: tweaked (XTS-style) AES decryption of a buffer,
        # six blocks per main-loop iteration, with ciphertext stealing for a
        # trailing partial block.
        #
        # Register use as read from the code below:
        #   %rdi  input pointer, %rsi output pointer (both advanced)
        #   %rdx  byte length; see the adjustment after the tweak encryption
        #   %rcx  key schedule for the data blocks (kept in %r11)
        #   %r8   key schedule used once, to encrypt the initial tweak
        #   %r9   on entry, pointer to the 16-byte initial tweak input
        #   240(schedule) is read as the single-block loop round counter
        # NOTE(review): presumably the C argument order is
        #   (inp, out, len, key1, key2, iv) -- confirm against the prototype.
        #
        # AES-NI instructions are emitted as raw bytes:
        #   102,15,56,222,modrm   aesdec      (66 0f 38 de)
        #   102,15,56,223,modrm   aesdeclast  (66 0f 38 df)
        #   102,68,15,56,220/221,249  aesenc/aesenclast %xmm1,%xmm15
        #                         (the tweak is encrypted, not decrypted)
        #   modrm 209,217,225,233,241,249 = %xmm1 into %xmm2..%xmm7
        #   modrm 208,216,224,232,240,248 = %xmm0 into %xmm2..%xmm7
        #   0xf3,0xc3             rep ret
   1532 aesni_xts_decrypt:
        # Reserve 104 bytes; 0..95(%rsp) later hold six saved tweaks.
   1533 	leaq	-104(%rsp),%rsp
   1534 	movups	(%r9),%xmm15
   1535 	movl	240(%r8),%eax
   1536 	movl	240(%rcx),%r10d
        # Encrypt the block loaded from (%r9) with the %r8 schedule; the
        # result, the first tweak, stays in %xmm15.
   1537 	movups	(%r8),%xmm0
   1538 	movups	16(%r8),%xmm1
   1539 	leaq	32(%r8),%r8
   1540 	xorps	%xmm0,%xmm15
   1541 .Loop_enc1_11:
   1542 .byte	102,68,15,56,220,249
   1543 	decl	%eax
   1544 	movups	(%r8),%xmm1
   1545 	leaq	16(%r8),%r8
   1546 	jnz	.Loop_enc1_11
   1547 .byte	102,68,15,56,221,249
        # If the length is not a multiple of 16, take 16 bytes off %rdx so
        # the last full block is held back for the stealing step.
   1548 	xorl	%eax,%eax
   1549 	testq	$15,%rdx
   1550 	setnz	%al
   1551 	shlq	$4,%rax
   1552 	subq	%rax,%rdx
   1553 
        # %r11 = data key schedule, %eax = its round counter,
        # %r9 = adjusted length, %rdx = that length rounded down to blocks.
   1554 	movq	%rcx,%r11
   1555 	movl	%r10d,%eax
   1556 	movq	%rdx,%r9
   1557 	andq	$-16,%rdx
   1558 
        # Derive four consecutive tweaks into %xmm10..%xmm13, leaving the next
        # one in %xmm15 and its sign mask in %xmm14.  Each group saves the
        # current tweak, doubles both 64-bit halves (paddq) and XORs in the
        # sign mask of the previous value, rearranged by pshufd $19 and
        # masked with the constant in %xmm8.
        # NOTE(review): .Lxts_magic is defined outside this chunk; presumably
        # it carries the GF(2^128) reduction constant -- confirm.
   1559 	movdqa	.Lxts_magic(%rip),%xmm8
   1560 	pxor	%xmm14,%xmm14
   1561 	pcmpgtd	%xmm15,%xmm14
   1562 	pshufd	$19,%xmm14,%xmm9
   1563 	pxor	%xmm14,%xmm14
   1564 	movdqa	%xmm15,%xmm10
   1565 	paddq	%xmm15,%xmm15
   1566 	pand	%xmm8,%xmm9
   1567 	pcmpgtd	%xmm15,%xmm14
   1568 	pxor	%xmm9,%xmm15
   1569 	pshufd	$19,%xmm14,%xmm9
   1570 	pxor	%xmm14,%xmm14
   1571 	movdqa	%xmm15,%xmm11
   1572 	paddq	%xmm15,%xmm15
   1573 	pand	%xmm8,%xmm9
   1574 	pcmpgtd	%xmm15,%xmm14
   1575 	pxor	%xmm9,%xmm15
   1576 	pshufd	$19,%xmm14,%xmm9
   1577 	pxor	%xmm14,%xmm14
   1578 	movdqa	%xmm15,%xmm12
   1579 	paddq	%xmm15,%xmm15
   1580 	pand	%xmm8,%xmm9
   1581 	pcmpgtd	%xmm15,%xmm14
   1582 	pxor	%xmm9,%xmm15
   1583 	pshufd	$19,%xmm14,%xmm9
   1584 	pxor	%xmm14,%xmm14
   1585 	movdqa	%xmm15,%xmm13
   1586 	paddq	%xmm15,%xmm15
   1587 	pand	%xmm8,%xmm9
   1588 	pcmpgtd	%xmm15,%xmm14
   1589 	pxor	%xmm9,%xmm15
        # Fewer than six whole blocks (96 bytes): go to the short paths.
   1590 	subq	$96,%rdx
   1591 	jc	.Lxts_dec_short
   1592 
        # The six-block loop runs two rounds per pass, so its counter is
        # (rounds >> 1) - 1; %r10d keeps that value to reload on each pass.
   1593 	shrl	$1,%eax
   1594 	subl	$1,%eax
   1595 	movl	%eax,%r10d
   1596 	jmp	.Lxts_dec_grandloop
   1597 
   1598 .align	16
   1599 .Lxts_dec_grandloop:
        # Finish the fifth tweak (%xmm14) and sixth tweak (%xmm15) while
        # loading six input blocks into %xmm2..%xmm7 and XORing each with
        # its tweak.
   1600 	pshufd	$19,%xmm14,%xmm9
   1601 	movdqa	%xmm15,%xmm14
   1602 	paddq	%xmm15,%xmm15
   1603 	movdqu	0(%rdi),%xmm2
   1604 	pand	%xmm8,%xmm9
   1605 	movdqu	16(%rdi),%xmm3
   1606 	pxor	%xmm9,%xmm15
   1607 
   1608 	movdqu	32(%rdi),%xmm4
   1609 	pxor	%xmm10,%xmm2
   1610 	movdqu	48(%rdi),%xmm5
   1611 	pxor	%xmm11,%xmm3
   1612 	movdqu	64(%rdi),%xmm6
   1613 	pxor	%xmm12,%xmm4
   1614 	movdqu	80(%rdi),%xmm7
   1615 	leaq	96(%rdi),%rdi
   1616 	pxor	%xmm13,%xmm5
   1617 	movups	(%r11),%xmm0
   1618 	pxor	%xmm14,%xmm6
   1619 	pxor	%xmm15,%xmm7
   1620 
   1621 
   1622 
        # Round-0 key XOR and the first aesdec round, interleaved with
        # saving the six tweaks to 0..80(%rsp).  The decl result is consumed
        # by the jnz at the bottom of .Lxts_dec_loop6; the instructions
        # between them leave ZF alone.
   1623 	movups	16(%r11),%xmm1
   1624 	pxor	%xmm0,%xmm2
   1625 	pxor	%xmm0,%xmm3
   1626 	movdqa	%xmm10,0(%rsp)
   1627 .byte	102,15,56,222,209
   1628 	leaq	32(%r11),%rcx
   1629 	pxor	%xmm0,%xmm4
   1630 	movdqa	%xmm11,16(%rsp)
   1631 .byte	102,15,56,222,217
   1632 	pxor	%xmm0,%xmm5
   1633 	movdqa	%xmm12,32(%rsp)
   1634 .byte	102,15,56,222,225
   1635 	pxor	%xmm0,%xmm6
   1636 	movdqa	%xmm13,48(%rsp)
   1637 .byte	102,15,56,222,233
   1638 	pxor	%xmm0,%xmm7
   1639 	movups	(%rcx),%xmm0
   1640 	decl	%eax
   1641 	movdqa	%xmm14,64(%rsp)
   1642 .byte	102,15,56,222,241
   1643 	movdqa	%xmm15,80(%rsp)
   1644 .byte	102,15,56,222,249
   1645 	pxor	%xmm14,%xmm14
   1646 	pcmpgtd	%xmm15,%xmm14
   1647 	jmp	.Lxts_dec_loop6_enter
   1648 
   1649 .align	16
        # Two rounds per pass over all six blocks, alternating the round
        # keys held in %xmm1 and %xmm0.
   1650 .Lxts_dec_loop6:
   1651 .byte	102,15,56,222,209
   1652 .byte	102,15,56,222,217
   1653 	decl	%eax
   1654 .byte	102,15,56,222,225
   1655 .byte	102,15,56,222,233
   1656 .byte	102,15,56,222,241
   1657 .byte	102,15,56,222,249
   1658 .Lxts_dec_loop6_enter:
   1659 	movups	16(%rcx),%xmm1
   1660 .byte	102,15,56,222,208
   1661 .byte	102,15,56,222,216
   1662 	leaq	32(%rcx),%rcx
   1663 .byte	102,15,56,222,224
   1664 .byte	102,15,56,222,232
   1665 .byte	102,15,56,222,240
   1666 .byte	102,15,56,222,248
   1667 	movups	(%rcx),%xmm0
   1668 	jnz	.Lxts_dec_loop6
   1669 
        # Remaining rounds (three aesdec, then aesdeclast), interleaved with
        # tweak doubling: the first step only advances %xmm15, the next four
        # store the tweaks for the following iteration in %xmm10..%xmm13.
   1670 	pshufd	$19,%xmm14,%xmm9
   1671 	pxor	%xmm14,%xmm14
   1672 	paddq	%xmm15,%xmm15
   1673 .byte	102,15,56,222,209
   1674 	pand	%xmm8,%xmm9
   1675 .byte	102,15,56,222,217
   1676 	pcmpgtd	%xmm15,%xmm14
   1677 .byte	102,15,56,222,225
   1678 	pxor	%xmm9,%xmm15
   1679 .byte	102,15,56,222,233
   1680 .byte	102,15,56,222,241
   1681 .byte	102,15,56,222,249
   1682 	movups	16(%rcx),%xmm1
   1683 
   1684 	pshufd	$19,%xmm14,%xmm9
   1685 	pxor	%xmm14,%xmm14
   1686 	movdqa	%xmm15,%xmm10
   1687 	paddq	%xmm15,%xmm15
   1688 .byte	102,15,56,222,208
   1689 	pand	%xmm8,%xmm9
   1690 .byte	102,15,56,222,216
   1691 	pcmpgtd	%xmm15,%xmm14
   1692 .byte	102,15,56,222,224
   1693 	pxor	%xmm9,%xmm15
   1694 .byte	102,15,56,222,232
   1695 .byte	102,15,56,222,240
   1696 .byte	102,15,56,222,248
   1697 	movups	32(%rcx),%xmm0
   1698 
   1699 	pshufd	$19,%xmm14,%xmm9
   1700 	pxor	%xmm14,%xmm14
   1701 	movdqa	%xmm15,%xmm11
   1702 	paddq	%xmm15,%xmm15
   1703 .byte	102,15,56,222,209
   1704 	pand	%xmm8,%xmm9
   1705 .byte	102,15,56,222,217
   1706 	pcmpgtd	%xmm15,%xmm14
   1707 .byte	102,15,56,222,225
   1708 	pxor	%xmm9,%xmm15
   1709 .byte	102,15,56,222,233
   1710 .byte	102,15,56,222,241
   1711 .byte	102,15,56,222,249
   1712 
   1713 	pshufd	$19,%xmm14,%xmm9
   1714 	pxor	%xmm14,%xmm14
   1715 	movdqa	%xmm15,%xmm12
   1716 	paddq	%xmm15,%xmm15
   1717 .byte	102,15,56,223,208
   1718 	pand	%xmm8,%xmm9
   1719 .byte	102,15,56,223,216
   1720 	pcmpgtd	%xmm15,%xmm14
   1721 .byte	102,15,56,223,224
   1722 	pxor	%xmm9,%xmm15
   1723 .byte	102,15,56,223,232
   1724 .byte	102,15,56,223,240
   1725 .byte	102,15,56,223,248
   1726 
        # XOR the six results with the tweaks saved on the stack, store
        # them, reload the loop counter from %r10d and continue while at
        # least 96 bytes remain.
   1727 	pshufd	$19,%xmm14,%xmm9
   1728 	pxor	%xmm14,%xmm14
   1729 	movdqa	%xmm15,%xmm13
   1730 	paddq	%xmm15,%xmm15
   1731 	xorps	0(%rsp),%xmm2
   1732 	pand	%xmm8,%xmm9
   1733 	xorps	16(%rsp),%xmm3
   1734 	pcmpgtd	%xmm15,%xmm14
   1735 	pxor	%xmm9,%xmm15
   1736 
   1737 	xorps	32(%rsp),%xmm4
   1738 	movups	%xmm2,0(%rsi)
   1739 	xorps	48(%rsp),%xmm5
   1740 	movups	%xmm3,16(%rsi)
   1741 	xorps	64(%rsp),%xmm6
   1742 	movups	%xmm4,32(%rsi)
   1743 	xorps	80(%rsp),%xmm7
   1744 	movups	%xmm5,48(%rsi)
   1745 	movl	%r10d,%eax
   1746 	movups	%xmm6,64(%rsi)
   1747 	movups	%xmm7,80(%rsi)
   1748 	leaq	96(%rsi),%rsi
   1749 	subq	$96,%rdx
   1750 	jnc	.Lxts_dec_grandloop
   1751 
        # Undo the counter transformation (%eax = 2*%eax + 3) so %eax and
        # %r10d hold the single-block counter again, and restore %rcx.
   1752 	leal	3(%rax,%rax,1),%eax
   1753 	movq	%r11,%rcx
   1754 	movl	%eax,%r10d
   1755 
   1756 .Lxts_dec_short:
        # %rdx + 96 = remaining whole-block bytes (0..80).  Dispatch on
        # one to four blocks; five blocks fall through.  Each tail path
        # leaves the next two unused tweaks in %xmm10 and %xmm11, both of
        # which the stealing step needs.
   1757 	addq	$96,%rdx
   1758 	jz	.Lxts_dec_done
   1759 
   1760 	cmpq	$32,%rdx
   1761 	jb	.Lxts_dec_one
   1762 	je	.Lxts_dec_two
   1763 
   1764 	cmpq	$64,%rdx
   1765 	jb	.Lxts_dec_three
   1766 	je	.Lxts_dec_four
   1767 
        # Five blocks: %xmm14 becomes the fifth tweak, %xmm15 the sixth.
   1768 	pshufd	$19,%xmm14,%xmm9
   1769 	movdqa	%xmm15,%xmm14
   1770 	paddq	%xmm15,%xmm15
   1771 	movdqu	(%rdi),%xmm2
   1772 	pand	%xmm8,%xmm9
   1773 	movdqu	16(%rdi),%xmm3
   1774 	pxor	%xmm9,%xmm15
   1775 
   1776 	movdqu	32(%rdi),%xmm4
   1777 	pxor	%xmm10,%xmm2
   1778 	movdqu	48(%rdi),%xmm5
   1779 	pxor	%xmm11,%xmm3
   1780 	movdqu	64(%rdi),%xmm6
   1781 	leaq	80(%rdi),%rdi
   1782 	pxor	%xmm12,%xmm4
   1783 	pxor	%xmm13,%xmm5
   1784 	pxor	%xmm14,%xmm6
   1785 
        # _aesni_decrypt6 is defined outside this chunk; %xmm7 is not loaded
        # here and its result is not used.
   1786 	call	_aesni_decrypt6
   1787 
        # Store the five results while starting the next doubling step
        # (sign mask of %xmm15 into %xmm11).
   1788 	xorps	%xmm10,%xmm2
   1789 	xorps	%xmm11,%xmm3
   1790 	xorps	%xmm12,%xmm4
   1791 	movdqu	%xmm2,(%rsi)
   1792 	xorps	%xmm13,%xmm5
   1793 	movdqu	%xmm3,16(%rsi)
   1794 	xorps	%xmm14,%xmm6
   1795 	movdqu	%xmm4,32(%rsi)
   1796 	pxor	%xmm14,%xmm14
   1797 	movdqu	%xmm5,48(%rsi)
   1798 	pcmpgtd	%xmm15,%xmm14
   1799 	movdqu	%xmm6,64(%rsi)
   1800 	leaq	80(%rsi),%rsi
   1801 	pshufd	$19,%xmm14,%xmm11
   1802 	andq	$15,%r9
   1803 	jz	.Lxts_dec_ret
   1804 
        # Partial tail present: %xmm10 = sixth tweak, %xmm11 = the one
        # after it, then join the stealing code past its own %r9 test.
   1805 	movdqa	%xmm15,%xmm10
   1806 	paddq	%xmm15,%xmm15
   1807 	pand	%xmm8,%xmm11
   1808 	pxor	%xmm15,%xmm11
   1809 	jmp	.Lxts_dec_done2
   1810 
   1811 .align	16
        # One block: inline single-block decryption with tweak %xmm10.
   1812 .Lxts_dec_one:
   1813 	movups	(%rdi),%xmm2
   1814 	leaq	16(%rdi),%rdi
   1815 	xorps	%xmm10,%xmm2
   1816 	movups	(%rcx),%xmm0
   1817 	movups	16(%rcx),%xmm1
   1818 	leaq	32(%rcx),%rcx
   1819 	xorps	%xmm0,%xmm2
   1820 .Loop_dec1_12:
   1821 .byte	102,15,56,222,209
   1822 	decl	%eax
   1823 	movups	(%rcx),%xmm1
   1824 	leaq	16(%rcx),%rcx
   1825 	jnz	.Loop_dec1_12
   1826 .byte	102,15,56,223,209
   1827 	xorps	%xmm10,%xmm2
   1828 	movdqa	%xmm11,%xmm10
   1829 	movups	%xmm2,(%rsi)
   1830 	movdqa	%xmm12,%xmm11
   1831 	leaq	16(%rsi),%rsi
   1832 	jmp	.Lxts_dec_done
   1833 
   1834 .align	16
        # Two blocks via _aesni_decrypt3 (defined outside this chunk);
        # %xmm4 is not set up here and its result is ignored.
   1835 .Lxts_dec_two:
   1836 	movups	(%rdi),%xmm2
   1837 	movups	16(%rdi),%xmm3
   1838 	leaq	32(%rdi),%rdi
   1839 	xorps	%xmm10,%xmm2
   1840 	xorps	%xmm11,%xmm3
   1841 
   1842 	call	_aesni_decrypt3
   1843 
   1844 	xorps	%xmm10,%xmm2
   1845 	movdqa	%xmm12,%xmm10
   1846 	xorps	%xmm11,%xmm3
   1847 	movdqa	%xmm13,%xmm11
   1848 	movups	%xmm2,(%rsi)
   1849 	movups	%xmm3,16(%rsi)
   1850 	leaq	32(%rsi),%rsi
   1851 	jmp	.Lxts_dec_done
   1852 
   1853 .align	16
        # Three blocks.
   1854 .Lxts_dec_three:
   1855 	movups	(%rdi),%xmm2
   1856 	movups	16(%rdi),%xmm3
   1857 	movups	32(%rdi),%xmm4
   1858 	leaq	48(%rdi),%rdi
   1859 	xorps	%xmm10,%xmm2
   1860 	xorps	%xmm11,%xmm3
   1861 	xorps	%xmm12,%xmm4
   1862 
   1863 	call	_aesni_decrypt3
   1864 
   1865 	xorps	%xmm10,%xmm2
   1866 	movdqa	%xmm13,%xmm10
   1867 	xorps	%xmm11,%xmm3
   1868 	movdqa	%xmm15,%xmm11
   1869 	xorps	%xmm12,%xmm4
   1870 	movups	%xmm2,(%rsi)
   1871 	movups	%xmm3,16(%rsi)
   1872 	movups	%xmm4,32(%rsi)
   1873 	leaq	48(%rsi),%rsi
   1874 	jmp	.Lxts_dec_done
   1875 
   1876 .align	16
        # Four blocks: one extra doubling step first, so that %xmm14 and
        # %xmm15 hold the two tweaks following the four used here.
   1877 .Lxts_dec_four:
   1878 	pshufd	$19,%xmm14,%xmm9
   1879 	movdqa	%xmm15,%xmm14
   1880 	paddq	%xmm15,%xmm15
   1881 	movups	(%rdi),%xmm2
   1882 	pand	%xmm8,%xmm9
   1883 	movups	16(%rdi),%xmm3
   1884 	pxor	%xmm9,%xmm15
   1885 
   1886 	movups	32(%rdi),%xmm4
   1887 	xorps	%xmm10,%xmm2
   1888 	movups	48(%rdi),%xmm5
   1889 	leaq	64(%rdi),%rdi
   1890 	xorps	%xmm11,%xmm3
   1891 	xorps	%xmm12,%xmm4
   1892 	xorps	%xmm13,%xmm5
   1893 
   1894 	call	_aesni_decrypt4
   1895 
   1896 	xorps	%xmm10,%xmm2
   1897 	movdqa	%xmm14,%xmm10
   1898 	xorps	%xmm11,%xmm3
   1899 	movdqa	%xmm15,%xmm11
   1900 	xorps	%xmm12,%xmm4
   1901 	movups	%xmm2,(%rsi)
   1902 	xorps	%xmm13,%xmm5
   1903 	movups	%xmm3,16(%rsi)
   1904 	movups	%xmm4,32(%rsi)
   1905 	movups	%xmm5,48(%rsi)
   1906 	leaq	64(%rsi),%rsi
   1907 	jmp	.Lxts_dec_done
   1908 
   1909 .align	16
   1910 .Lxts_dec_done:
        # %r9 & 15 = bytes in a trailing partial block; none means done.
   1911 	andq	$15,%r9
   1912 	jz	.Lxts_dec_ret
   1913 .Lxts_dec_done2:
        # Stealing, step 1: decrypt the held-back full block at (%rdi) with
        # the LATER tweak (%xmm11) and store it at (%rsi).
   1914 	movq	%r9,%rdx
   1915 	movq	%r11,%rcx
   1916 	movl	%r10d,%eax
   1917 
   1918 	movups	(%rdi),%xmm2
   1919 	xorps	%xmm11,%xmm2
   1920 	movups	(%rcx),%xmm0
   1921 	movups	16(%rcx),%xmm1
   1922 	leaq	32(%rcx),%rcx
   1923 	xorps	%xmm0,%xmm2
   1924 .Loop_dec1_13:
   1925 .byte	102,15,56,222,209
   1926 	decl	%eax
   1927 	movups	(%rcx),%xmm1
   1928 	leaq	16(%rcx),%rcx
   1929 	jnz	.Loop_dec1_13
   1930 .byte	102,15,56,223,209
   1931 	xorps	%xmm11,%xmm2
   1932 	movups	%xmm2,(%rsi)
   1933 
        # Step 2: for each tail byte, move the byte just written at (%rsi)
        # to the tail output at 16(%rsi) and replace it with the tail input
        # byte from 16(%rdi).
   1934 .Lxts_dec_steal:
   1935 	movzbl	16(%rdi),%eax
   1936 	movzbl	(%rsi),%ecx
   1937 	leaq	1(%rdi),%rdi
   1938 	movb	%al,(%rsi)
   1939 	movb	%cl,16(%rsi)
   1940 	leaq	1(%rsi),%rsi
   1941 	subq	$1,%rdx
   1942 	jnz	.Lxts_dec_steal
   1943 
        # Step 3: rewind %rsi and decrypt the patched block in place with
        # the EARLIER tweak (%xmm10).
   1944 	subq	%r9,%rsi
   1945 	movq	%r11,%rcx
   1946 	movl	%r10d,%eax
   1947 
   1948 	movups	(%rsi),%xmm2
   1949 	xorps	%xmm10,%xmm2
   1950 	movups	(%rcx),%xmm0
   1951 	movups	16(%rcx),%xmm1
   1952 	leaq	32(%rcx),%rcx
   1953 	xorps	%xmm0,%xmm2
   1954 .Loop_dec1_14:
   1955 .byte	102,15,56,222,209
   1956 	decl	%eax
   1957 	movups	(%rcx),%xmm1
   1958 	leaq	16(%rcx),%rcx
   1959 	jnz	.Loop_dec1_14
   1960 .byte	102,15,56,223,209
   1961 	xorps	%xmm10,%xmm2
   1962 	movups	%xmm2,(%rsi)
   1963 
   1964 .Lxts_dec_ret:
        # Release the 104-byte frame and return (rep ret).
   1965 	leaq	104(%rsp),%rsp
   1966 .Lxts_dec_epilogue:
   1967 	.byte	0xf3,0xc3
   1968 .size	aesni_xts_decrypt,.-aesni_xts_decrypt
   1969 .globl	aesni_cbc_encrypt
   1970 .type	aesni_cbc_encrypt,@function
   1971 .align	16
        # aesni_cbc_encrypt: AES-CBC encryption or decryption of a buffer.
        #
        # Register use as read from the code below:
        #   %rdi  input pointer, %rsi output pointer
        #   %rdx  byte length (zero returns immediately)
        #   %rcx  key schedule (saved in %r11); 240(%rcx) is read as the
        #         single-block loop round counter (saved in %r10d)
        #   %r8   pointer to the 16-byte chaining value: read on entry,
        #         written back with the last ciphertext block on exit
        #   %r9d  non-zero selects encryption, zero selects decryption
        # NOTE(review): presumably the C argument order is
        #   (in, out, length, key, ivec, enc) -- confirm against the prototype.
        #
        # No stack frame is allocated; scratch data lives at -24(%rsp),
        # below the stack pointer.
        #
        # Raw-byte instruction encodings used here:
        #   102,15,56,220/221,209    aesenc/aesenclast %xmm1,%xmm2
        #   102,15,56,222,modrm      aesdec  (209..249 = %xmm1 into %xmm2..%xmm7)
        #   102,15,56,223,209        aesdeclast %xmm1,%xmm2
        #   102,68,15,56,222,193/201 aesdec %xmm1,%xmm8 / %xmm1,%xmm9
        #   .long 0x9066A4F3         rep movsb, then a two-byte nop (66 90)
        #   .long 0x9066AAF3         rep stosb, then a two-byte nop (66 90)
        #   0xf3,0xc3                rep ret
   1972 aesni_cbc_encrypt:
   1973 	testq	%rdx,%rdx
   1974 	jz	.Lcbc_ret
   1975 
   1976 	movl	240(%rcx),%r10d
   1977 	movq	%rcx,%r11
   1978 	testl	%r9d,%r9d
   1979 	jz	.Lcbc_decrypt
   1980 
        # ---- Encryption: one block at a time, %xmm2 carries the chain ----
   1981 	movups	(%r8),%xmm2
   1982 	movl	%r10d,%eax
   1983 	cmpq	$16,%rdx
   1984 	jb	.Lcbc_enc_tail
   1985 	subq	$16,%rdx
   1986 	jmp	.Lcbc_enc_loop
   1987 .align	16
   1988 .Lcbc_enc_loop:
   1989 	movups	(%rdi),%xmm3
   1990 	leaq	16(%rdi),%rdi
   1991 
        # %xmm2 ^= input ^ round-0 key, then run the rounds on %xmm2.
   1992 	movups	(%rcx),%xmm0
   1993 	movups	16(%rcx),%xmm1
   1994 	xorps	%xmm0,%xmm3
   1995 	leaq	32(%rcx),%rcx
   1996 	xorps	%xmm3,%xmm2
   1997 .Loop_enc1_15:
   1998 .byte	102,15,56,220,209
   1999 	decl	%eax
   2000 	movups	(%rcx),%xmm1
   2001 	leaq	16(%rcx),%rcx
   2002 	jnz	.Loop_enc1_15
   2003 .byte	102,15,56,221,209
   2004 	movl	%r10d,%eax
   2005 	movq	%r11,%rcx
   2006 	movups	%xmm2,0(%rsi)
   2007 	leaq	16(%rsi),%rsi
   2008 	subq	$16,%rdx
   2009 	jnc	.Lcbc_enc_loop
        # Borrow: fewer than 16 bytes were left.  Non-zero remainder goes
        # to the tail path; otherwise write the chaining value back.
   2010 	addq	$16,%rdx
   2011 	jnz	.Lcbc_enc_tail
   2012 	movups	%xmm2,(%r8)
   2013 	jmp	.Lcbc_ret
   2014 
   2015 .Lcbc_enc_tail:
        # Partial final block: copy the %rdx remaining input bytes to the
        # output buffer (rep movsb after swapping %rdi/%rsi), zero-fill up
        # to 16 bytes (rep stosb with %al = 0), then point both %rdi and
        # %rsi at that block and run the loop once more with %rdx = 0.
   2016 	movq	%rdx,%rcx
   2017 	xchgq	%rdi,%rsi
   2018 .long	0x9066A4F3
   2019 	movl	$16,%ecx
   2020 	subq	%rdx,%rcx
   2021 	xorl	%eax,%eax
   2022 .long	0x9066AAF3
   2023 	leaq	-16(%rdi),%rdi
   2024 	movl	%r10d,%eax
   2025 	movq	%rdi,%rsi
   2026 	movq	%r11,%rcx
   2027 	xorq	%rdx,%rdx
   2028 	jmp	.Lcbc_enc_loop
   2029 
   2030 .align	16
        # ---- Decryption: %xmm9 carries the chaining value ----
   2031 .Lcbc_decrypt:
   2032 	movups	(%r8),%xmm9
   2033 	movl	%r10d,%eax
   2034 	cmpq	$112,%rdx
   2035 	jbe	.Lcbc_dec_tail
        # More than 112 bytes: eight-block loop.  %r10d is halved for the
        # two-rounds-per-pass helper; the chaining value is parked at
        # -24(%rsp).
   2036 	shrl	$1,%r10d
   2037 	subq	$112,%rdx
   2038 	movl	%r10d,%eax
   2039 	movaps	%xmm9,-24(%rsp)
   2040 	jmp	.Lcbc_dec_loop8_enter
   2041 .align	16
   2042 .Lcbc_dec_loop8:
        # Loop back edge: %xmm0 holds the last ciphertext block of the
        # previous batch (next chaining value); %xmm9 holds that batch's
        # eighth plaintext block, which is stored only now.
   2043 	movaps	%xmm0,-24(%rsp)
   2044 	movups	%xmm9,(%rsi)
   2045 	leaq	16(%rsi),%rsi
   2046 .Lcbc_dec_loop8_enter:
        # Load eight ciphertext blocks into %xmm2..%xmm9, XOR in the
        # round-0 key and run the first aesdec round inline.
   2047 	movups	(%rcx),%xmm0
   2048 	movups	(%rdi),%xmm2
   2049 	movups	16(%rdi),%xmm3
   2050 	movups	16(%rcx),%xmm1
   2051 
   2052 	leaq	32(%rcx),%rcx
   2053 	movdqu	32(%rdi),%xmm4
   2054 	xorps	%xmm0,%xmm2
   2055 	movdqu	48(%rdi),%xmm5
   2056 	xorps	%xmm0,%xmm3
   2057 	movdqu	64(%rdi),%xmm6
   2058 .byte	102,15,56,222,209
   2059 	pxor	%xmm0,%xmm4
   2060 	movdqu	80(%rdi),%xmm7
   2061 .byte	102,15,56,222,217
   2062 	pxor	%xmm0,%xmm5
   2063 	movdqu	96(%rdi),%xmm8
   2064 .byte	102,15,56,222,225
   2065 	pxor	%xmm0,%xmm6
   2066 	movdqu	112(%rdi),%xmm9
   2067 .byte	102,15,56,222,233
   2068 	pxor	%xmm0,%xmm7
   2069 	decl	%eax
   2070 .byte	102,15,56,222,241
   2071 	pxor	%xmm0,%xmm8
   2072 .byte	102,15,56,222,249
   2073 	pxor	%xmm0,%xmm9
   2074 	movups	(%rcx),%xmm0
   2075 .byte	102,68,15,56,222,193
   2076 .byte	102,68,15,56,222,201
   2077 	movups	16(%rcx),%xmm1
   2078 
        # .Ldec_loop8_enter is defined outside this chunk.
        # NOTE(review): presumably an entry point inside _aesni_decrypt8
        # that finishes the remaining rounds -- confirm.  The pushed return
        # address occupies -8(%rsp) and does not overlap the 16 bytes at
        # -24(%rsp).
   2079 	call	.Ldec_loop8_enter
   2080 
        # CBC chaining: XOR each result with the preceding ciphertext block
        # (the first with the value at -24(%rsp)).  %xmm0 ends up holding
        # ciphertext block 8.
   2081 	movups	(%rdi),%xmm1
   2082 	movups	16(%rdi),%xmm0
   2083 	xorps	-24(%rsp),%xmm2
   2084 	xorps	%xmm1,%xmm3
   2085 	movups	32(%rdi),%xmm1
   2086 	xorps	%xmm0,%xmm4
   2087 	movups	48(%rdi),%xmm0
   2088 	xorps	%xmm1,%xmm5
   2089 	movups	64(%rdi),%xmm1
   2090 	xorps	%xmm0,%xmm6
   2091 	movups	80(%rdi),%xmm0
   2092 	xorps	%xmm1,%xmm7
   2093 	movups	96(%rdi),%xmm1
   2094 	xorps	%xmm0,%xmm8
   2095 	movups	112(%rdi),%xmm0
   2096 	xorps	%xmm1,%xmm9
        # Store seven blocks now; the eighth (%xmm9) is stored on the loop
        # back edge or after the loop.
   2097 	movups	%xmm2,(%rsi)
   2098 	movups	%xmm3,16(%rsi)
   2099 	movups	%xmm4,32(%rsi)
   2100 	movups	%xmm5,48(%rsi)
   2101 	movl	%r10d,%eax
   2102 	movups	%xmm6,64(%rsi)
   2103 	movq	%r11,%rcx
   2104 	movups	%xmm7,80(%rsi)
   2105 	leaq	128(%rdi),%rdi
   2106 	movups	%xmm8,96(%rsi)
   2107 	leaq	112(%rsi),%rsi
   2108 	subq	$128,%rdx
   2109 	ja	.Lcbc_dec_loop8
   2110 
        # Loop exit: %xmm2 = pending plaintext block, %xmm9 = chaining
        # value.  If nothing is left, finish; otherwise store the pending
        # block, restore the full round counter (2*%r10 + 1) and fall into
        # the tail.
   2111 	movaps	%xmm9,%xmm2
   2112 	movaps	%xmm0,%xmm9
   2113 	addq	$112,%rdx
   2114 	jle	.Lcbc_dec_tail_collected
   2115 	movups	%xmm2,(%rsi)
   2116 	leal	1(%r10,%r10,1),%eax
   2117 	leaq	16(%rsi),%rsi
   2118 .Lcbc_dec_tail:
        # Tail of up to 112 bytes: load blocks one by one until the count
        # is covered, then dispatch.  For the 1-3 block cases ciphertext
        # copies are kept in %xmm8/%xmm7/%xmm6 for chaining.
   2119 	movups	(%rdi),%xmm2
   2120 	movaps	%xmm2,%xmm8
   2121 	cmpq	$16,%rdx
   2122 	jbe	.Lcbc_dec_one
   2123 
   2124 	movups	16(%rdi),%xmm3
   2125 	movaps	%xmm3,%xmm7
   2126 	cmpq	$32,%rdx
   2127 	jbe	.Lcbc_dec_two
   2128 
   2129 	movups	32(%rdi),%xmm4
   2130 	movaps	%xmm4,%xmm6
   2131 	cmpq	$48,%rdx
   2132 	jbe	.Lcbc_dec_three
   2133 
   2134 	movups	48(%rdi),%xmm5
   2135 	cmpq	$64,%rdx
   2136 	jbe	.Lcbc_dec_four
   2137 
   2138 	movups	64(%rdi),%xmm6
   2139 	cmpq	$80,%rdx
   2140 	jbe	.Lcbc_dec_five
   2141 
   2142 	movups	80(%rdi),%xmm7
   2143 	cmpq	$96,%rdx
   2144 	jbe	.Lcbc_dec_six
   2145 
        # Seven blocks via _aesni_decrypt8 (defined outside this chunk);
        # %xmm9's result is discarded and %xmm9 is reloaded with the
        # seventh ciphertext block as the new chaining value.
   2146 	movups	96(%rdi),%xmm8
   2147 	movaps	%xmm9,-24(%rsp)
   2148 	call	_aesni_decrypt8
   2149 	movups	(%rdi),%xmm1
   2150 	movups	16(%rdi),%xmm0
   2151 	xorps	-24(%rsp),%xmm2
   2152 	xorps	%xmm1,%xmm3
   2153 	movups	32(%rdi),%xmm1
   2154 	xorps	%xmm0,%xmm4
   2155 	movups	48(%rdi),%xmm0
   2156 	xorps	%xmm1,%xmm5
   2157 	movups	64(%rdi),%xmm1
   2158 	xorps	%xmm0,%xmm6
   2159 	movups	80(%rdi),%xmm0
   2160 	xorps	%xmm1,%xmm7
   2161 	movups	96(%rdi),%xmm9
   2162 	xorps	%xmm0,%xmm8
   2163 	movups	%xmm2,(%rsi)
   2164 	movups	%xmm3,16(%rsi)
   2165 	movups	%xmm4,32(%rsi)
   2166 	movups	%xmm5,48(%rsi)
   2167 	movups	%xmm6,64(%rsi)
   2168 	movups	%xmm7,80(%rsi)
   2169 	leaq	96(%rsi),%rsi
   2170 	movaps	%xmm8,%xmm2
   2171 	subq	$112,%rdx
   2172 	jmp	.Lcbc_dec_tail_collected
   2173 .align	16
        # Each case below leaves the last plaintext block in %xmm2 (not yet
        # stored), the new chaining value in %xmm9, and %rdx reduced by the
        # bytes the case covers.
   2174 .Lcbc_dec_one:
   2175 	movups	(%rcx),%xmm0
   2176 	movups	16(%rcx),%xmm1
   2177 	leaq	32(%rcx),%rcx
   2178 	xorps	%xmm0,%xmm2
   2179 .Loop_dec1_16:
   2180 .byte	102,15,56,222,209
   2181 	decl	%eax
   2182 	movups	(%rcx),%xmm1
   2183 	leaq	16(%rcx),%rcx
   2184 	jnz	.Loop_dec1_16
   2185 .byte	102,15,56,223,209
   2186 	xorps	%xmm9,%xmm2
   2187 	movaps	%xmm8,%xmm9
   2188 	subq	$16,%rdx
   2189 	jmp	.Lcbc_dec_tail_collected
   2190 .align	16
   2191 .Lcbc_dec_two:
        # %xmm4 is zeroed so the three-block helper has a defined third
        # input; its result is ignored.
   2192 	xorps	%xmm4,%xmm4
   2193 	call	_aesni_decrypt3
   2194 	xorps	%xmm9,%xmm2
   2195 	xorps	%xmm8,%xmm3
   2196 	movups	%xmm2,(%rsi)
   2197 	movaps	%xmm7,%xmm9
   2198 	movaps	%xmm3,%xmm2
   2199 	leaq	16(%rsi),%rsi
   2200 	subq	$32,%rdx
   2201 	jmp	.Lcbc_dec_tail_collected
   2202 .align	16
   2203 .Lcbc_dec_three:
   2204 	call	_aesni_decrypt3
   2205 	xorps	%xmm9,%xmm2
   2206 	xorps	%xmm8,%xmm3
   2207 	movups	%xmm2,(%rsi)
   2208 	xorps	%xmm7,%xmm4
   2209 	movups	%xmm3,16(%rsi)
   2210 	movaps	%xmm6,%xmm9
   2211 	movaps	%xmm4,%xmm2
   2212 	leaq	32(%rsi),%rsi
   2213 	subq	$48,%rdx
   2214 	jmp	.Lcbc_dec_tail_collected
   2215 .align	16
   2216 .Lcbc_dec_four:
   2217 	call	_aesni_decrypt4
   2218 	xorps	%xmm9,%xmm2
   2219 	movups	48(%rdi),%xmm9
   2220 	xorps	%xmm8,%xmm3
   2221 	movups	%xmm2,(%rsi)
   2222 	xorps	%xmm7,%xmm4
   2223 	movups	%xmm3,16(%rsi)
   2224 	xorps	%xmm6,%xmm5
   2225 	movups	%xmm4,32(%rsi)
   2226 	movaps	%xmm5,%xmm2
   2227 	leaq	48(%rsi),%rsi
   2228 	subq	$64,%rdx
   2229 	jmp	.Lcbc_dec_tail_collected
   2230 .align	16
   2231 .Lcbc_dec_five:
        # %xmm7 is zeroed so the six-block helper has a defined sixth
        # input; its result is ignored.  %xmm6/%xmm7 no longer hold
        # ciphertext copies here, so blocks 2-4 are re-read from memory.
   2232 	xorps	%xmm7,%xmm7
   2233 	call	_aesni_decrypt6
   2234 	movups	16(%rdi),%xmm1
   2235 	movups	32(%rdi),%xmm0
   2236 	xorps	%xmm9,%xmm2
   2237 	xorps	%xmm8,%xmm3
   2238 	xorps	%xmm1,%xmm4
   2239 	movups	48(%rdi),%xmm1
   2240 	xorps	%xmm0,%xmm5
   2241 	movups	64(%rdi),%xmm9
   2242 	xorps	%xmm1,%xmm6
   2243 	movups	%xmm2,(%rsi)
   2244 	movups	%xmm3,16(%rsi)
   2245 	movups	%xmm4,32(%rsi)
   2246 	movups	%xmm5,48(%rsi)
   2247 	leaq	64(%rsi),%rsi
   2248 	movaps	%xmm6,%xmm2
   2249 	subq	$80,%rdx
   2250 	jmp	.Lcbc_dec_tail_collected
   2251 .align	16
   2252 .Lcbc_dec_six:
   2253 	call	_aesni_decrypt6
   2254 	movups	16(%rdi),%xmm1
   2255 	movups	32(%rdi),%xmm0
   2256 	xorps	%xmm9,%xmm2
   2257 	xorps	%xmm8,%xmm3
   2258 	xorps	%xmm1,%xmm4
   2259 	movups	48(%rdi),%xmm1
   2260 	xorps	%xmm0,%xmm5
   2261 	movups	64(%rdi),%xmm0
   2262 	xorps	%xmm1,%xmm6
   2263 	movups	80(%rdi),%xmm9
   2264 	xorps	%xmm0,%xmm7
   2265 	movups	%xmm2,(%rsi)
   2266 	movups	%xmm3,16(%rsi)
   2267 	movups	%xmm4,32(%rsi)
   2268 	movups	%xmm5,48(%rsi)
   2269 	movups	%xmm6,64(%rsi)
   2270 	leaq	80(%rsi),%rsi
   2271 	movaps	%xmm7,%xmm2
   2272 	subq	$96,%rdx
   2273 	jmp	.Lcbc_dec_tail_collected
   2274 .align	16
   2275 .Lcbc_dec_tail_collected:
        # Write the chaining value back through %r8.  movups does not
        # touch flags, so the jnz still tests the andq result: zero means
        # the length was a whole number of blocks and %xmm2 is stored whole.
   2276 	andq	$15,%rdx
   2277 	movups	%xmm9,(%r8)
   2278 	jnz	.Lcbc_dec_tail_partial
   2279 	movups	%xmm2,(%rsi)
   2280 	jmp	.Lcbc_dec_ret
   2281 .align	16
   2282 .Lcbc_dec_tail_partial:
        # Spill the last block to -24(%rsp) and copy part of it to the
        # output with rep movsb; the byte count is 16 - (%rdx & 15).
        # NOTE(review): that count is not the residual length itself when
        # the input is not a multiple of 16 bytes -- confirm the intended
        # behaviour for such lengths.
   2283 	movaps	%xmm2,-24(%rsp)
   2284 	movq	$16,%rcx
   2285 	movq	%rsi,%rdi
   2286 	subq	%rdx,%rcx
   2287 	leaq	-24(%rsp),%rsi
   2288 .long	0x9066A4F3
   2289 
   2290 .Lcbc_dec_ret:
   2291 .Lcbc_ret:
   2292 	.byte	0xf3,0xc3
   2293 .size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
   2294 .globl	aesni_set_decrypt_key
   2295 .type	aesni_set_decrypt_key,@function
   2296 .align	16
        # aesni_set_decrypt_key: build a decryption key schedule by first
        # calling __aesni_set_encrypt_key with the caller's arguments
        # untouched, then reversing the order of the round keys and applying
        # aesimc to every key except the first and last.
        #
        # Returns whatever __aesni_set_encrypt_key left in %eax; a non-zero
        # value skips the conversion.
        # Relies on the callee leaving %rdx pointing at the start of the
        # schedule and the stored round counter in %esi (visible for the
        # 128-bit path in this file: movl $9,%esi).
        # NOTE(review): confirm the same holds for the 192/256-bit paths,
        # which lie partly outside this chunk.
        #
        # Raw-byte instruction encodings used here:
        #   0x48,0x83,0xEC,0x08   sub $8,%rsp
        #   102,15,56,219,192     aesimc %xmm0,%xmm0  (66 0f 38 db c0)
        #   102,15,56,219,201     aesimc %xmm1,%xmm1  (66 0f 38 db c9)
        #   0xf3,0xc3             rep ret
   2297 aesni_set_decrypt_key:
   2298 .byte	0x48,0x83,0xEC,0x08
   2299 	call	__aesni_set_encrypt_key
        # %rsi = counter * 16, so %rdi = %rdx + 16*counter + 16 addresses
        # the last round key.  shll also clears the upper half of %rsi.
   2300 	shll	$4,%esi
   2301 	testl	%eax,%eax
   2302 	jnz	.Ldec_key_ret
   2303 	leaq	16(%rdx,%rsi,1),%rdi
   2304 
        # Swap the first and last round keys unchanged.
   2305 	movups	(%rdx),%xmm0
   2306 	movups	(%rdi),%xmm1
   2307 	movups	%xmm0,(%rdi)
   2308 	movups	%xmm1,(%rdx)
   2309 	leaq	16(%rdx),%rdx
   2310 	leaq	-16(%rdi),%rdi
   2311 
        # Walk inward from both ends: transform a pair of keys with aesimc
        # and store each at the other's position.  The pointers are moved
        # before the stores, hence the 16(%rdi) / -16(%rdx) offsets.
   2312 .Ldec_key_inverse:
   2313 	movups	(%rdx),%xmm0
   2314 	movups	(%rdi),%xmm1
   2315 .byte	102,15,56,219,192
   2316 .byte	102,15,56,219,201
   2317 	leaq	16(%rdx),%rdx
   2318 	leaq	-16(%rdi),%rdi
   2319 	movups	%xmm0,16(%rdi)
   2320 	movups	%xmm1,-16(%rdx)
   2321 	cmpq	%rdx,%rdi
   2322 	ja	.Ldec_key_inverse
   2323 
        # Remaining key: load from (%rdx), transform, store at (%rdi).
        # NOTE(review): presumably %rdx == %rdi here (the middle key of an
        # odd-length schedule), making this an in-place update -- confirm.
   2324 	movups	(%rdx),%xmm0
   2325 .byte	102,15,56,219,192
   2326 	movups	%xmm0,(%rdi)
   2327 .Ldec_key_ret:
        # Undo the 8-byte stack adjustment made at entry and return.
   2328 	addq	$8,%rsp
   2329 	.byte	0xf3,0xc3
   2330 .LSEH_end_set_decrypt_key:
   2331 .size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
   2332 .globl	aesni_set_encrypt_key
        # aesni_set_encrypt_key / __aesni_set_encrypt_key
        #   Expands a 128-, 192- or 256-bit AES user key into a round-key
        #   schedule using AESKEYGENASSIST and the .Lkey_expansion_* helpers.
        #   In:   %rdi = user key (read at 0 and, for 192/256, at 16)
        #         %esi = key length in bits (128, 192 or 256)
        #         %rdx = schedule buffer; round keys are written from offset 0
        #                and a round counter is written at offset 240.
        #   Out:  %rax =  0 on success
        #               = -1 if %rdi or %rdx is NULL
        #               = -2 if %esi is not 128/192/256
        #         %esi =  9 / 11 / 13 on success (the value stored at 240(%rdx);
        #                aesni_set_decrypt_key relies on this).
        #   Uses: %rax, %esi, flags, and (depending on key size) %xmm0-%xmm5.
        #         %rdx and %rdi are not modified.
   2333 .type	aesni_set_encrypt_key,@function
   2334 .align	16
   2335 aesni_set_encrypt_key:
   2336 __aesni_set_encrypt_key:	# second name for the same entry; this is the one aesni_set_decrypt_key calls
   2337 .byte	0x48,0x83,0xEC,0x08	# hand-encoded: subq $8,%rsp (undone at .Lenc_key_ret)
   2338 	movq	$-1,%rax	# default status: NULL argument
   2339 	testq	%rdi,%rdi
   2340 	jz	.Lenc_key_ret
   2341 	testq	%rdx,%rdx
   2342 	jz	.Lenc_key_ret
   2343 
   2344 	movups	(%rdi),%xmm0	# first 16 bytes of the user key
   2345 	xorps	%xmm4,%xmm4	# helpers need word 0 of %xmm4 to be zero
   2346 	leaq	16(%rdx),%rax	# %rax = write cursor, starting at the second key slot
   2347 	cmpl	$256,%esi
   2348 	je	.L14rounds
   2349 	cmpl	$192,%esi
   2350 	je	.L12rounds
   2351 	cmpl	$128,%esi
   2352 	jne	.Lbad_keybits
   2353 
        # 128-bit key: ten expansion steps; the immediates 1,2,4,...,128,27,54
        # are the per-step round constants fed to AESKEYGENASSIST.
   2354 .L10rounds:
   2355 	movl	$9,%esi	# round counter value stored at 240(%rdx) below
   2356 	movups	%xmm0,(%rdx)	# round key 0 = user key
   2357 .byte	102,15,58,223,200,1	# hand-encoded: aeskeygenassist $1,%xmm0,%xmm1
   2358 	call	.Lkey_expansion_128_cold	# first step: nothing to store yet, skip the store
   2359 .byte	102,15,58,223,200,2	# aeskeygenassist $2,%xmm0,%xmm1
   2360 	call	.Lkey_expansion_128	# stores previous key at (%rax), then derives the next
   2361 .byte	102,15,58,223,200,4
   2362 	call	.Lkey_expansion_128
   2363 .byte	102,15,58,223,200,8
   2364 	call	.Lkey_expansion_128
   2365 .byte	102,15,58,223,200,16
   2366 	call	.Lkey_expansion_128
   2367 .byte	102,15,58,223,200,32
   2368 	call	.Lkey_expansion_128
   2369 .byte	102,15,58,223,200,64
   2370 	call	.Lkey_expansion_128
   2371 .byte	102,15,58,223,200,128
   2372 	call	.Lkey_expansion_128
   2373 .byte	102,15,58,223,200,27
   2374 	call	.Lkey_expansion_128
   2375 .byte	102,15,58,223,200,54
   2376 	call	.Lkey_expansion_128
   2377 	movups	%xmm0,(%rax)	# final round key; %rax = %rdx+160 here
   2378 	movl	%esi,80(%rax)	# 160+80 = offset 240 of the schedule
   2379 	xorl	%eax,%eax	# status 0 (32-bit write also clears the upper half of %rax)
   2380 	jmp	.Lenc_key_ret
   2381 
        # 192-bit key: the 24-byte state lives in %xmm0 plus the low 8 bytes
        # of %xmm2; the 192a/192b helpers alternate because round keys
        # straddle that state.
   2382 .align	16
   2383 .L12rounds:
   2384 	movq	16(%rdi),%xmm2	# remaining 8 key bytes; upper half of %xmm2 is zeroed
   2385 	movl	$11,%esi	# round counter value stored at 240(%rdx) below
   2386 	movups	%xmm0,(%rdx)	# first 16 bytes of the schedule = first 16 key bytes
   2387 .byte	102,15,58,223,202,1	# hand-encoded: aeskeygenassist $1,%xmm2,%xmm1
   2388 	call	.Lkey_expansion_192a_cold	# first step: skip the store
   2389 .byte	102,15,58,223,202,2	# aeskeygenassist $2,%xmm2,%xmm1
   2390 	call	.Lkey_expansion_192b	# writes 32 bytes at (%rax), advances by 32
   2391 .byte	102,15,58,223,202,4
   2392 	call	.Lkey_expansion_192a	# writes 16 bytes at (%rax), advances by 16
   2393 .byte	102,15,58,223,202,8
   2394 	call	.Lkey_expansion_192b
   2395 .byte	102,15,58,223,202,16
   2396 	call	.Lkey_expansion_192a
   2397 .byte	102,15,58,223,202,32
   2398 	call	.Lkey_expansion_192b
   2399 .byte	102,15,58,223,202,64
   2400 	call	.Lkey_expansion_192a
   2401 .byte	102,15,58,223,202,128
   2402 	call	.Lkey_expansion_192b
   2403 	movups	%xmm0,(%rax)	# final round key; %rax = %rdx+192 here
   2404 	movl	%esi,48(%rax)	# 192+48 = offset 240 of the schedule
   2405 	xorq	%rax,%rax	# status 0
   2406 	jmp	.Lenc_key_ret
   2407 
        # 256-bit key: %xmm0 and %xmm2 hold the two 16-byte halves of the
        # state; 256a updates %xmm0 (keyed off %xmm2), 256b updates %xmm2
        # (keyed off %xmm0).
   2408 .align	16
   2409 .L14rounds:
   2410 	movups	16(%rdi),%xmm2	# second 16 bytes of the user key
   2411 	movl	$13,%esi	# round counter value stored at 240(%rdx) below
   2412 	leaq	16(%rax),%rax	# write cursor now at %rdx+32: two keys are stored directly
   2413 	movups	%xmm0,(%rdx)	# round key 0
   2414 	movups	%xmm2,16(%rdx)	# round key 1
   2415 .byte	102,15,58,223,202,1	# hand-encoded: aeskeygenassist $1,%xmm2,%xmm1
   2416 	call	.Lkey_expansion_256a_cold	# first step: skip the store
   2417 .byte	102,15,58,223,200,1	# hand-encoded: aeskeygenassist $1,%xmm0,%xmm1
   2418 	call	.Lkey_expansion_256b	# stores %xmm0, derives next %xmm2
   2419 .byte	102,15,58,223,202,2
   2420 	call	.Lkey_expansion_256a	# stores %xmm2, derives next %xmm0
   2421 .byte	102,15,58,223,200,2
   2422 	call	.Lkey_expansion_256b
   2423 .byte	102,15,58,223,202,4
   2424 	call	.Lkey_expansion_256a
   2425 .byte	102,15,58,223,200,4
   2426 	call	.Lkey_expansion_256b
   2427 .byte	102,15,58,223,202,8
   2428 	call	.Lkey_expansion_256a
   2429 .byte	102,15,58,223,200,8
   2430 	call	.Lkey_expansion_256b
   2431 .byte	102,15,58,223,202,16
   2432 	call	.Lkey_expansion_256a
   2433 .byte	102,15,58,223,200,16
   2434 	call	.Lkey_expansion_256b
   2435 .byte	102,15,58,223,202,32
   2436 	call	.Lkey_expansion_256a
   2437 .byte	102,15,58,223,200,32
   2438 	call	.Lkey_expansion_256b
   2439 .byte	102,15,58,223,202,64
   2440 	call	.Lkey_expansion_256a
   2441 	movups	%xmm0,(%rax)	# final round key; %rax = %rdx+224 here
   2442 	movl	%esi,16(%rax)	# 224+16 = offset 240 of the schedule
   2443 	xorq	%rax,%rax	# status 0
   2444 	jmp	.Lenc_key_ret
   2445 
   2446 .align	16
   2447 .Lbad_keybits:
   2448 	movq	$-2,%rax	# status: unsupported key length
   2449 .Lenc_key_ret:
   2450 	addq	$8,%rsp	# release the 8 bytes reserved at entry
   2451 	.byte	0xf3,0xc3	# hand-encoded: rep ret
   2452 .LSEH_end_set_encrypt_key:	# end-of-code marker; not referenced in the visible part of the file
   2453 
   2454 .align	16
        # .Lkey_expansion_128 / .Lkey_expansion_128_cold
        #   One AES-128 key-expansion step.
        #   In:   %xmm0 = current round key
        #         %xmm1 = AESKEYGENASSIST result for %xmm0 (set by the caller)
        #         %xmm4 = scratch whose word 0 must be zero (zeroed by the
        #                 xorps in aesni_set_encrypt_key; both shufps below
        #                 keep word 0 unchanged)
        #         %rax  = write cursor into the schedule
        #   Out:  %xmm0 = next round key; the normal entry also stores the
        #         incoming %xmm0 at (%rax) and advances %rax by 16.
        #   The _cold entry skips the store/advance.
   2455 .Lkey_expansion_128:
   2456 	movups	%xmm0,(%rax)	# emit the current round key
   2457 	leaq	16(%rax),%rax
   2458 .Lkey_expansion_128_cold:
   2459 	shufps	$16,%xmm0,%xmm4	# %xmm4 = {x4[0], x4[0], k1, k0} (words, low to high)
   2460 	xorps	%xmm4,%xmm0	# %xmm0 = {k0, k1, k2^k1, k3^k0}
   2461 	shufps	$140,%xmm0,%xmm4	# %xmm4 = {x4[0], k0, k0, k2^k1}
   2462 	xorps	%xmm4,%xmm0	# %xmm0 = running XOR of k0..k3 in each word
   2463 	shufps	$255,%xmm1,%xmm1	# broadcast word 3 of the keygenassist result
   2464 	xorps	%xmm1,%xmm0	# fold it into all four words
   2465 	.byte	0xf3,0xc3	# hand-encoded: rep ret
   2466 
   2467 .align	16
        # .Lkey_expansion_192a / _192a_cold / .Lkey_expansion_192b_warm
        #   One AES-192 key-expansion step on the 24-byte state held in
        #   %xmm0 (16 bytes) and the low half of %xmm2 (8 bytes).
        #   In:   %xmm1 = AESKEYGENASSIST result for %xmm2 (set by the caller)
        #         %xmm4 = scratch whose word 0 must be zero
        #         %rax  = write cursor into the schedule
        #   Out:  %xmm0, %xmm2 updated; %xmm5 = copy of the incoming %xmm2
        #         (consumed by .Lkey_expansion_192b); %xmm3 is scratch.
        #   Entry points: 192a stores %xmm0 at (%rax) and advances by 16;
        #   192a_cold skips that store; 192b_warm additionally skips the
        #   %xmm5 save and is the jump target of .Lkey_expansion_192b.
   2468 .Lkey_expansion_192a:
   2469 	movups	%xmm0,(%rax)	# emit 16 bytes of schedule
   2470 	leaq	16(%rax),%rax
   2471 .Lkey_expansion_192a_cold:
   2472 	movaps	%xmm2,%xmm5	# keep the old %xmm2 for the next 192b call
   2473 .Lkey_expansion_192b_warm:
   2474 	shufps	$16,%xmm0,%xmm4	# same running-XOR construction as the 128-bit helper ...
   2475 	movdqa	%xmm2,%xmm3	# ... interleaved with the update of %xmm2
   2476 	xorps	%xmm4,%xmm0
   2477 	shufps	$140,%xmm0,%xmm4
   2478 	pslldq	$4,%xmm3	# %xmm3 = old %xmm2 shifted left by one 32-bit word
   2479 	xorps	%xmm4,%xmm0
   2480 	pshufd	$85,%xmm1,%xmm1	# broadcast word 1 of the keygenassist result
   2481 	pxor	%xmm3,%xmm2	# %xmm2 ^= (%xmm2 << 32)
   2482 	pxor	%xmm1,%xmm0	# %xmm0 now holds the next 16 bytes of state
   2483 	pshufd	$255,%xmm0,%xmm3	# broadcast word 3 of the new %xmm0
   2484 	pxor	%xmm3,%xmm2	# chain it into %xmm2
   2485 	.byte	0xf3,0xc3	# hand-encoded: rep ret
   2486 
   2487 .align	16
        # .Lkey_expansion_192b
        #   Emits 32 bytes of schedule assembled from %xmm5, %xmm0 and %xmm2,
        #   advances %rax by 32, then tail-jumps into the shared expansion
        #   code at .Lkey_expansion_192b_warm (which returns to the caller).
        #   In:   %xmm5 = value saved by the preceding 192a/192a_cold call
        #         %xmm0, %xmm2 = current state; %rax = write cursor
        #   Uses: %xmm3, %xmm5 as scratch.
   2488 .Lkey_expansion_192b:
   2489 	movaps	%xmm0,%xmm3
   2490 	shufps	$68,%xmm0,%xmm5	# %xmm5 = {low qword of %xmm5, low qword of %xmm0}
   2491 	movups	%xmm5,(%rax)
   2492 	shufps	$78,%xmm2,%xmm3	# %xmm3 = {high qword of %xmm0, low qword of %xmm2}
   2493 	movups	%xmm3,16(%rax)
   2494 	leaq	32(%rax),%rax
   2495 	jmp	.Lkey_expansion_192b_warm	# tail call: skips the %xmm5 save at 192a_cold
   2496 
   2497 .align	16
        # .Lkey_expansion_256a / .Lkey_expansion_256a_cold
        #   AES-256 expansion step that derives the next %xmm0.
        #   In:   %xmm0 = half of the state to update
        #         %xmm1 = AESKEYGENASSIST result for %xmm2 (set by the caller)
        #         %xmm2 = other half; the normal entry stores it at (%rax)
        #         %xmm4 = scratch whose word 0 must be zero
        #         %rax  = write cursor into the schedule
        #   Out:  %xmm0 = next round key; the normal entry advances %rax by 16.
        #   The _cold entry skips the store/advance.
   2498 .Lkey_expansion_256a:
   2499 	movups	%xmm2,(%rax)	# emit the round key produced by the previous 256b step
   2500 	leaq	16(%rax),%rax
   2501 .Lkey_expansion_256a_cold:
   2502 	shufps	$16,%xmm0,%xmm4	# running XOR of the four words of %xmm0 ...
   2503 	xorps	%xmm4,%xmm0
   2504 	shufps	$140,%xmm0,%xmm4
   2505 	xorps	%xmm4,%xmm0	# ... complete
   2506 	shufps	$255,%xmm1,%xmm1	# broadcast word 3 of the keygenassist result
   2507 	xorps	%xmm1,%xmm0
   2508 	.byte	0xf3,0xc3	# hand-encoded: rep ret
   2509 
   2510 .align	16
        # .Lkey_expansion_256b
        #   AES-256 expansion step that derives the next %xmm2.
        #   In:   %xmm0 = round key to emit at (%rax)
        #         %xmm1 = AESKEYGENASSIST result for %xmm0 (set by the caller)
        #         %xmm2 = half of the state to update
        #         %xmm4 = scratch whose word 0 must be zero
        #   Out:  %xmm2 = next round key; %rax advanced by 16.
        #   Differs from 256a in the broadcast selector: word 2 ($170) of the
        #   keygenassist result is used here instead of word 3 ($255).
   2511 .Lkey_expansion_256b:
   2512 	movups	%xmm0,(%rax)	# emit the round key produced by the previous 256a step
   2513 	leaq	16(%rax),%rax
   2514 
   2515 	shufps	$16,%xmm2,%xmm4	# running XOR of the four words of %xmm2 ...
   2516 	xorps	%xmm4,%xmm2
   2517 	shufps	$140,%xmm2,%xmm4
   2518 	xorps	%xmm4,%xmm2	# ... complete
   2519 	shufps	$170,%xmm1,%xmm1	# broadcast word 2 of the keygenassist result
   2520 	xorps	%xmm1,%xmm2
   2521 	.byte	0xf3,0xc3	# hand-encoded: rep ret
        # Both entry names share one body, so both symbols get a size that
        # spans the main routine and all .Lkey_expansion_* helpers.
   2522 .size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
   2523 .size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
   2524 .align	64
        # Constant tables. Nothing in the key-setup routines above references
        # them; their users are elsewhere in the file.
        # NOTE(review): the usage notes below are inferred from the label
        # names and values - confirm against the code that loads them.
   2525 .Lbswap_mask:	# byte indices 15..0: as a byte-shuffle control this reverses a 16-byte vector
   2526 .byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   2527 .Lincrement32:	# three 32-bit lanes of 6, top lane 0 - presumably a per-lane counter step
   2528 .long	6,6,6,0
   2529 .Lincrement64:	# value 1 in the lowest 32-bit lane - presumably a counter increment
   2530 .long	1,0,0,0
   2531 .Lxts_magic:	# 0x87 in lane 0 and 1 in lane 2 - presumably the XTS tweak-doubling constants
   2532 .long	0x87,0,1,0
   2533 
        # NUL-terminated ASCII identification string:
        # "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
   2534 .byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
   2535 .align	64
   2536