/* Home | History | Annotate | Download | only in aes -- code-viewer navigation residue, commented out so it cannot be parsed as assembly */
      1 #if defined(__x86_64__)
      2 .text
      3 .extern	OPENSSL_ia32cap_P
      4 .hidden OPENSSL_ia32cap_P
      5 .globl	aesni_encrypt
      6 .hidden aesni_encrypt
      7 .type	aesni_encrypt,@function
      8 .align	16
/*
 * void aesni_encrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
 * SysV AMD64: rdi = in, rsi = out, rdx = key schedule.
 * Encrypts a single 16-byte block with AES-NI.  240(%rdx) holds the
 * round counter stored by the key-setup code; the loop performs that
 * many aesenc steps, then one aesenclast.  Key/state xmm registers are
 * zeroed before returning so no key material is left in vector state.
 * Clobbers: eax, rdx, xmm0-xmm2, flags.
 */
      9 aesni_encrypt:
     10 	movups	(%rdi),%xmm2		/* xmm2 = input block */
     11 	movl	240(%rdx),%eax		/* eax = round counter */
     12 	movups	(%rdx),%xmm0		/* round key 0 */
     13 	movups	16(%rdx),%xmm1		/* round key 1 */
     14 	leaq	32(%rdx),%rdx
     15 	xorps	%xmm0,%xmm2		/* initial whitening xor */
     16 .Loop_enc1_1:
     17 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
     18 	decl	%eax
     19 	movups	(%rdx),%xmm1		/* load next round key */
     20 	leaq	16(%rdx),%rdx
     21 	jnz	.Loop_enc1_1		/* flags from decl; movups/leaq preserve them */
     22 .byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
     23 	pxor	%xmm0,%xmm0		/* scrub key material */
     24 	pxor	%xmm1,%xmm1
     25 	movups	%xmm2,(%rsi)		/* store ciphertext */
     26 	pxor	%xmm2,%xmm2
     27 	.byte	0xf3,0xc3		/* rep ret */
     28 .size	aesni_encrypt,.-aesni_encrypt
     29 
     30 .globl	aesni_decrypt
     31 .hidden aesni_decrypt
     32 .type	aesni_decrypt,@function
     33 .align	16
/*
 * void aesni_decrypt(const uint8_t *in, uint8_t *out, const AES_KEY *key)
 * SysV AMD64: rdi = in, rsi = out, rdx = (decryption) key schedule.
 * Mirror of aesni_encrypt using aesdec/aesdeclast.  Scrubs key/state
 * xmm registers before returning.
 * Clobbers: eax, rdx, xmm0-xmm2, flags.
 */
     34 aesni_decrypt:
     35 	movups	(%rdi),%xmm2		/* xmm2 = input block */
     36 	movl	240(%rdx),%eax		/* eax = round counter */
     37 	movups	(%rdx),%xmm0		/* round key 0 */
     38 	movups	16(%rdx),%xmm1		/* round key 1 */
     39 	leaq	32(%rdx),%rdx
     40 	xorps	%xmm0,%xmm2		/* initial whitening xor */
     41 .Loop_dec1_2:
     42 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
     43 	decl	%eax
     44 	movups	(%rdx),%xmm1		/* load next round key */
     45 	leaq	16(%rdx),%rdx
     46 	jnz	.Loop_dec1_2
     47 .byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
     48 	pxor	%xmm0,%xmm0		/* scrub key material */
     49 	pxor	%xmm1,%xmm1
     50 	movups	%xmm2,(%rsi)		/* store plaintext */
     51 	pxor	%xmm2,%xmm2
     52 	.byte	0xf3,0xc3		/* rep ret */
     53 .size	aesni_decrypt, .-aesni_decrypt
     54 .type	_aesni_encrypt2,@function
     55 .align	16
/*
 * _aesni_encrypt2: encrypt two blocks (xmm2, xmm3) in parallel, in place.
 * In:  rcx = key schedule, eax = round counter (value at 240(key)),
 *      xmm2/xmm3 = blocks.
 * rcx is advanced to the end of the schedule and round keys are fetched
 * through a negative index in rax that counts up to zero; two round
 * keys are consumed per loop iteration, interleaving both streams to
 * hide aesenc latency.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
     56 _aesni_encrypt2:
     57 	movups	(%rcx),%xmm0		/* round key 0 */
     58 	shll	$4,%eax			/* eax = 16 bytes per round key */
     59 	movups	16(%rcx),%xmm1		/* round key 1 */
     60 	xorps	%xmm0,%xmm2		/* whiten both blocks */
     61 	xorps	%xmm0,%xmm3
     62 	movups	32(%rcx),%xmm0		/* round key 2 */
     63 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
     64 	negq	%rax
     65 	addq	$16,%rax		/* rax = negative offset of round key 3 */
     66 
     67 .Lenc_loop2:
     68 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
     69 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
     70 	movups	(%rcx,%rax,1),%xmm1	/* next odd round key */
     71 	addq	$32,%rax
     72 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
     73 .byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
     74 	movups	-16(%rcx,%rax,1),%xmm0	/* next even round key */
     75 	jnz	.Lenc_loop2		/* flags from addq; loop until rax == 0 */
     76 
     77 .byte	102,15,56,220,209	/* final aesenc round for both blocks */
     78 .byte	102,15,56,220,217
     79 .byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
     80 .byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
     81 	.byte	0xf3,0xc3		/* rep ret */
     82 .size	_aesni_encrypt2,.-_aesni_encrypt2
     83 .type	_aesni_decrypt2,@function
     84 .align	16
/*
 * _aesni_decrypt2: decrypt two blocks (xmm2, xmm3) in parallel, in place.
 * Same register contract and key-walk as _aesni_encrypt2, with
 * aesdec/aesdeclast instead of aesenc/aesenclast.
 * In:  rcx = key schedule, eax = round counter, xmm2/xmm3 = blocks.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
     85 _aesni_decrypt2:
     86 	movups	(%rcx),%xmm0		/* round key 0 */
     87 	shll	$4,%eax			/* bytes of round keys */
     88 	movups	16(%rcx),%xmm1
     89 	xorps	%xmm0,%xmm2		/* whiten both blocks */
     90 	xorps	%xmm0,%xmm3
     91 	movups	32(%rcx),%xmm0
     92 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
     93 	negq	%rax
     94 	addq	$16,%rax		/* negative key index, counts up to 0 */
     95 
     96 .Ldec_loop2:
     97 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
     98 .byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
     99 	movups	(%rcx,%rax,1),%xmm1
    100 	addq	$32,%rax
    101 .byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
    102 .byte	102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
    103 	movups	-16(%rcx,%rax,1),%xmm0
    104 	jnz	.Ldec_loop2
    105 
    106 .byte	102,15,56,222,209	/* final aesdec round */
    107 .byte	102,15,56,222,217
    108 .byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
    109 .byte	102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
    110 	.byte	0xf3,0xc3		/* rep ret */
    111 .size	_aesni_decrypt2,.-_aesni_decrypt2
    112 .type	_aesni_encrypt3,@function
    113 .align	16
/*
 * _aesni_encrypt3: encrypt three blocks (xmm2-xmm4) in parallel, in place.
 * Same contract and negative-index key-walk as _aesni_encrypt2.
 * In:  rcx = key schedule, eax = round counter, xmm2-xmm4 = blocks.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
    114 _aesni_encrypt3:
    115 	movups	(%rcx),%xmm0		/* round key 0 */
    116 	shll	$4,%eax
    117 	movups	16(%rcx),%xmm1
    118 	xorps	%xmm0,%xmm2		/* whiten all three blocks */
    119 	xorps	%xmm0,%xmm3
    120 	xorps	%xmm0,%xmm4
    121 	movups	32(%rcx),%xmm0
    122 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
    123 	negq	%rax
    124 	addq	$16,%rax		/* negative key index, counts up to 0 */
    125 
    126 .Lenc_loop3:
    127 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2..%xmm4 */
    128 .byte	102,15,56,220,217
    129 .byte	102,15,56,220,225
    130 	movups	(%rcx,%rax,1),%xmm1
    131 	addq	$32,%rax
    132 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2..%xmm4 */
    133 .byte	102,15,56,220,216
    134 .byte	102,15,56,220,224
    135 	movups	-16(%rcx,%rax,1),%xmm0
    136 	jnz	.Lenc_loop3
    137 
    138 .byte	102,15,56,220,209	/* final aesenc round */
    139 .byte	102,15,56,220,217
    140 .byte	102,15,56,220,225
    141 .byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2..%xmm4 */
    142 .byte	102,15,56,221,216
    143 .byte	102,15,56,221,224
    144 	.byte	0xf3,0xc3		/* rep ret */
    145 .size	_aesni_encrypt3,.-_aesni_encrypt3
    146 .type	_aesni_decrypt3,@function
    147 .align	16
/*
 * _aesni_decrypt3: decrypt three blocks (xmm2-xmm4) in parallel, in place.
 * Mirror of _aesni_encrypt3 with aesdec/aesdeclast.
 * In:  rcx = key schedule, eax = round counter, xmm2-xmm4 = blocks.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
    148 _aesni_decrypt3:
    149 	movups	(%rcx),%xmm0		/* round key 0 */
    150 	shll	$4,%eax
    151 	movups	16(%rcx),%xmm1
    152 	xorps	%xmm0,%xmm2		/* whiten all three blocks */
    153 	xorps	%xmm0,%xmm3
    154 	xorps	%xmm0,%xmm4
    155 	movups	32(%rcx),%xmm0
    156 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
    157 	negq	%rax
    158 	addq	$16,%rax		/* negative key index, counts up to 0 */
    159 
    160 .Ldec_loop3:
    161 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2..%xmm4 */
    162 .byte	102,15,56,222,217
    163 .byte	102,15,56,222,225
    164 	movups	(%rcx,%rax,1),%xmm1
    165 	addq	$32,%rax
    166 .byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2..%xmm4 */
    167 .byte	102,15,56,222,216
    168 .byte	102,15,56,222,224
    169 	movups	-16(%rcx,%rax,1),%xmm0
    170 	jnz	.Ldec_loop3
    171 
    172 .byte	102,15,56,222,209	/* final aesdec round */
    173 .byte	102,15,56,222,217
    174 .byte	102,15,56,222,225
    175 .byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2..%xmm4 */
    176 .byte	102,15,56,223,216
    177 .byte	102,15,56,223,224
    178 	.byte	0xf3,0xc3		/* rep ret */
    179 .size	_aesni_decrypt3,.-_aesni_decrypt3
    180 .type	_aesni_encrypt4,@function
    181 .align	16
/*
 * _aesni_encrypt4: encrypt four blocks (xmm2-xmm5) in parallel, in place.
 * Same contract and negative-index key-walk as _aesni_encrypt2.
 * In:  rcx = key schedule, eax = round counter, xmm2-xmm5 = blocks.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
    182 _aesni_encrypt4:
    183 	movups	(%rcx),%xmm0		/* round key 0 */
    184 	shll	$4,%eax
    185 	movups	16(%rcx),%xmm1
    186 	xorps	%xmm0,%xmm2		/* whiten all four blocks */
    187 	xorps	%xmm0,%xmm3
    188 	xorps	%xmm0,%xmm4
    189 	xorps	%xmm0,%xmm5
    190 	movups	32(%rcx),%xmm0
    191 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
    192 	negq	%rax
    193 .byte	0x0f,0x1f,0x00		/* nopl (%rax): code-alignment padding */
    194 	addq	$16,%rax		/* negative key index, counts up to 0 */
    195 
    196 .Lenc_loop4:
    197 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2..%xmm5 */
    198 .byte	102,15,56,220,217
    199 .byte	102,15,56,220,225
    200 .byte	102,15,56,220,233
    201 	movups	(%rcx,%rax,1),%xmm1
    202 	addq	$32,%rax
    203 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2..%xmm5 */
    204 .byte	102,15,56,220,216
    205 .byte	102,15,56,220,224
    206 .byte	102,15,56,220,232
    207 	movups	-16(%rcx,%rax,1),%xmm0
    208 	jnz	.Lenc_loop4
    209 
    210 .byte	102,15,56,220,209	/* final aesenc round */
    211 .byte	102,15,56,220,217
    212 .byte	102,15,56,220,225
    213 .byte	102,15,56,220,233
    214 .byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2..%xmm5 */
    215 .byte	102,15,56,221,216
    216 .byte	102,15,56,221,224
    217 .byte	102,15,56,221,232
    218 	.byte	0xf3,0xc3		/* rep ret */
    219 .size	_aesni_encrypt4,.-_aesni_encrypt4
    220 .type	_aesni_decrypt4,@function
    221 .align	16
/*
 * _aesni_decrypt4: decrypt four blocks (xmm2-xmm5) in parallel, in place.
 * Mirror of _aesni_encrypt4 with aesdec/aesdeclast.
 * In:  rcx = key schedule, eax = round counter, xmm2-xmm5 = blocks.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
    222 _aesni_decrypt4:
    223 	movups	(%rcx),%xmm0		/* round key 0 */
    224 	shll	$4,%eax
    225 	movups	16(%rcx),%xmm1
    226 	xorps	%xmm0,%xmm2		/* whiten all four blocks */
    227 	xorps	%xmm0,%xmm3
    228 	xorps	%xmm0,%xmm4
    229 	xorps	%xmm0,%xmm5
    230 	movups	32(%rcx),%xmm0
    231 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
    232 	negq	%rax
    233 .byte	0x0f,0x1f,0x00		/* nopl (%rax): code-alignment padding */
    234 	addq	$16,%rax		/* negative key index, counts up to 0 */
    235 
    236 .Ldec_loop4:
    237 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2..%xmm5 */
    238 .byte	102,15,56,222,217
    239 .byte	102,15,56,222,225
    240 .byte	102,15,56,222,233
    241 	movups	(%rcx,%rax,1),%xmm1
    242 	addq	$32,%rax
    243 .byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2..%xmm5 */
    244 .byte	102,15,56,222,216
    245 .byte	102,15,56,222,224
    246 .byte	102,15,56,222,232
    247 	movups	-16(%rcx,%rax,1),%xmm0
    248 	jnz	.Ldec_loop4
    249 
    250 .byte	102,15,56,222,209	/* final aesdec round */
    251 .byte	102,15,56,222,217
    252 .byte	102,15,56,222,225
    253 .byte	102,15,56,222,233
    254 .byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2..%xmm5 */
    255 .byte	102,15,56,223,216
    256 .byte	102,15,56,223,224
    257 .byte	102,15,56,223,232
    258 	.byte	0xf3,0xc3		/* rep ret */
    259 .size	_aesni_decrypt4,.-_aesni_decrypt4
    260 .type	_aesni_encrypt6,@function
    261 .align	16
/*
 * _aesni_encrypt6: encrypt six blocks (xmm2-xmm7) in parallel, in place.
 * In:  rcx = key schedule, eax = round counter, xmm2-xmm7 = blocks.
 * The round-1 aesenc of the first three blocks is interleaved with the
 * whitening of the last three and with the key-pointer setup, then the
 * code jumps into the middle of the round loop (.Lenc_loop6_enter) to
 * finish round 1 for the remaining blocks.  The ctr32 path elsewhere in
 * this file also calls directly into .Lenc_loop6.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
    262 _aesni_encrypt6:
    263 	movups	(%rcx),%xmm0		/* round key 0 */
    264 	shll	$4,%eax
    265 	movups	16(%rcx),%xmm1		/* round key 1 */
    266 	xorps	%xmm0,%xmm2		/* whiten blocks 0-2 */
    267 	pxor	%xmm0,%xmm3
    268 	pxor	%xmm0,%xmm4
    269 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 (round 1 starts early) */
    270 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
    271 	negq	%rax
    272 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
    273 	pxor	%xmm0,%xmm5		/* whiten blocks 3-5 */
    274 	pxor	%xmm0,%xmm6
    275 .byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
    276 	pxor	%xmm0,%xmm7
    277 	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
    278 	addq	$16,%rax
    279 	jmp	.Lenc_loop6_enter	/* finish round 1 for xmm5-xmm7 */
    280 .align	16
    281 .Lenc_loop6:
    282 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2..%xmm4 */
    283 .byte	102,15,56,220,217
    284 .byte	102,15,56,220,225
    285 .Lenc_loop6_enter:
    286 .byte	102,15,56,220,233	/* aesenc %xmm1,%xmm5..%xmm7 */
    287 .byte	102,15,56,220,241
    288 .byte	102,15,56,220,249
    289 	movups	(%rcx,%rax,1),%xmm1
    290 	addq	$32,%rax
    291 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2..%xmm7 */
    292 .byte	102,15,56,220,216
    293 .byte	102,15,56,220,224
    294 .byte	102,15,56,220,232
    295 .byte	102,15,56,220,240
    296 .byte	102,15,56,220,248
    297 	movups	-16(%rcx,%rax,1),%xmm0
    298 	jnz	.Lenc_loop6		/* loop until negative index hits 0 */
    299 
    300 .byte	102,15,56,220,209	/* final aesenc round */
    301 .byte	102,15,56,220,217
    302 .byte	102,15,56,220,225
    303 .byte	102,15,56,220,233
    304 .byte	102,15,56,220,241
    305 .byte	102,15,56,220,249
    306 .byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2..%xmm7 */
    307 .byte	102,15,56,221,216
    308 .byte	102,15,56,221,224
    309 .byte	102,15,56,221,232
    310 .byte	102,15,56,221,240
    311 .byte	102,15,56,221,248
    312 	.byte	0xf3,0xc3		/* rep ret */
    313 .size	_aesni_encrypt6,.-_aesni_encrypt6
    314 .type	_aesni_decrypt6,@function
    315 .align	16
/*
 * _aesni_decrypt6: decrypt six blocks (xmm2-xmm7) in parallel, in place.
 * Mirror of _aesni_encrypt6 (same early-start/jump-into-loop structure)
 * with aesdec/aesdeclast.
 * In:  rcx = key schedule, eax = round counter, xmm2-xmm7 = blocks.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
    316 _aesni_decrypt6:
    317 	movups	(%rcx),%xmm0		/* round key 0 */
    318 	shll	$4,%eax
    319 	movups	16(%rcx),%xmm1		/* round key 1 */
    320 	xorps	%xmm0,%xmm2		/* whiten blocks 0-2 */
    321 	pxor	%xmm0,%xmm3
    322 	pxor	%xmm0,%xmm4
    323 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 (round 1 starts early) */
    324 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
    325 	negq	%rax
    326 .byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
    327 	pxor	%xmm0,%xmm5		/* whiten blocks 3-5 */
    328 	pxor	%xmm0,%xmm6
    329 .byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
    330 	pxor	%xmm0,%xmm7
    331 	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
    332 	addq	$16,%rax
    333 	jmp	.Ldec_loop6_enter	/* finish round 1 for xmm5-xmm7 */
    334 .align	16
    335 .Ldec_loop6:
    336 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2..%xmm4 */
    337 .byte	102,15,56,222,217
    338 .byte	102,15,56,222,225
    339 .Ldec_loop6_enter:
    340 .byte	102,15,56,222,233	/* aesdec %xmm1,%xmm5..%xmm7 */
    341 .byte	102,15,56,222,241
    342 .byte	102,15,56,222,249
    343 	movups	(%rcx,%rax,1),%xmm1
    344 	addq	$32,%rax
    345 .byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2..%xmm7 */
    346 .byte	102,15,56,222,216
    347 .byte	102,15,56,222,224
    348 .byte	102,15,56,222,232
    349 .byte	102,15,56,222,240
    350 .byte	102,15,56,222,248
    351 	movups	-16(%rcx,%rax,1),%xmm0
    352 	jnz	.Ldec_loop6		/* loop until negative index hits 0 */
    353 
    354 .byte	102,15,56,222,209	/* final aesdec round */
    355 .byte	102,15,56,222,217
    356 .byte	102,15,56,222,225
    357 .byte	102,15,56,222,233
    358 .byte	102,15,56,222,241
    359 .byte	102,15,56,222,249
    360 .byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2..%xmm7 */
    361 .byte	102,15,56,223,216
    362 .byte	102,15,56,223,224
    363 .byte	102,15,56,223,232
    364 .byte	102,15,56,223,240
    365 .byte	102,15,56,223,248
    366 	.byte	0xf3,0xc3		/* rep ret */
    367 .size	_aesni_decrypt6,.-_aesni_decrypt6
    368 .type	_aesni_encrypt8,@function
    369 .align	16
/*
 * _aesni_encrypt8: encrypt eight blocks (xmm2-xmm9) in parallel, in place.
 * In:  rcx = key schedule, eax = round counter, xmm2-xmm9 = blocks.
 * Same negative-index key walk as the smaller helpers; the 102,68,...
 * byte sequences are the REX.R-prefixed aesenc forms targeting
 * xmm8/xmm9.  Entry jumps into the loop at .Lenc_loop8_inner so round 1
 * of xmm2/xmm3 (started during setup) is not repeated.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
    370 _aesni_encrypt8:
    371 	movups	(%rcx),%xmm0		/* round key 0 */
    372 	shll	$4,%eax
    373 	movups	16(%rcx),%xmm1		/* round key 1 */
    374 	xorps	%xmm0,%xmm2		/* whiten all eight blocks */
    375 	xorps	%xmm0,%xmm3
    376 	pxor	%xmm0,%xmm4
    377 	pxor	%xmm0,%xmm5
    378 	pxor	%xmm0,%xmm6
    379 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
    380 	negq	%rax
    381 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 (round 1 starts early) */
    382 	pxor	%xmm0,%xmm7
    383 	pxor	%xmm0,%xmm8
    384 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
    385 	pxor	%xmm0,%xmm9
    386 	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
    387 	addq	$16,%rax
    388 	jmp	.Lenc_loop8_inner	/* finish round 1 for xmm4-xmm9 */
    389 .align	16
    390 .Lenc_loop8:
    391 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2/%xmm3 */
    392 .byte	102,15,56,220,217
    393 .Lenc_loop8_inner:
    394 .byte	102,15,56,220,225	/* aesenc %xmm1,%xmm4..%xmm7 */
    395 .byte	102,15,56,220,233
    396 .byte	102,15,56,220,241
    397 .byte	102,15,56,220,249
    398 .byte	102,68,15,56,220,193	/* aesenc %xmm1,%xmm8 */
    399 .byte	102,68,15,56,220,201	/* aesenc %xmm1,%xmm9 */
    400 .Lenc_loop8_enter:
    401 	movups	(%rcx,%rax,1),%xmm1
    402 	addq	$32,%rax
    403 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2..%xmm7 */
    404 .byte	102,15,56,220,216
    405 .byte	102,15,56,220,224
    406 .byte	102,15,56,220,232
    407 .byte	102,15,56,220,240
    408 .byte	102,15,56,220,248
    409 .byte	102,68,15,56,220,192	/* aesenc %xmm0,%xmm8 */
    410 .byte	102,68,15,56,220,200	/* aesenc %xmm0,%xmm9 */
    411 	movups	-16(%rcx,%rax,1),%xmm0
    412 	jnz	.Lenc_loop8		/* loop until negative index hits 0 */
    413 
    414 .byte	102,15,56,220,209	/* final aesenc round */
    415 .byte	102,15,56,220,217
    416 .byte	102,15,56,220,225
    417 .byte	102,15,56,220,233
    418 .byte	102,15,56,220,241
    419 .byte	102,15,56,220,249
    420 .byte	102,68,15,56,220,193
    421 .byte	102,68,15,56,220,201
    422 .byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2..%xmm9 */
    423 .byte	102,15,56,221,216
    424 .byte	102,15,56,221,224
    425 .byte	102,15,56,221,232
    426 .byte	102,15,56,221,240
    427 .byte	102,15,56,221,248
    428 .byte	102,68,15,56,221,192
    429 .byte	102,68,15,56,221,200
    430 	.byte	0xf3,0xc3		/* rep ret */
    431 .size	_aesni_encrypt8,.-_aesni_encrypt8
    432 .type	_aesni_decrypt8,@function
    433 .align	16
/*
 * _aesni_decrypt8: decrypt eight blocks (xmm2-xmm9) in parallel, in place.
 * Mirror of _aesni_encrypt8 with aesdec/aesdeclast (102,68,... forms
 * are the REX.R-prefixed aesdec targeting xmm8/xmm9).
 * In:  rcx = key schedule, eax = round counter, xmm2-xmm9 = blocks.
 * Clobbers: rax, rcx, xmm0, xmm1, flags.
 */
    434 _aesni_decrypt8:
    435 	movups	(%rcx),%xmm0		/* round key 0 */
    436 	shll	$4,%eax
    437 	movups	16(%rcx),%xmm1		/* round key 1 */
    438 	xorps	%xmm0,%xmm2		/* whiten all eight blocks */
    439 	xorps	%xmm0,%xmm3
    440 	pxor	%xmm0,%xmm4
    441 	pxor	%xmm0,%xmm5
    442 	pxor	%xmm0,%xmm6
    443 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of schedule */
    444 	negq	%rax
    445 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 (round 1 starts early) */
    446 	pxor	%xmm0,%xmm7
    447 	pxor	%xmm0,%xmm8
    448 .byte	102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
    449 	pxor	%xmm0,%xmm9
    450 	movups	(%rcx,%rax,1),%xmm0	/* round key 2 */
    451 	addq	$16,%rax
    452 	jmp	.Ldec_loop8_inner	/* finish round 1 for xmm4-xmm9 */
    453 .align	16
    454 .Ldec_loop8:
    455 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2/%xmm3 */
    456 .byte	102,15,56,222,217
    457 .Ldec_loop8_inner:
    458 .byte	102,15,56,222,225	/* aesdec %xmm1,%xmm4..%xmm7 */
    459 .byte	102,15,56,222,233
    460 .byte	102,15,56,222,241
    461 .byte	102,15,56,222,249
    462 .byte	102,68,15,56,222,193	/* aesdec %xmm1,%xmm8 */
    463 .byte	102,68,15,56,222,201	/* aesdec %xmm1,%xmm9 */
    464 .Ldec_loop8_enter:
    465 	movups	(%rcx,%rax,1),%xmm1
    466 	addq	$32,%rax
    467 .byte	102,15,56,222,208	/* aesdec %xmm0,%xmm2..%xmm7 */
    468 .byte	102,15,56,222,216
    469 .byte	102,15,56,222,224
    470 .byte	102,15,56,222,232
    471 .byte	102,15,56,222,240
    472 .byte	102,15,56,222,248
    473 .byte	102,68,15,56,222,192	/* aesdec %xmm0,%xmm8 */
    474 .byte	102,68,15,56,222,200	/* aesdec %xmm0,%xmm9 */
    475 	movups	-16(%rcx,%rax,1),%xmm0
    476 	jnz	.Ldec_loop8		/* loop until negative index hits 0 */
    477 
    478 .byte	102,15,56,222,209	/* final aesdec round */
    479 .byte	102,15,56,222,217
    480 .byte	102,15,56,222,225
    481 .byte	102,15,56,222,233
    482 .byte	102,15,56,222,241
    483 .byte	102,15,56,222,249
    484 .byte	102,68,15,56,222,193
    485 .byte	102,68,15,56,222,201
    486 .byte	102,15,56,223,208	/* aesdeclast %xmm0,%xmm2..%xmm9 */
    487 .byte	102,15,56,223,216
    488 .byte	102,15,56,223,224
    489 .byte	102,15,56,223,232
    490 .byte	102,15,56,223,240
    491 .byte	102,15,56,223,248
    492 .byte	102,68,15,56,223,192
    493 .byte	102,68,15,56,223,200
    494 	.byte	0xf3,0xc3		/* rep ret */
    495 .size	_aesni_decrypt8,.-_aesni_decrypt8
    496 .globl	aesni_ecb_encrypt
    497 .hidden aesni_ecb_encrypt
    498 .type	aesni_ecb_encrypt,@function
    499 .align	16
/*
 * void aesni_ecb_encrypt(const uint8_t *in, uint8_t *out, size_t len,
 *                        const AES_KEY *key, int enc)
 * SysV AMD64: rdi = in, rsi = out, rdx = len (truncated to a multiple
 * of 16), rcx = key, r8d = enc (nonzero: encrypt; zero: decrypt).
 * Bulk path handles 8 blocks per iteration via _aesni_encrypt8 /
 * _aesni_decrypt8 with the stores of one batch overlapped with the
 * loads of the next; 1..7 remaining blocks go through the tail ladders.
 * r11 / r10d keep the key pointer / round counter across helper calls
 * (the helpers clobber rcx / eax).  The decrypt paths additionally
 * scrub the xmm registers that held plaintext.
 */
    500 aesni_ecb_encrypt:
    501 	andq	$-16,%rdx		/* len &= ~15: whole blocks only */
    502 	jz	.Lecb_ret
    503 
    504 	movl	240(%rcx),%eax		/* eax = round counter */
    505 	movups	(%rcx),%xmm0
    506 	movq	%rcx,%r11		/* r11 = key (helpers clobber rcx) */
    507 	movl	%eax,%r10d		/* r10d = rounds (helpers clobber eax) */
    508 	testl	%r8d,%r8d
    509 	jz	.Lecb_decrypt		/* enc == 0 -> decrypt side */
    510 
    511 	cmpq	$128,%rdx
    512 	jb	.Lecb_enc_tail		/* fewer than 8 blocks */
    513 
    514 	movdqu	(%rdi),%xmm2		/* preload first 8 blocks */
    515 	movdqu	16(%rdi),%xmm3
    516 	movdqu	32(%rdi),%xmm4
    517 	movdqu	48(%rdi),%xmm5
    518 	movdqu	64(%rdi),%xmm6
    519 	movdqu	80(%rdi),%xmm7
    520 	movdqu	96(%rdi),%xmm8
    521 	movdqu	112(%rdi),%xmm9
    522 	leaq	128(%rdi),%rdi
    523 	subq	$128,%rdx
    524 	jmp	.Lecb_enc_loop8_enter
    525 .align	16
/* Store the previous batch while loading the next: software pipelining. */
    526 .Lecb_enc_loop8:
    527 	movups	%xmm2,(%rsi)
    528 	movq	%r11,%rcx		/* restore key/rounds for the helper */
    529 	movdqu	(%rdi),%xmm2
    530 	movl	%r10d,%eax
    531 	movups	%xmm3,16(%rsi)
    532 	movdqu	16(%rdi),%xmm3
    533 	movups	%xmm4,32(%rsi)
    534 	movdqu	32(%rdi),%xmm4
    535 	movups	%xmm5,48(%rsi)
    536 	movdqu	48(%rdi),%xmm5
    537 	movups	%xmm6,64(%rsi)
    538 	movdqu	64(%rdi),%xmm6
    539 	movups	%xmm7,80(%rsi)
    540 	movdqu	80(%rdi),%xmm7
    541 	movups	%xmm8,96(%rsi)
    542 	movdqu	96(%rdi),%xmm8
    543 	movups	%xmm9,112(%rsi)
    544 	leaq	128(%rsi),%rsi
    545 	movdqu	112(%rdi),%xmm9
    546 	leaq	128(%rdi),%rdi
    547 .Lecb_enc_loop8_enter:
    548 
    549 	call	_aesni_encrypt8
    550 
    551 	subq	$128,%rdx
    552 	jnc	.Lecb_enc_loop8		/* another full 8-block batch left */
    553 
/* Flush the final full batch. */
    554 	movups	%xmm2,(%rsi)
    555 	movq	%r11,%rcx
    556 	movups	%xmm3,16(%rsi)
    557 	movl	%r10d,%eax
    558 	movups	%xmm4,32(%rsi)
    559 	movups	%xmm5,48(%rsi)
    560 	movups	%xmm6,64(%rsi)
    561 	movups	%xmm7,80(%rsi)
    562 	movups	%xmm8,96(%rsi)
    563 	movups	%xmm9,112(%rsi)
    564 	leaq	128(%rsi),%rsi
    565 	addq	$128,%rdx		/* undo borrow: rdx = remaining bytes */
    566 	jz	.Lecb_ret
    567 
/* Encrypt tail: 1..7 blocks, dispatched by remaining length. */
    568 .Lecb_enc_tail:
    569 	movups	(%rdi),%xmm2
    570 	cmpq	$32,%rdx
    571 	jb	.Lecb_enc_one
    572 	movups	16(%rdi),%xmm3
    573 	je	.Lecb_enc_two
    574 	movups	32(%rdi),%xmm4
    575 	cmpq	$64,%rdx
    576 	jb	.Lecb_enc_three
    577 	movups	48(%rdi),%xmm5
    578 	je	.Lecb_enc_four
    579 	movups	64(%rdi),%xmm6
    580 	cmpq	$96,%rdx
    581 	jb	.Lecb_enc_five
    582 	movups	80(%rdi),%xmm7
    583 	je	.Lecb_enc_six
/* 7 blocks: run the 8-wide helper with a zeroed 8th lane. */
    584 	movdqu	96(%rdi),%xmm8
    585 	xorps	%xmm9,%xmm9
    586 	call	_aesni_encrypt8
    587 	movups	%xmm2,(%rsi)
    588 	movups	%xmm3,16(%rsi)
    589 	movups	%xmm4,32(%rsi)
    590 	movups	%xmm5,48(%rsi)
    591 	movups	%xmm6,64(%rsi)
    592 	movups	%xmm7,80(%rsi)
    593 	movups	%xmm8,96(%rsi)
    594 	jmp	.Lecb_ret
    595 .align	16
/* 1 block: inline single-block loop (same shape as aesni_encrypt). */
    596 .Lecb_enc_one:
    597 	movups	(%rcx),%xmm0
    598 	movups	16(%rcx),%xmm1
    599 	leaq	32(%rcx),%rcx
    600 	xorps	%xmm0,%xmm2
    601 .Loop_enc1_3:
    602 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
    603 	decl	%eax
    604 	movups	(%rcx),%xmm1
    605 	leaq	16(%rcx),%rcx
    606 	jnz	.Loop_enc1_3
    607 .byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
    608 	movups	%xmm2,(%rsi)
    609 	jmp	.Lecb_ret
    610 .align	16
    611 .Lecb_enc_two:
    612 	call	_aesni_encrypt2
    613 	movups	%xmm2,(%rsi)
    614 	movups	%xmm3,16(%rsi)
    615 	jmp	.Lecb_ret
    616 .align	16
    617 .Lecb_enc_three:
    618 	call	_aesni_encrypt3
    619 	movups	%xmm2,(%rsi)
    620 	movups	%xmm3,16(%rsi)
    621 	movups	%xmm4,32(%rsi)
    622 	jmp	.Lecb_ret
    623 .align	16
    624 .Lecb_enc_four:
    625 	call	_aesni_encrypt4
    626 	movups	%xmm2,(%rsi)
    627 	movups	%xmm3,16(%rsi)
    628 	movups	%xmm4,32(%rsi)
    629 	movups	%xmm5,48(%rsi)
    630 	jmp	.Lecb_ret
    631 .align	16
/* 5 blocks: run the 6-wide helper with a zeroed 6th lane. */
    632 .Lecb_enc_five:
    633 	xorps	%xmm7,%xmm7
    634 	call	_aesni_encrypt6
    635 	movups	%xmm2,(%rsi)
    636 	movups	%xmm3,16(%rsi)
    637 	movups	%xmm4,32(%rsi)
    638 	movups	%xmm5,48(%rsi)
    639 	movups	%xmm6,64(%rsi)
    640 	jmp	.Lecb_ret
    641 .align	16
    642 .Lecb_enc_six:
    643 	call	_aesni_encrypt6
    644 	movups	%xmm2,(%rsi)
    645 	movups	%xmm3,16(%rsi)
    646 	movups	%xmm4,32(%rsi)
    647 	movups	%xmm5,48(%rsi)
    648 	movups	%xmm6,64(%rsi)
    649 	movups	%xmm7,80(%rsi)
    650 	jmp	.Lecb_ret
    651 
    652 .align	16
/* Decrypt side: same pipelined 8-block structure, plus scrubbing of the
 * xmm registers that held decrypted plaintext. */
    653 .Lecb_decrypt:
    654 	cmpq	$128,%rdx
    655 	jb	.Lecb_dec_tail
    656 
    657 	movdqu	(%rdi),%xmm2		/* preload first 8 blocks */
    658 	movdqu	16(%rdi),%xmm3
    659 	movdqu	32(%rdi),%xmm4
    660 	movdqu	48(%rdi),%xmm5
    661 	movdqu	64(%rdi),%xmm6
    662 	movdqu	80(%rdi),%xmm7
    663 	movdqu	96(%rdi),%xmm8
    664 	movdqu	112(%rdi),%xmm9
    665 	leaq	128(%rdi),%rdi
    666 	subq	$128,%rdx
    667 	jmp	.Lecb_dec_loop8_enter
    668 .align	16
    669 .Lecb_dec_loop8:
    670 	movups	%xmm2,(%rsi)
    671 	movq	%r11,%rcx		/* restore key/rounds for the helper */
    672 	movdqu	(%rdi),%xmm2
    673 	movl	%r10d,%eax
    674 	movups	%xmm3,16(%rsi)
    675 	movdqu	16(%rdi),%xmm3
    676 	movups	%xmm4,32(%rsi)
    677 	movdqu	32(%rdi),%xmm4
    678 	movups	%xmm5,48(%rsi)
    679 	movdqu	48(%rdi),%xmm5
    680 	movups	%xmm6,64(%rsi)
    681 	movdqu	64(%rdi),%xmm6
    682 	movups	%xmm7,80(%rsi)
    683 	movdqu	80(%rdi),%xmm7
    684 	movups	%xmm8,96(%rsi)
    685 	movdqu	96(%rdi),%xmm8
    686 	movups	%xmm9,112(%rsi)
    687 	leaq	128(%rsi),%rsi
    688 	movdqu	112(%rdi),%xmm9
    689 	leaq	128(%rdi),%rdi
    690 .Lecb_dec_loop8_enter:
    691 
    692 	call	_aesni_decrypt8
    693 
    694 	movups	(%r11),%xmm0
    695 	subq	$128,%rdx
    696 	jnc	.Lecb_dec_loop8
    697 
/* Flush the final full batch, scrubbing plaintext registers as we go. */
    698 	movups	%xmm2,(%rsi)
    699 	pxor	%xmm2,%xmm2
    700 	movq	%r11,%rcx
    701 	movups	%xmm3,16(%rsi)
    702 	pxor	%xmm3,%xmm3
    703 	movl	%r10d,%eax
    704 	movups	%xmm4,32(%rsi)
    705 	pxor	%xmm4,%xmm4
    706 	movups	%xmm5,48(%rsi)
    707 	pxor	%xmm5,%xmm5
    708 	movups	%xmm6,64(%rsi)
    709 	pxor	%xmm6,%xmm6
    710 	movups	%xmm7,80(%rsi)
    711 	pxor	%xmm7,%xmm7
    712 	movups	%xmm8,96(%rsi)
    713 	pxor	%xmm8,%xmm8
    714 	movups	%xmm9,112(%rsi)
    715 	pxor	%xmm9,%xmm9
    716 	leaq	128(%rsi),%rsi
    717 	addq	$128,%rdx		/* rdx = remaining bytes */
    718 	jz	.Lecb_ret
    719 
/* Decrypt tail: 1..7 blocks, dispatched by remaining length. */
    720 .Lecb_dec_tail:
    721 	movups	(%rdi),%xmm2
    722 	cmpq	$32,%rdx
    723 	jb	.Lecb_dec_one
    724 	movups	16(%rdi),%xmm3
    725 	je	.Lecb_dec_two
    726 	movups	32(%rdi),%xmm4
    727 	cmpq	$64,%rdx
    728 	jb	.Lecb_dec_three
    729 	movups	48(%rdi),%xmm5
    730 	je	.Lecb_dec_four
    731 	movups	64(%rdi),%xmm6
    732 	cmpq	$96,%rdx
    733 	jb	.Lecb_dec_five
    734 	movups	80(%rdi),%xmm7
    735 	je	.Lecb_dec_six
/* 7 blocks with a zeroed 8th lane. */
    736 	movups	96(%rdi),%xmm8
    737 	movups	(%rcx),%xmm0
    738 	xorps	%xmm9,%xmm9
    739 	call	_aesni_decrypt8
    740 	movups	%xmm2,(%rsi)
    741 	pxor	%xmm2,%xmm2
    742 	movups	%xmm3,16(%rsi)
    743 	pxor	%xmm3,%xmm3
    744 	movups	%xmm4,32(%rsi)
    745 	pxor	%xmm4,%xmm4
    746 	movups	%xmm5,48(%rsi)
    747 	pxor	%xmm5,%xmm5
    748 	movups	%xmm6,64(%rsi)
    749 	pxor	%xmm6,%xmm6
    750 	movups	%xmm7,80(%rsi)
    751 	pxor	%xmm7,%xmm7
    752 	movups	%xmm8,96(%rsi)
    753 	pxor	%xmm8,%xmm8
    754 	pxor	%xmm9,%xmm9
    755 	jmp	.Lecb_ret
    756 .align	16
/* 1 block: inline single-block loop (same shape as aesni_decrypt). */
    757 .Lecb_dec_one:
    758 	movups	(%rcx),%xmm0
    759 	movups	16(%rcx),%xmm1
    760 	leaq	32(%rcx),%rcx
    761 	xorps	%xmm0,%xmm2
    762 .Loop_dec1_4:
    763 .byte	102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
    764 	decl	%eax
    765 	movups	(%rcx),%xmm1
    766 	leaq	16(%rcx),%rcx
    767 	jnz	.Loop_dec1_4
    768 .byte	102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
    769 	movups	%xmm2,(%rsi)
    770 	pxor	%xmm2,%xmm2
    771 	jmp	.Lecb_ret
    772 .align	16
    773 .Lecb_dec_two:
    774 	call	_aesni_decrypt2
    775 	movups	%xmm2,(%rsi)
    776 	pxor	%xmm2,%xmm2
    777 	movups	%xmm3,16(%rsi)
    778 	pxor	%xmm3,%xmm3
    779 	jmp	.Lecb_ret
    780 .align	16
    781 .Lecb_dec_three:
    782 	call	_aesni_decrypt3
    783 	movups	%xmm2,(%rsi)
    784 	pxor	%xmm2,%xmm2
    785 	movups	%xmm3,16(%rsi)
    786 	pxor	%xmm3,%xmm3
    787 	movups	%xmm4,32(%rsi)
    788 	pxor	%xmm4,%xmm4
    789 	jmp	.Lecb_ret
    790 .align	16
    791 .Lecb_dec_four:
    792 	call	_aesni_decrypt4
    793 	movups	%xmm2,(%rsi)
    794 	pxor	%xmm2,%xmm2
    795 	movups	%xmm3,16(%rsi)
    796 	pxor	%xmm3,%xmm3
    797 	movups	%xmm4,32(%rsi)
    798 	pxor	%xmm4,%xmm4
    799 	movups	%xmm5,48(%rsi)
    800 	pxor	%xmm5,%xmm5
    801 	jmp	.Lecb_ret
    802 .align	16
/* 5 blocks with a zeroed 6th lane. */
    803 .Lecb_dec_five:
    804 	xorps	%xmm7,%xmm7
    805 	call	_aesni_decrypt6
    806 	movups	%xmm2,(%rsi)
    807 	pxor	%xmm2,%xmm2
    808 	movups	%xmm3,16(%rsi)
    809 	pxor	%xmm3,%xmm3
    810 	movups	%xmm4,32(%rsi)
    811 	pxor	%xmm4,%xmm4
    812 	movups	%xmm5,48(%rsi)
    813 	pxor	%xmm5,%xmm5
    814 	movups	%xmm6,64(%rsi)
    815 	pxor	%xmm6,%xmm6
    816 	pxor	%xmm7,%xmm7
    817 	jmp	.Lecb_ret
    818 .align	16
    819 .Lecb_dec_six:
    820 	call	_aesni_decrypt6
    821 	movups	%xmm2,(%rsi)
    822 	pxor	%xmm2,%xmm2
    823 	movups	%xmm3,16(%rsi)
    824 	pxor	%xmm3,%xmm3
    825 	movups	%xmm4,32(%rsi)
    826 	pxor	%xmm4,%xmm4
    827 	movups	%xmm5,48(%rsi)
    828 	pxor	%xmm5,%xmm5
    829 	movups	%xmm6,64(%rsi)
    830 	pxor	%xmm6,%xmm6
    831 	movups	%xmm7,80(%rsi)
    832 	pxor	%xmm7,%xmm7
    833 
/* Common exit: scrub the round-key registers. */
    834 .Lecb_ret:
    835 	xorps	%xmm0,%xmm0
    836 	pxor	%xmm1,%xmm1
    837 	.byte	0xf3,0xc3		/* rep ret */
    838 .size	aesni_ecb_encrypt,.-aesni_ecb_encrypt
    839 .globl	aesni_ccm64_encrypt_blocks
    840 .hidden aesni_ccm64_encrypt_blocks
    841 .type	aesni_ccm64_encrypt_blocks,@function
    842 .align	16
/*
 * void aesni_ccm64_encrypt_blocks(const uint8_t *in, uint8_t *out,
 *         size_t blocks, const AES_KEY *key, const uint8_t ivec[16],
 *         uint8_t cmac[16])
 * SysV AMD64: rdi = in, rsi = out, rdx = blocks, rcx = key,
 * r8 = ivec (counter), r9 = cmac (running CBC-MAC, updated in place).
 * Per block, two AES streams run interleaved through one key walk:
 *   xmm2 = E(counter)  -> xored with the input to produce ciphertext;
 *   xmm3 = E(cmac ^ plaintext) -> next CBC-MAC value.
 * .Lincrement64 / .Lbswap_mask are rip-relative constants defined
 * elsewhere in this file; the counter is kept byte-swapped (pshufb)
 * and bumped with a 64-bit paddq per block.
 * Register roles: r11 = key, r10 = negative key-walk offset,
 * xmm6 = counter, xmm7 = bswap mask, xmm8 = current data block,
 * xmm9 = increment constant.  Sensitive xmm state is wiped on exit.
 */
    843 aesni_ccm64_encrypt_blocks:
    844 	movl	240(%rcx),%eax		/* eax = round counter */
    845 	movdqu	(%r8),%xmm6		/* xmm6 = counter block (ivec) */
    846 	movdqa	.Lincrement64(%rip),%xmm9
    847 	movdqa	.Lbswap_mask(%rip),%xmm7
    848 
    849 	shll	$4,%eax			/* bytes of round keys */
    850 	movl	$16,%r10d
    851 	leaq	0(%rcx),%r11		/* r11 = key base */
    852 	movdqu	(%r9),%xmm3		/* xmm3 = running CMAC */
    853 	movdqa	%xmm6,%xmm2		/* xmm2 = counter to encrypt */
    854 	leaq	32(%rcx,%rax,1),%rcx	/* rcx = end of key schedule */
    855 .byte	102,15,56,0,247		/* pshufb %xmm7,%xmm6: byte-swap counter */
    856 	subq	%rax,%r10		/* r10 = 16 - 16*rounds (negative index) */
    857 	jmp	.Lccm64_enc_outer
    858 .align	16
    859 .Lccm64_enc_outer:
    860 	movups	(%r11),%xmm0		/* round key 0 */
    861 	movq	%r10,%rax		/* reset key-walk index */
    862 	movups	(%rdi),%xmm8		/* xmm8 = plaintext block */
    863 
    864 	xorps	%xmm0,%xmm2		/* whiten counter */
    865 	movups	16(%r11),%xmm1
    866 	xorps	%xmm8,%xmm0		/* fold plaintext into whitening key... */
    867 	xorps	%xmm0,%xmm3		/* ...so xmm3 = (cmac ^ pt) ^ key0 */
    868 	movups	32(%r11),%xmm0
    869 
/* Two interleaved streams: xmm2 (CTR), xmm3 (CBC-MAC). */
    870 .Lccm64_enc2_loop:
    871 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
    872 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
    873 	movups	(%rcx,%rax,1),%xmm1
    874 	addq	$32,%rax
    875 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
    876 .byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
    877 	movups	-16(%rcx,%rax,1),%xmm0
    878 	jnz	.Lccm64_enc2_loop
    879 .byte	102,15,56,220,209	/* final aesenc round */
    880 .byte	102,15,56,220,217
    881 	paddq	%xmm9,%xmm6		/* bump 64-bit counter */
    882 	decq	%rdx			/* one block consumed */
    883 .byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
    884 .byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
    885 
    886 	leaq	16(%rdi),%rdi
    887 	xorps	%xmm2,%xmm8		/* ciphertext = pt ^ E(counter) */
    888 	movdqa	%xmm6,%xmm2		/* next counter to encrypt */
    889 	movups	%xmm8,(%rsi)
    890 .byte	102,15,56,0,215		/* pshufb %xmm7,%xmm2: back to stored byte order */
    891 	leaq	16(%rsi),%rsi
    892 	jnz	.Lccm64_enc_outer	/* flags from decq above */
    893 
    894 	pxor	%xmm0,%xmm0		/* scrub keys, counter, MAC temp, data */
    895 	pxor	%xmm1,%xmm1
    896 	pxor	%xmm2,%xmm2
    897 	movups	%xmm3,(%r9)		/* write back updated CMAC */
    898 	pxor	%xmm3,%xmm3
    899 	pxor	%xmm8,%xmm8
    900 	pxor	%xmm6,%xmm6
    901 	.byte	0xf3,0xc3		/* rep ret */
    902 .size	aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
    903 .globl	aesni_ccm64_decrypt_blocks
    904 .hidden aesni_ccm64_decrypt_blocks
    905 .type	aesni_ccm64_decrypt_blocks,@function
    906 .align	16
/*
 * void aesni_ccm64_decrypt_blocks(const uint8_t *in, uint8_t *out,
 *         size_t blocks, const AES_KEY *key, const uint8_t ivec[16],
 *         uint8_t cmac[16])
 * SysV AMD64: rdi = in, rsi = out, rdx = blocks, rcx = key,
 * r8 = ivec (counter), r9 = cmac (running CBC-MAC, updated in place).
 * CCM decryption: each block's keystream E(counter) is xored with the
 * ciphertext to recover plaintext, and only then can that plaintext be
 * fed into the CBC-MAC -- so the MAC update for block i runs
 * interleaved with the keystream computation for block i+1, and the
 * final MAC block is finished single-stream at .Lccm64_dec_break.
 * Register roles as in the encrypt variant: r11 = key, r10 = negative
 * key-walk offset, xmm6 = counter, xmm7 = bswap mask, xmm8 = data,
 * xmm9 = increment, xmm3 = running CMAC.  Sensitive xmm state is wiped
 * on exit.
 */
    907 aesni_ccm64_decrypt_blocks:
    908 	movl	240(%rcx),%eax		/* eax = round counter */
    909 	movups	(%r8),%xmm6		/* xmm6 = counter block (ivec) */
    910 	movdqu	(%r9),%xmm3		/* xmm3 = running CMAC */
    911 	movdqa	.Lincrement64(%rip),%xmm9
    912 	movdqa	.Lbswap_mask(%rip),%xmm7
    913 
    914 	movaps	%xmm6,%xmm2
    915 	movl	%eax,%r10d		/* keep rounds; eax is the loop counter below */
    916 	movq	%rcx,%r11		/* r11 = key base */
    917 .byte	102,15,56,0,247		/* pshufb %xmm7,%xmm6: byte-swap counter */
/* Encrypt the first counter block inline (single stream). */
    918 	movups	(%rcx),%xmm0
    919 	movups	16(%rcx),%xmm1
    920 	leaq	32(%rcx),%rcx
    921 	xorps	%xmm0,%xmm2
    922 .Loop_enc1_5:
    923 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
    924 	decl	%eax
    925 	movups	(%rcx),%xmm1
    926 	leaq	16(%rcx),%rcx
    927 	jnz	.Loop_enc1_5
    928 .byte	102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
    929 	shll	$4,%r10d		/* bytes of round keys */
    930 	movl	$16,%eax
    931 	movups	(%rdi),%xmm8		/* first ciphertext block */
    932 	paddq	%xmm9,%xmm6		/* bump counter */
    933 	leaq	16(%rdi),%rdi
    934 	subq	%r10,%rax		/* rax = 16 - 16*rounds */
    935 	leaq	32(%r11,%r10,1),%rcx	/* rcx = end of key schedule */
    936 	movq	%rax,%r10		/* r10 = negative key-walk offset */
    937 	jmp	.Lccm64_dec_outer
    938 .align	16
    939 .Lccm64_dec_outer:
    940 	xorps	%xmm2,%xmm8		/* plaintext = ct ^ E(counter) */
    941 	movdqa	%xmm6,%xmm2		/* next counter */
    942 	movups	%xmm8,(%rsi)		/* store plaintext */
    943 	leaq	16(%rsi),%rsi
    944 .byte	102,15,56,0,215		/* pshufb %xmm7,%xmm2 */
    945 
    946 	subq	$1,%rdx
    947 	jz	.Lccm64_dec_break	/* last block: only the MAC remains */
    948 
/* Interleave: CTR stream for the next block + MAC absorb of this block. */
    949 	movups	(%r11),%xmm0		/* round key 0 */
    950 	movq	%r10,%rax		/* reset key-walk index */
    951 	movups	16(%r11),%xmm1
    952 	xorps	%xmm0,%xmm8		/* xmm8 = pt ^ key0 */
    953 	xorps	%xmm0,%xmm2		/* whiten counter */
    954 	xorps	%xmm8,%xmm3		/* xmm3 = (cmac ^ pt) ^ key0 */
    955 	movups	32(%r11),%xmm0
    956 	jmp	.Lccm64_dec2_loop
    957 .align	16
    958 .Lccm64_dec2_loop:
    959 .byte	102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
    960 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
    961 	movups	(%rcx,%rax,1),%xmm1
    962 	addq	$32,%rax
    963 .byte	102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
    964 .byte	102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
    965 	movups	-16(%rcx,%rax,1),%xmm0
    966 	jnz	.Lccm64_dec2_loop
    967 	movups	(%rdi),%xmm8		/* next ciphertext block */
    968 	paddq	%xmm9,%xmm6		/* bump counter */
    969 .byte	102,15,56,220,209	/* final aesenc round */
    970 .byte	102,15,56,220,217
    971 .byte	102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
    972 .byte	102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
    973 	leaq	16(%rdi),%rdi
    974 	jmp	.Lccm64_dec_outer
    975 
    976 .align	16
/* Finish the MAC for the last recovered plaintext block (single stream
 * on xmm3). */
    977 .Lccm64_dec_break:
    978 
    979 	movl	240(%r11),%eax		/* reload round counter */
    980 	movups	(%r11),%xmm0
    981 	movups	16(%r11),%xmm1
    982 	xorps	%xmm0,%xmm8		/* xmm8 = pt ^ key0 */
    983 	leaq	32(%r11),%r11
    984 	xorps	%xmm8,%xmm3		/* whitened cmac ^ pt */
    985 .Loop_enc1_6:
    986 .byte	102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
    987 	decl	%eax
    988 	movups	(%r11),%xmm1
    989 	leaq	16(%r11),%r11
    990 	jnz	.Loop_enc1_6
    991 .byte	102,15,56,221,217	/* aesenclast %xmm1,%xmm3 */
    992 	pxor	%xmm0,%xmm0		/* scrub sensitive state */
    993 	pxor	%xmm1,%xmm1
    994 	pxor	%xmm2,%xmm2
    995 	movups	%xmm3,(%r9)		/* write back updated CMAC */
    996 	pxor	%xmm3,%xmm3
    997 	pxor	%xmm8,%xmm8
    998 	pxor	%xmm6,%xmm6
    999 	.byte	0xf3,0xc3		/* rep ret */
   1000 .size	aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
   1001 .globl	aesni_ctr32_encrypt_blocks
   1002 .hidden aesni_ctr32_encrypt_blocks
   1003 .type	aesni_ctr32_encrypt_blocks,@function
   1004 .align	16
   1005 aesni_ctr32_encrypt_blocks:
   1006 	cmpq	$1,%rdx
   1007 	jne	.Lctr32_bulk
   1008 
   1009 
   1010 
   1011 	movups	(%r8),%xmm2
   1012 	movups	(%rdi),%xmm3
   1013 	movl	240(%rcx),%edx
   1014 	movups	(%rcx),%xmm0
   1015 	movups	16(%rcx),%xmm1
   1016 	leaq	32(%rcx),%rcx
   1017 	xorps	%xmm0,%xmm2
   1018 .Loop_enc1_7:
   1019 .byte	102,15,56,220,209
   1020 	decl	%edx
   1021 	movups	(%rcx),%xmm1
   1022 	leaq	16(%rcx),%rcx
   1023 	jnz	.Loop_enc1_7
   1024 .byte	102,15,56,221,209
   1025 	pxor	%xmm0,%xmm0
   1026 	pxor	%xmm1,%xmm1
   1027 	xorps	%xmm3,%xmm2
   1028 	pxor	%xmm3,%xmm3
   1029 	movups	%xmm2,(%rsi)
   1030 	xorps	%xmm2,%xmm2
   1031 	jmp	.Lctr32_epilogue
   1032 
   1033 .align	16
   1034 .Lctr32_bulk:
   1035 	leaq	(%rsp),%rax
   1036 	pushq	%rbp
   1037 	subq	$128,%rsp
   1038 	andq	$-16,%rsp
   1039 	leaq	-8(%rax),%rbp
   1040 
   1041 
   1042 
   1043 
   1044 	movdqu	(%r8),%xmm2
   1045 	movdqu	(%rcx),%xmm0
   1046 	movl	12(%r8),%r8d
   1047 	pxor	%xmm0,%xmm2
   1048 	movl	12(%rcx),%r11d
   1049 	movdqa	%xmm2,0(%rsp)
   1050 	bswapl	%r8d
   1051 	movdqa	%xmm2,%xmm3
   1052 	movdqa	%xmm2,%xmm4
   1053 	movdqa	%xmm2,%xmm5
   1054 	movdqa	%xmm2,64(%rsp)
   1055 	movdqa	%xmm2,80(%rsp)
   1056 	movdqa	%xmm2,96(%rsp)
   1057 	movq	%rdx,%r10
   1058 	movdqa	%xmm2,112(%rsp)
   1059 
   1060 	leaq	1(%r8),%rax
   1061 	leaq	2(%r8),%rdx
   1062 	bswapl	%eax
   1063 	bswapl	%edx
   1064 	xorl	%r11d,%eax
   1065 	xorl	%r11d,%edx
   1066 .byte	102,15,58,34,216,3
   1067 	leaq	3(%r8),%rax
   1068 	movdqa	%xmm3,16(%rsp)
   1069 .byte	102,15,58,34,226,3
   1070 	bswapl	%eax
   1071 	movq	%r10,%rdx
   1072 	leaq	4(%r8),%r10
   1073 	movdqa	%xmm4,32(%rsp)
   1074 	xorl	%r11d,%eax
   1075 	bswapl	%r10d
   1076 .byte	102,15,58,34,232,3
   1077 	xorl	%r11d,%r10d
   1078 	movdqa	%xmm5,48(%rsp)
   1079 	leaq	5(%r8),%r9
   1080 	movl	%r10d,64+12(%rsp)
   1081 	bswapl	%r9d
   1082 	leaq	6(%r8),%r10
   1083 	movl	240(%rcx),%eax
   1084 	xorl	%r11d,%r9d
   1085 	bswapl	%r10d
   1086 	movl	%r9d,80+12(%rsp)
   1087 	xorl	%r11d,%r10d
   1088 	leaq	7(%r8),%r9
   1089 	movl	%r10d,96+12(%rsp)
   1090 	bswapl	%r9d
   1091 	movl	OPENSSL_ia32cap_P+4(%rip),%r10d
   1092 	xorl	%r11d,%r9d
   1093 	andl	$71303168,%r10d
   1094 	movl	%r9d,112+12(%rsp)
   1095 
   1096 	movups	16(%rcx),%xmm1
   1097 
   1098 	movdqa	64(%rsp),%xmm6
   1099 	movdqa	80(%rsp),%xmm7
   1100 
   1101 	cmpq	$8,%rdx
   1102 	jb	.Lctr32_tail
   1103 
   1104 	subq	$6,%rdx
   1105 	cmpl	$4194304,%r10d
   1106 	je	.Lctr32_6x
   1107 
   1108 	leaq	128(%rcx),%rcx
   1109 	subq	$2,%rdx
   1110 	jmp	.Lctr32_loop8
   1111 
   1112 .align	16
   1113 .Lctr32_6x:
   1114 	shll	$4,%eax
   1115 	movl	$48,%r10d
   1116 	bswapl	%r11d
   1117 	leaq	32(%rcx,%rax,1),%rcx
   1118 	subq	%rax,%r10
   1119 	jmp	.Lctr32_loop6
   1120 
   1121 .align	16
   1122 .Lctr32_loop6:
   1123 	addl	$6,%r8d
   1124 	movups	-48(%rcx,%r10,1),%xmm0
   1125 .byte	102,15,56,220,209
   1126 	movl	%r8d,%eax
   1127 	xorl	%r11d,%eax
   1128 .byte	102,15,56,220,217
   1129 .byte	0x0f,0x38,0xf1,0x44,0x24,12
   1130 	leal	1(%r8),%eax
   1131 .byte	102,15,56,220,225
   1132 	xorl	%r11d,%eax
   1133 .byte	0x0f,0x38,0xf1,0x44,0x24,28
   1134 .byte	102,15,56,220,233
   1135 	leal	2(%r8),%eax
   1136 	xorl	%r11d,%eax
   1137 .byte	102,15,56,220,241
   1138 .byte	0x0f,0x38,0xf1,0x44,0x24,44
   1139 	leal	3(%r8),%eax
   1140 .byte	102,15,56,220,249
   1141 	movups	-32(%rcx,%r10,1),%xmm1
   1142 	xorl	%r11d,%eax
   1143 
   1144 .byte	102,15,56,220,208
   1145 .byte	0x0f,0x38,0xf1,0x44,0x24,60
   1146 	leal	4(%r8),%eax
   1147 .byte	102,15,56,220,216
   1148 	xorl	%r11d,%eax
   1149 .byte	0x0f,0x38,0xf1,0x44,0x24,76
   1150 .byte	102,15,56,220,224
   1151 	leal	5(%r8),%eax
   1152 	xorl	%r11d,%eax
   1153 .byte	102,15,56,220,232
   1154 .byte	0x0f,0x38,0xf1,0x44,0x24,92
   1155 	movq	%r10,%rax
   1156 .byte	102,15,56,220,240
   1157 .byte	102,15,56,220,248
   1158 	movups	-16(%rcx,%r10,1),%xmm0
   1159 
   1160 	call	.Lenc_loop6
   1161 
   1162 	movdqu	(%rdi),%xmm8
   1163 	movdqu	16(%rdi),%xmm9
   1164 	movdqu	32(%rdi),%xmm10
   1165 	movdqu	48(%rdi),%xmm11
   1166 	movdqu	64(%rdi),%xmm12
   1167 	movdqu	80(%rdi),%xmm13
   1168 	leaq	96(%rdi),%rdi
   1169 	movups	-64(%rcx,%r10,1),%xmm1
   1170 	pxor	%xmm2,%xmm8
   1171 	movaps	0(%rsp),%xmm2
   1172 	pxor	%xmm3,%xmm9
   1173 	movaps	16(%rsp),%xmm3
   1174 	pxor	%xmm4,%xmm10
   1175 	movaps	32(%rsp),%xmm4
   1176 	pxor	%xmm5,%xmm11
   1177 	movaps	48(%rsp),%xmm5
   1178 	pxor	%xmm6,%xmm12
   1179 	movaps	64(%rsp),%xmm6
   1180 	pxor	%xmm7,%xmm13
   1181 	movaps	80(%rsp),%xmm7
   1182 	movdqu	%xmm8,(%rsi)
   1183 	movdqu	%xmm9,16(%rsi)
   1184 	movdqu	%xmm10,32(%rsi)
   1185 	movdqu	%xmm11,48(%rsi)
   1186 	movdqu	%xmm12,64(%rsi)
   1187 	movdqu	%xmm13,80(%rsi)
   1188 	leaq	96(%rsi),%rsi
   1189 
   1190 	subq	$6,%rdx
   1191 	jnc	.Lctr32_loop6
   1192 
   1193 	addq	$6,%rdx
   1194 	jz	.Lctr32_done
   1195 
   1196 	leal	-48(%r10),%eax
   1197 	leaq	-80(%rcx,%r10,1),%rcx
   1198 	negl	%eax
   1199 	shrl	$4,%eax
   1200 	jmp	.Lctr32_tail
   1201 
   1202 .align	32
   1203 .Lctr32_loop8:
   1204 	addl	$8,%r8d
   1205 	movdqa	96(%rsp),%xmm8
   1206 .byte	102,15,56,220,209
   1207 	movl	%r8d,%r9d
   1208 	movdqa	112(%rsp),%xmm9
   1209 .byte	102,15,56,220,217
   1210 	bswapl	%r9d
   1211 	movups	32-128(%rcx),%xmm0
   1212 .byte	102,15,56,220,225
   1213 	xorl	%r11d,%r9d
   1214 	nop
   1215 .byte	102,15,56,220,233
   1216 	movl	%r9d,0+12(%rsp)
   1217 	leaq	1(%r8),%r9
   1218 .byte	102,15,56,220,241
   1219 .byte	102,15,56,220,249
   1220 .byte	102,68,15,56,220,193
   1221 .byte	102,68,15,56,220,201
   1222 	movups	48-128(%rcx),%xmm1
   1223 	bswapl	%r9d
   1224 .byte	102,15,56,220,208
   1225 .byte	102,15,56,220,216
   1226 	xorl	%r11d,%r9d
   1227 .byte	0x66,0x90
   1228 .byte	102,15,56,220,224
   1229 .byte	102,15,56,220,232
   1230 	movl	%r9d,16+12(%rsp)
   1231 	leaq	2(%r8),%r9
   1232 .byte	102,15,56,220,240
   1233 .byte	102,15,56,220,248
   1234 .byte	102,68,15,56,220,192
   1235 .byte	102,68,15,56,220,200
   1236 	movups	64-128(%rcx),%xmm0
   1237 	bswapl	%r9d
   1238 .byte	102,15,56,220,209
   1239 .byte	102,15,56,220,217
   1240 	xorl	%r11d,%r9d
   1241 .byte	0x66,0x90
   1242 .byte	102,15,56,220,225
   1243 .byte	102,15,56,220,233
   1244 	movl	%r9d,32+12(%rsp)
   1245 	leaq	3(%r8),%r9
   1246 .byte	102,15,56,220,241
   1247 .byte	102,15,56,220,249
   1248 .byte	102,68,15,56,220,193
   1249 .byte	102,68,15,56,220,201
   1250 	movups	80-128(%rcx),%xmm1
   1251 	bswapl	%r9d
   1252 .byte	102,15,56,220,208
   1253 .byte	102,15,56,220,216
   1254 	xorl	%r11d,%r9d
   1255 .byte	0x66,0x90
   1256 .byte	102,15,56,220,224
   1257 .byte	102,15,56,220,232
   1258 	movl	%r9d,48+12(%rsp)
   1259 	leaq	4(%r8),%r9
   1260 .byte	102,15,56,220,240
   1261 .byte	102,15,56,220,248
   1262 .byte	102,68,15,56,220,192
   1263 .byte	102,68,15,56,220,200
   1264 	movups	96-128(%rcx),%xmm0
   1265 	bswapl	%r9d
   1266 .byte	102,15,56,220,209
   1267 .byte	102,15,56,220,217
   1268 	xorl	%r11d,%r9d
   1269 .byte	0x66,0x90
   1270 .byte	102,15,56,220,225
   1271 .byte	102,15,56,220,233
   1272 	movl	%r9d,64+12(%rsp)
   1273 	leaq	5(%r8),%r9
   1274 .byte	102,15,56,220,241
   1275 .byte	102,15,56,220,249
   1276 .byte	102,68,15,56,220,193
   1277 .byte	102,68,15,56,220,201
   1278 	movups	112-128(%rcx),%xmm1
   1279 	bswapl	%r9d
   1280 .byte	102,15,56,220,208
   1281 .byte	102,15,56,220,216
   1282 	xorl	%r11d,%r9d
   1283 .byte	0x66,0x90
   1284 .byte	102,15,56,220,224
   1285 .byte	102,15,56,220,232
   1286 	movl	%r9d,80+12(%rsp)
   1287 	leaq	6(%r8),%r9
   1288 .byte	102,15,56,220,240
   1289 .byte	102,15,56,220,248
   1290 .byte	102,68,15,56,220,192
   1291 .byte	102,68,15,56,220,200
   1292 	movups	128-128(%rcx),%xmm0
   1293 	bswapl	%r9d
   1294 .byte	102,15,56,220,209
   1295 .byte	102,15,56,220,217
   1296 	xorl	%r11d,%r9d
   1297 .byte	0x66,0x90
   1298 .byte	102,15,56,220,225
   1299 .byte	102,15,56,220,233
   1300 	movl	%r9d,96+12(%rsp)
   1301 	leaq	7(%r8),%r9
   1302 .byte	102,15,56,220,241
   1303 .byte	102,15,56,220,249
   1304 .byte	102,68,15,56,220,193
   1305 .byte	102,68,15,56,220,201
   1306 	movups	144-128(%rcx),%xmm1
   1307 	bswapl	%r9d
   1308 .byte	102,15,56,220,208
   1309 .byte	102,15,56,220,216
   1310 .byte	102,15,56,220,224
   1311 	xorl	%r11d,%r9d
   1312 	movdqu	0(%rdi),%xmm10
   1313 .byte	102,15,56,220,232
   1314 	movl	%r9d,112+12(%rsp)
   1315 	cmpl	$11,%eax
   1316 .byte	102,15,56,220,240
   1317 .byte	102,15,56,220,248
   1318 .byte	102,68,15,56,220,192
   1319 .byte	102,68,15,56,220,200
   1320 	movups	160-128(%rcx),%xmm0
   1321 
   1322 	jb	.Lctr32_enc_done
   1323 
   1324 .byte	102,15,56,220,209
   1325 .byte	102,15,56,220,217
   1326 .byte	102,15,56,220,225
   1327 .byte	102,15,56,220,233
   1328 .byte	102,15,56,220,241
   1329 .byte	102,15,56,220,249
   1330 .byte	102,68,15,56,220,193
   1331 .byte	102,68,15,56,220,201
   1332 	movups	176-128(%rcx),%xmm1
   1333 
   1334 .byte	102,15,56,220,208
   1335 .byte	102,15,56,220,216
   1336 .byte	102,15,56,220,224
   1337 .byte	102,15,56,220,232
   1338 .byte	102,15,56,220,240
   1339 .byte	102,15,56,220,248
   1340 .byte	102,68,15,56,220,192
   1341 .byte	102,68,15,56,220,200
   1342 	movups	192-128(%rcx),%xmm0
   1343 	je	.Lctr32_enc_done
   1344 
   1345 .byte	102,15,56,220,209
   1346 .byte	102,15,56,220,217
   1347 .byte	102,15,56,220,225
   1348 .byte	102,15,56,220,233
   1349 .byte	102,15,56,220,241
   1350 .byte	102,15,56,220,249
   1351 .byte	102,68,15,56,220,193
   1352 .byte	102,68,15,56,220,201
   1353 	movups	208-128(%rcx),%xmm1
   1354 
   1355 .byte	102,15,56,220,208
   1356 .byte	102,15,56,220,216
   1357 .byte	102,15,56,220,224
   1358 .byte	102,15,56,220,232
   1359 .byte	102,15,56,220,240
   1360 .byte	102,15,56,220,248
   1361 .byte	102,68,15,56,220,192
   1362 .byte	102,68,15,56,220,200
   1363 	movups	224-128(%rcx),%xmm0
   1364 	jmp	.Lctr32_enc_done
   1365 
   1366 .align	16
   1367 .Lctr32_enc_done:
   1368 	movdqu	16(%rdi),%xmm11
   1369 	pxor	%xmm0,%xmm10
   1370 	movdqu	32(%rdi),%xmm12
   1371 	pxor	%xmm0,%xmm11
   1372 	movdqu	48(%rdi),%xmm13
   1373 	pxor	%xmm0,%xmm12
   1374 	movdqu	64(%rdi),%xmm14
   1375 	pxor	%xmm0,%xmm13
   1376 	movdqu	80(%rdi),%xmm15
   1377 	pxor	%xmm0,%xmm14
   1378 	pxor	%xmm0,%xmm15
   1379 .byte	102,15,56,220,209
   1380 .byte	102,15,56,220,217
   1381 .byte	102,15,56,220,225
   1382 .byte	102,15,56,220,233
   1383 .byte	102,15,56,220,241
   1384 .byte	102,15,56,220,249
   1385 .byte	102,68,15,56,220,193
   1386 .byte	102,68,15,56,220,201
   1387 	movdqu	96(%rdi),%xmm1
   1388 	leaq	128(%rdi),%rdi
   1389 
   1390 .byte	102,65,15,56,221,210
   1391 	pxor	%xmm0,%xmm1
   1392 	movdqu	112-128(%rdi),%xmm10
   1393 .byte	102,65,15,56,221,219
   1394 	pxor	%xmm0,%xmm10
   1395 	movdqa	0(%rsp),%xmm11
   1396 .byte	102,65,15,56,221,228
   1397 .byte	102,65,15,56,221,237
   1398 	movdqa	16(%rsp),%xmm12
   1399 	movdqa	32(%rsp),%xmm13
   1400 .byte	102,65,15,56,221,246
   1401 .byte	102,65,15,56,221,255
   1402 	movdqa	48(%rsp),%xmm14
   1403 	movdqa	64(%rsp),%xmm15
   1404 .byte	102,68,15,56,221,193
   1405 	movdqa	80(%rsp),%xmm0
   1406 	movups	16-128(%rcx),%xmm1
   1407 .byte	102,69,15,56,221,202
   1408 
   1409 	movups	%xmm2,(%rsi)
   1410 	movdqa	%xmm11,%xmm2
   1411 	movups	%xmm3,16(%rsi)
   1412 	movdqa	%xmm12,%xmm3
   1413 	movups	%xmm4,32(%rsi)
   1414 	movdqa	%xmm13,%xmm4
   1415 	movups	%xmm5,48(%rsi)
   1416 	movdqa	%xmm14,%xmm5
   1417 	movups	%xmm6,64(%rsi)
   1418 	movdqa	%xmm15,%xmm6
   1419 	movups	%xmm7,80(%rsi)
   1420 	movdqa	%xmm0,%xmm7
   1421 	movups	%xmm8,96(%rsi)
   1422 	movups	%xmm9,112(%rsi)
   1423 	leaq	128(%rsi),%rsi
   1424 
   1425 	subq	$8,%rdx
   1426 	jnc	.Lctr32_loop8
   1427 
   1428 	addq	$8,%rdx
   1429 	jz	.Lctr32_done
   1430 	leaq	-128(%rcx),%rcx
   1431 
   1432 .Lctr32_tail:
   1433 
   1434 
   1435 	leaq	16(%rcx),%rcx
   1436 	cmpq	$4,%rdx
   1437 	jb	.Lctr32_loop3
   1438 	je	.Lctr32_loop4
   1439 
   1440 
   1441 	shll	$4,%eax
   1442 	movdqa	96(%rsp),%xmm8
   1443 	pxor	%xmm9,%xmm9
   1444 
   1445 	movups	16(%rcx),%xmm0
   1446 .byte	102,15,56,220,209
   1447 .byte	102,15,56,220,217
   1448 	leaq	32-16(%rcx,%rax,1),%rcx
   1449 	negq	%rax
   1450 .byte	102,15,56,220,225
   1451 	addq	$16,%rax
   1452 	movups	(%rdi),%xmm10
   1453 .byte	102,15,56,220,233
   1454 .byte	102,15,56,220,241
   1455 	movups	16(%rdi),%xmm11
   1456 	movups	32(%rdi),%xmm12
   1457 .byte	102,15,56,220,249
   1458 .byte	102,68,15,56,220,193
   1459 
   1460 	call	.Lenc_loop8_enter
   1461 
   1462 	movdqu	48(%rdi),%xmm13
   1463 	pxor	%xmm10,%xmm2
   1464 	movdqu	64(%rdi),%xmm10
   1465 	pxor	%xmm11,%xmm3
   1466 	movdqu	%xmm2,(%rsi)
   1467 	pxor	%xmm12,%xmm4
   1468 	movdqu	%xmm3,16(%rsi)
   1469 	pxor	%xmm13,%xmm5
   1470 	movdqu	%xmm4,32(%rsi)
   1471 	pxor	%xmm10,%xmm6
   1472 	movdqu	%xmm5,48(%rsi)
   1473 	movdqu	%xmm6,64(%rsi)
   1474 	cmpq	$6,%rdx
   1475 	jb	.Lctr32_done
   1476 
   1477 	movups	80(%rdi),%xmm11
   1478 	xorps	%xmm11,%xmm7
   1479 	movups	%xmm7,80(%rsi)
   1480 	je	.Lctr32_done
   1481 
   1482 	movups	96(%rdi),%xmm12
   1483 	xorps	%xmm12,%xmm8
   1484 	movups	%xmm8,96(%rsi)
   1485 	jmp	.Lctr32_done
   1486 
   1487 .align	32
   1488 .Lctr32_loop4:
   1489 .byte	102,15,56,220,209
   1490 	leaq	16(%rcx),%rcx
   1491 	decl	%eax
   1492 .byte	102,15,56,220,217
   1493 .byte	102,15,56,220,225
   1494 .byte	102,15,56,220,233
   1495 	movups	(%rcx),%xmm1
   1496 	jnz	.Lctr32_loop4
   1497 .byte	102,15,56,221,209
   1498 .byte	102,15,56,221,217
   1499 	movups	(%rdi),%xmm10
   1500 	movups	16(%rdi),%xmm11
   1501 .byte	102,15,56,221,225
   1502 .byte	102,15,56,221,233
   1503 	movups	32(%rdi),%xmm12
   1504 	movups	48(%rdi),%xmm13
   1505 
   1506 	xorps	%xmm10,%xmm2
   1507 	movups	%xmm2,(%rsi)
   1508 	xorps	%xmm11,%xmm3
   1509 	movups	%xmm3,16(%rsi)
   1510 	pxor	%xmm12,%xmm4
   1511 	movdqu	%xmm4,32(%rsi)
   1512 	pxor	%xmm13,%xmm5
   1513 	movdqu	%xmm5,48(%rsi)
   1514 	jmp	.Lctr32_done
   1515 
   1516 .align	32
   1517 .Lctr32_loop3:
   1518 .byte	102,15,56,220,209
   1519 	leaq	16(%rcx),%rcx
   1520 	decl	%eax
   1521 .byte	102,15,56,220,217
   1522 .byte	102,15,56,220,225
   1523 	movups	(%rcx),%xmm1
   1524 	jnz	.Lctr32_loop3
   1525 .byte	102,15,56,221,209
   1526 .byte	102,15,56,221,217
   1527 .byte	102,15,56,221,225
   1528 
   1529 	movups	(%rdi),%xmm10
   1530 	xorps	%xmm10,%xmm2
   1531 	movups	%xmm2,(%rsi)
   1532 	cmpq	$2,%rdx
   1533 	jb	.Lctr32_done
   1534 
   1535 	movups	16(%rdi),%xmm11
   1536 	xorps	%xmm11,%xmm3
   1537 	movups	%xmm3,16(%rsi)
   1538 	je	.Lctr32_done
   1539 
   1540 	movups	32(%rdi),%xmm12
   1541 	xorps	%xmm12,%xmm4
   1542 	movups	%xmm4,32(%rsi)
   1543 
   1544 .Lctr32_done:
   1545 	xorps	%xmm0,%xmm0
   1546 	xorl	%r11d,%r11d
   1547 	pxor	%xmm1,%xmm1
   1548 	pxor	%xmm2,%xmm2
   1549 	pxor	%xmm3,%xmm3
   1550 	pxor	%xmm4,%xmm4
   1551 	pxor	%xmm5,%xmm5
   1552 	pxor	%xmm6,%xmm6
   1553 	pxor	%xmm7,%xmm7
   1554 	movaps	%xmm0,0(%rsp)
   1555 	pxor	%xmm8,%xmm8
   1556 	movaps	%xmm0,16(%rsp)
   1557 	pxor	%xmm9,%xmm9
   1558 	movaps	%xmm0,32(%rsp)
   1559 	pxor	%xmm10,%xmm10
   1560 	movaps	%xmm0,48(%rsp)
   1561 	pxor	%xmm11,%xmm11
   1562 	movaps	%xmm0,64(%rsp)
   1563 	pxor	%xmm12,%xmm12
   1564 	movaps	%xmm0,80(%rsp)
   1565 	pxor	%xmm13,%xmm13
   1566 	movaps	%xmm0,96(%rsp)
   1567 	pxor	%xmm14,%xmm14
   1568 	movaps	%xmm0,112(%rsp)
   1569 	pxor	%xmm15,%xmm15
   1570 	leaq	(%rbp),%rsp
   1571 	popq	%rbp
   1572 .Lctr32_epilogue:
   1573 	.byte	0xf3,0xc3
   1574 .size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
#-----------------------------------------------------------------------
# void aesni_xts_encrypt(const uint8_t *in,   const uint8_t *out,
#                        size_t len,          const AES_KEY *key1,
#                        const AES_KEY *key2, const uint8_t iv[16])
#
# AES-XTS bulk encryption using AES-NI.  SysV AMD64 ABI:
#   %rdi = in (plaintext), %rsi = out (ciphertext), %rdx = len (bytes),
#   %rcx = key1 (data key schedule), %r8 = key2 (tweak key schedule),
#   %r9  = iv (16-byte tweak input).
# 240(key) is read as the round count, i.e. the `rounds` field of the
# OpenSSL AES_KEY layout.  A partial final block (len % 16 != 0) is
# handled with ciphertext stealing at .Lxts_enc_steal.
# Stack: 112 bytes of 16-aligned scratch for spilled tweaks/round key;
# %rbp preserves the caller frame (callee-saved, restored on exit).
# NOTE(review): signature above is inferred from register usage here —
# confirm against the C prototype in the headers.
#-----------------------------------------------------------------------
.globl	aesni_xts_encrypt
.hidden aesni_xts_encrypt
.type	aesni_xts_encrypt,@function
.align	16
aesni_xts_encrypt:
# Prologue: remember caller %rsp via %rax, then carve out a 16-byte-
# aligned 112-byte scratch area; %rbp = caller_rsp-8 so the epilogue can
# restore %rsp exactly regardless of the andq alignment adjustment.
	leaq	(%rsp),%rax
	pushq	%rbp
	subq	$112,%rsp
	andq	$-16,%rsp
	leaq	-8(%rax),%rbp
# Encrypt the IV with the tweak key (key2, %r8) to produce the initial
# XTS tweak in %xmm2.  %eax = tweak-key rounds, %r10d = data-key rounds.
	movups	(%r9),%xmm2
	movl	240(%r8),%eax
	movl	240(%rcx),%r10d
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm2
.Loop_enc1_8:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	.Loop_enc1_8
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
# Stash data-key state: %r11 = key1, %eax = rounds, %r10 = rounds*16
# (byte size of the schedule tail), %r9 = original len (for the stolen
# tail), %rdx = len rounded down to whole blocks.
	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%r10d,%eax
	shll	$4,%r10d
	movq	%rdx,%r9
	andq	$-16,%rdx

	movups	16(%rcx,%r10,1),%xmm1

# Derive six consecutive tweaks %xmm10..%xmm14 and the running tweak
# %xmm15.  Each doubling in GF(2^128): pshufd $95 broadcasts the sign
# dwords of the tweak into %xmm9, psrad $31 turns them into an all-ones
# carry mask, pand with .Lxts_magic keeps the reduction constant, and
# paddq/pxor implement (tweak << 1) ^ (carry ? poly : 0).
# %xmm0 = round key 0: it is folded into each stored tweak (pxor) so the
# main loop's initial AddRoundKey and tweak-XOR merge into one pxor; the
# last round key is likewise pre-XORed into %xmm1 and spilled to
# 96(%rsp) so aesenclast un-does the folding for free.
	movdqa	.Lxts_magic(%rip),%xmm8
	movdqa	%xmm2,%xmm15
	pshufd	$95,%xmm2,%xmm9
	pxor	%xmm0,%xmm1
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm10
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm10
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm11
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm11
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm12
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm12
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm13
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm13
	pxor	%xmm14,%xmm15
	movdqa	%xmm15,%xmm14
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pxor	%xmm0,%xmm14
	pxor	%xmm9,%xmm15
	movaps	%xmm1,96(%rsp)		# spill (last round key ^ round key 0)

	subq	$96,%rdx
	jc	.Lxts_enc_short		# fewer than 6 blocks remain

# Set up for the 6-block grand loop: %rcx = end of key schedule,
# %r10/%rax = negative byte offset used to walk the schedule forward.
	movl	$16+96,%eax
	leaq	32(%r11,%r10,1),%rcx
	subq	%r10,%rax
	movups	16(%r11),%xmm1
	movq	%rax,%r10
	leaq	.Lxts_magic(%rip),%r8
	jmp	.Lxts_enc_grandloop

.align	32
# Main loop: encrypt 6 blocks per iteration, interleaving loads,
# tweak-XORs and the first AES rounds to hide latency.  Tweaks (XORed
# with the spilled last-round key %xmm9) are saved to 0..80(%rsp) for
# the output XOR performed by aesenclast-from-memory below.
.Lxts_enc_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqa	%xmm0,%xmm8
	movdqu	16(%rdi),%xmm3
	pxor	%xmm10,%xmm2		# plaintext ^ tweak ^ rk0 (folded)
	movdqu	32(%rdi),%xmm4
	pxor	%xmm11,%xmm3
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm12,%xmm4
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	movdqu	64(%rdi),%xmm6
	pxor	%xmm13,%xmm5
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
	movdqu	80(%rdi),%xmm7
	pxor	%xmm15,%xmm8
	movdqa	96(%rsp),%xmm9
	pxor	%xmm14,%xmm6
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	movups	32(%r11),%xmm0
	leaq	96(%rdi),%rdi
	pxor	%xmm8,%xmm7

	pxor	%xmm9,%xmm10
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
	pxor	%xmm9,%xmm11
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	movups	48(%r11),%xmm1
	pxor	%xmm9,%xmm12

.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
	pxor	%xmm9,%xmm13
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	pxor	%xmm9,%xmm14
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
	pxor	%xmm9,%xmm8
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
	movups	64(%r11),%xmm0
	movdqa	%xmm8,80(%rsp)
	pshufd	$95,%xmm15,%xmm9	# carry words for next tweak chain
	jmp	.Lxts_enc_loop6
.align	32
# Inner loop over the remaining middle rounds, two per iteration;
# %rax walks the key schedule until it hits zero (sets ZF for jnz).
.Lxts_enc_loop6:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	movups	-64(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
	movups	-80(%rcx,%rax,1),%xmm0
	jnz	.Lxts_enc_loop6

# Final rounds, interleaved with computing the next 6 tweaks
# %xmm10..%xmm14/%xmm15 (same GF doubling as above).  %r8 points at
# .Lxts_magic so %xmm8 can be reloaded after serving as scratch.
	movdqa	(%r8),%xmm8
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	paddq	%xmm15,%xmm15
	psrad	$31,%xmm14
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	pand	%xmm8,%xmm14
	movups	(%r11),%xmm10
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
	pxor	%xmm14,%xmm15
	movaps	%xmm10,%xmm11
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	movups	-64(%rcx),%xmm1

	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
	paddd	%xmm9,%xmm9
	pxor	%xmm15,%xmm10
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
	pand	%xmm8,%xmm14
	movaps	%xmm11,%xmm12
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7
	movups	-48(%rcx),%xmm0

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	pxor	%xmm15,%xmm11
	psrad	$31,%xmm14
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	movdqa	%xmm13,48(%rsp)
	pxor	%xmm14,%xmm15
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
	movaps	%xmm12,%xmm13
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	movups	-32(%rcx),%xmm1

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,208	# aesenc %xmm0,%xmm2
	pxor	%xmm15,%xmm12
	psrad	$31,%xmm14
.byte	102,15,56,220,216	# aesenc %xmm0,%xmm3
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,224	# aesenc %xmm0,%xmm4
.byte	102,15,56,220,232	# aesenc %xmm0,%xmm5
.byte	102,15,56,220,240	# aesenc %xmm0,%xmm6
	pxor	%xmm14,%xmm15
	movaps	%xmm13,%xmm14
.byte	102,15,56,220,248	# aesenc %xmm0,%xmm7

	movdqa	%xmm9,%xmm0
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	pxor	%xmm15,%xmm13
	psrad	$31,%xmm0
.byte	102,15,56,220,217	# aesenc %xmm1,%xmm3
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm0
.byte	102,15,56,220,225	# aesenc %xmm1,%xmm4
.byte	102,15,56,220,233	# aesenc %xmm1,%xmm5
	pxor	%xmm0,%xmm15
	movups	(%r11),%xmm0
.byte	102,15,56,220,241	# aesenc %xmm1,%xmm6
.byte	102,15,56,220,249	# aesenc %xmm1,%xmm7
	movups	16(%r11),%xmm1

# Last round: aesenclast with the tweak^last-key values spilled at
# 0..80(%rsp), which simultaneously applies the final round key and the
# XTS output tweak XOR.  Sixth tweak %xmm15 doubling finishes here too.
	pxor	%xmm15,%xmm14
.byte	102,15,56,221,84,36,0	# aesenclast 0(%rsp),%xmm2
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
.byte	102,15,56,221,92,36,16	# aesenclast 16(%rsp),%xmm3
.byte	102,15,56,221,100,36,32	# aesenclast 32(%rsp),%xmm4
	pand	%xmm8,%xmm9
	movq	%r10,%rax
.byte	102,15,56,221,108,36,48	# aesenclast 48(%rsp),%xmm5
.byte	102,15,56,221,116,36,64	# aesenclast 64(%rsp),%xmm6
.byte	102,15,56,221,124,36,80	# aesenclast 80(%rsp),%xmm7
	pxor	%xmm9,%xmm15

# Store the 6 ciphertext blocks and loop while >= 6 blocks remain.
	leaq	96(%rsi),%rsi
	movups	%xmm2,-96(%rsi)
	movups	%xmm3,-80(%rsi)
	movups	%xmm4,-64(%rsi)
	movups	%xmm5,-48(%rsi)
	movups	%xmm6,-32(%rsi)
	movups	%xmm7,-16(%rsi)
	subq	$96,%rdx
	jnc	.Lxts_enc_grandloop

# Recover the plain round count into %eax and key pointer into %rcx
# before falling into the tail handling.
	movl	$16+96,%eax
	subl	%r10d,%eax
	movq	%r11,%rcx
	shrl	$4,%eax

# Tail: 0-5 whole blocks left.  The stored tweaks still carry the
# folded-in round key 0 (%xmm0); strip it off lazily, one tweak per
# branch level, as each case needs it.
.Lxts_enc_short:

	movl	%eax,%r10d
	pxor	%xmm0,%xmm10
	addq	$96,%rdx
	jz	.Lxts_enc_done

	pxor	%xmm0,%xmm11
	cmpq	$32,%rdx
	jb	.Lxts_enc_one
	pxor	%xmm0,%xmm12
	je	.Lxts_enc_two

	pxor	%xmm0,%xmm13
	cmpq	$64,%rdx
	jb	.Lxts_enc_three
	pxor	%xmm0,%xmm14
	je	.Lxts_enc_four

# Five blocks: use the 6-wide primitive with a zeroed sixth block.
	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm7,%xmm7		# dummy sixth block

	call	_aesni_encrypt6

	xorps	%xmm10,%xmm2
	movdqa	%xmm15,%xmm10		# next tweak for the stolen tail
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
# One block: inline single-block AES (same shape as aesni_encrypt).
.Lxts_enc_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_9:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_9
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10		# advance tweak for stolen tail
	movups	%xmm2,(%rsi)
	leaq	16(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
# Two blocks.
.Lxts_enc_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_encrypt2

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10		# advance tweak for stolen tail
	xorps	%xmm11,%xmm3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
# Three blocks.
.Lxts_enc_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_encrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10		# advance tweak for stolen tail
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
# Four blocks.
.Lxts_enc_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_encrypt4

	pxor	%xmm10,%xmm2
	movdqa	%xmm14,%xmm10		# advance tweak for stolen tail
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	.Lxts_enc_done

.align	16
# Ciphertext stealing: %r9 still holds the original length, so
# %r9 & 15 is the size of the partial final block (0 = done).
.Lxts_enc_done:
	andq	$15,%r9
	jz	.Lxts_enc_ret
	movq	%r9,%rdx

# Byte-swap loop: copy the leading bytes of the last partial plaintext
# block over the previous ciphertext block, moving the displaced
# ciphertext bytes into the output tail.
.Lxts_enc_steal:
	movzbl	(%rdi),%eax
	movzbl	-16(%rsi),%ecx
	leaq	1(%rdi),%rdi
	movb	%al,-16(%rsi)
	movb	%cl,0(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	.Lxts_enc_steal

# Re-encrypt the merged block in place with the final tweak %xmm10.
	subq	%r9,%rsi
	movq	%r11,%rcx
	movl	%r10d,%eax

	movups	-16(%rsi),%xmm2
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_10:
.byte	102,15,56,220,209	# aesenc %xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_10
.byte	102,15,56,221,209	# aesenclast %xmm1,%xmm2
	xorps	%xmm10,%xmm2
	movups	%xmm2,-16(%rsi)

# Scrub key/tweak material from all xmm registers and the stack scratch
# area before returning, then restore the caller's %rsp/%rbp.
.Lxts_enc_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp
	popq	%rbp
.Lxts_enc_epilogue:
	.byte	0xf3,0xc3		# rep ret (branch-predictor-friendly return)
.size	aesni_xts_encrypt,.-aesni_xts_encrypt
   2041 .globl	aesni_xts_decrypt
   2042 .hidden aesni_xts_decrypt
   2043 .type	aesni_xts_decrypt,@function
   2044 .align	16
   2045 aesni_xts_decrypt:
   2046 	leaq	(%rsp),%rax
   2047 	pushq	%rbp
   2048 	subq	$112,%rsp
   2049 	andq	$-16,%rsp
   2050 	leaq	-8(%rax),%rbp
   2051 	movups	(%r9),%xmm2
   2052 	movl	240(%r8),%eax
   2053 	movl	240(%rcx),%r10d
   2054 	movups	(%r8),%xmm0
   2055 	movups	16(%r8),%xmm1
   2056 	leaq	32(%r8),%r8
   2057 	xorps	%xmm0,%xmm2
   2058 .Loop_enc1_11:
   2059 .byte	102,15,56,220,209
   2060 	decl	%eax
   2061 	movups	(%r8),%xmm1
   2062 	leaq	16(%r8),%r8
   2063 	jnz	.Loop_enc1_11
   2064 .byte	102,15,56,221,209
   2065 	xorl	%eax,%eax
   2066 	testq	$15,%rdx
   2067 	setnz	%al
   2068 	shlq	$4,%rax
   2069 	subq	%rax,%rdx
   2070 
   2071 	movups	(%rcx),%xmm0
   2072 	movq	%rcx,%r11
   2073 	movl	%r10d,%eax
   2074 	shll	$4,%r10d
   2075 	movq	%rdx,%r9
   2076 	andq	$-16,%rdx
   2077 
   2078 	movups	16(%rcx,%r10,1),%xmm1
   2079 
   2080 	movdqa	.Lxts_magic(%rip),%xmm8
   2081 	movdqa	%xmm2,%xmm15
   2082 	pshufd	$95,%xmm2,%xmm9
   2083 	pxor	%xmm0,%xmm1
   2084 	movdqa	%xmm9,%xmm14
   2085 	paddd	%xmm9,%xmm9
   2086 	movdqa	%xmm15,%xmm10
   2087 	psrad	$31,%xmm14
   2088 	paddq	%xmm15,%xmm15
   2089 	pand	%xmm8,%xmm14
   2090 	pxor	%xmm0,%xmm10
   2091 	pxor	%xmm14,%xmm15
   2092 	movdqa	%xmm9,%xmm14
   2093 	paddd	%xmm9,%xmm9
   2094 	movdqa	%xmm15,%xmm11
   2095 	psrad	$31,%xmm14
   2096 	paddq	%xmm15,%xmm15
   2097 	pand	%xmm8,%xmm14
   2098 	pxor	%xmm0,%xmm11
   2099 	pxor	%xmm14,%xmm15
   2100 	movdqa	%xmm9,%xmm14
   2101 	paddd	%xmm9,%xmm9
   2102 	movdqa	%xmm15,%xmm12
   2103 	psrad	$31,%xmm14
   2104 	paddq	%xmm15,%xmm15
   2105 	pand	%xmm8,%xmm14
   2106 	pxor	%xmm0,%xmm12
   2107 	pxor	%xmm14,%xmm15
   2108 	movdqa	%xmm9,%xmm14
   2109 	paddd	%xmm9,%xmm9
   2110 	movdqa	%xmm15,%xmm13
   2111 	psrad	$31,%xmm14
   2112 	paddq	%xmm15,%xmm15
   2113 	pand	%xmm8,%xmm14
   2114 	pxor	%xmm0,%xmm13
   2115 	pxor	%xmm14,%xmm15
   2116 	movdqa	%xmm15,%xmm14
   2117 	psrad	$31,%xmm9
   2118 	paddq	%xmm15,%xmm15
   2119 	pand	%xmm8,%xmm9
   2120 	pxor	%xmm0,%xmm14
   2121 	pxor	%xmm9,%xmm15
   2122 	movaps	%xmm1,96(%rsp)
   2123 
   2124 	subq	$96,%rdx
   2125 	jc	.Lxts_dec_short
   2126 
   2127 	movl	$16+96,%eax
   2128 	leaq	32(%r11,%r10,1),%rcx
   2129 	subq	%r10,%rax
   2130 	movups	16(%r11),%xmm1
   2131 	movq	%rax,%r10
   2132 	leaq	.Lxts_magic(%rip),%r8
   2133 	jmp	.Lxts_dec_grandloop
   2134 
   2135 .align	32
   2136 .Lxts_dec_grandloop:
   2137 	movdqu	0(%rdi),%xmm2
   2138 	movdqa	%xmm0,%xmm8
   2139 	movdqu	16(%rdi),%xmm3
   2140 	pxor	%xmm10,%xmm2
   2141 	movdqu	32(%rdi),%xmm4
   2142 	pxor	%xmm11,%xmm3
   2143 .byte	102,15,56,222,209
   2144 	movdqu	48(%rdi),%xmm5
   2145 	pxor	%xmm12,%xmm4
   2146 .byte	102,15,56,222,217
   2147 	movdqu	64(%rdi),%xmm6
   2148 	pxor	%xmm13,%xmm5
   2149 .byte	102,15,56,222,225
   2150 	movdqu	80(%rdi),%xmm7
   2151 	pxor	%xmm15,%xmm8
   2152 	movdqa	96(%rsp),%xmm9
   2153 	pxor	%xmm14,%xmm6
   2154 .byte	102,15,56,222,233
   2155 	movups	32(%r11),%xmm0
   2156 	leaq	96(%rdi),%rdi
   2157 	pxor	%xmm8,%xmm7
   2158 
   2159 	pxor	%xmm9,%xmm10
   2160 .byte	102,15,56,222,241
   2161 	pxor	%xmm9,%xmm11
   2162 	movdqa	%xmm10,0(%rsp)
   2163 .byte	102,15,56,222,249
   2164 	movups	48(%r11),%xmm1
   2165 	pxor	%xmm9,%xmm12
   2166 
   2167 .byte	102,15,56,222,208
   2168 	pxor	%xmm9,%xmm13
   2169 	movdqa	%xmm11,16(%rsp)
   2170 .byte	102,15,56,222,216
   2171 	pxor	%xmm9,%xmm14
   2172 	movdqa	%xmm12,32(%rsp)
   2173 .byte	102,15,56,222,224
   2174 .byte	102,15,56,222,232
   2175 	pxor	%xmm9,%xmm8
   2176 	movdqa	%xmm14,64(%rsp)
   2177 .byte	102,15,56,222,240
   2178 .byte	102,15,56,222,248
   2179 	movups	64(%r11),%xmm0
   2180 	movdqa	%xmm8,80(%rsp)
   2181 	pshufd	$95,%xmm15,%xmm9
   2182 	jmp	.Lxts_dec_loop6
   2183 .align	32
   2184 .Lxts_dec_loop6:
   2185 .byte	102,15,56,222,209
   2186 .byte	102,15,56,222,217
   2187 .byte	102,15,56,222,225
   2188 .byte	102,15,56,222,233
   2189 .byte	102,15,56,222,241
   2190 .byte	102,15,56,222,249
   2191 	movups	-64(%rcx,%rax,1),%xmm1
   2192 	addq	$32,%rax
   2193 
   2194 .byte	102,15,56,222,208
   2195 .byte	102,15,56,222,216
   2196 .byte	102,15,56,222,224
   2197 .byte	102,15,56,222,232
   2198 .byte	102,15,56,222,240
   2199 .byte	102,15,56,222,248
   2200 	movups	-80(%rcx,%rax,1),%xmm0
   2201 	jnz	.Lxts_dec_loop6
   2202 
   2203 	movdqa	(%r8),%xmm8
   2204 	movdqa	%xmm9,%xmm14
   2205 	paddd	%xmm9,%xmm9
   2206 .byte	102,15,56,222,209
   2207 	paddq	%xmm15,%xmm15
   2208 	psrad	$31,%xmm14
   2209 .byte	102,15,56,222,217
   2210 	pand	%xmm8,%xmm14
   2211 	movups	(%r11),%xmm10
   2212 .byte	102,15,56,222,225
   2213 .byte	102,15,56,222,233
   2214 .byte	102,15,56,222,241
   2215 	pxor	%xmm14,%xmm15
   2216 	movaps	%xmm10,%xmm11
   2217 .byte	102,15,56,222,249
   2218 	movups	-64(%rcx),%xmm1
   2219 
   2220 	movdqa	%xmm9,%xmm14
   2221 .byte	102,15,56,222,208
   2222 	paddd	%xmm9,%xmm9
   2223 	pxor	%xmm15,%xmm10
   2224 .byte	102,15,56,222,216
   2225 	psrad	$31,%xmm14
   2226 	paddq	%xmm15,%xmm15
   2227 .byte	102,15,56,222,224
   2228 .byte	102,15,56,222,232
   2229 	pand	%xmm8,%xmm14
   2230 	movaps	%xmm11,%xmm12
   2231 .byte	102,15,56,222,240
   2232 	pxor	%xmm14,%xmm15
   2233 	movdqa	%xmm9,%xmm14
   2234 .byte	102,15,56,222,248
   2235 	movups	-48(%rcx),%xmm0
   2236 
   2237 	paddd	%xmm9,%xmm9
   2238 .byte	102,15,56,222,209
   2239 	pxor	%xmm15,%xmm11
   2240 	psrad	$31,%xmm14
   2241 .byte	102,15,56,222,217
   2242 	paddq	%xmm15,%xmm15
   2243 	pand	%xmm8,%xmm14
   2244 .byte	102,15,56,222,225
   2245 .byte	102,15,56,222,233
   2246 	movdqa	%xmm13,48(%rsp)
   2247 	pxor	%xmm14,%xmm15
   2248 .byte	102,15,56,222,241
   2249 	movaps	%xmm12,%xmm13
   2250 	movdqa	%xmm9,%xmm14
   2251 .byte	102,15,56,222,249
   2252 	movups	-32(%rcx),%xmm1
   2253 
   2254 	paddd	%xmm9,%xmm9
   2255 .byte	102,15,56,222,208
   2256 	pxor	%xmm15,%xmm12
   2257 	psrad	$31,%xmm14
   2258 .byte	102,15,56,222,216
   2259 	paddq	%xmm15,%xmm15
   2260 	pand	%xmm8,%xmm14
   2261 .byte	102,15,56,222,224
   2262 .byte	102,15,56,222,232
   2263 .byte	102,15,56,222,240
   2264 	pxor	%xmm14,%xmm15
   2265 	movaps	%xmm13,%xmm14
   2266 .byte	102,15,56,222,248
   2267 
   2268 	movdqa	%xmm9,%xmm0
   2269 	paddd	%xmm9,%xmm9
   2270 .byte	102,15,56,222,209
   2271 	pxor	%xmm15,%xmm13
   2272 	psrad	$31,%xmm0
   2273 .byte	102,15,56,222,217
   2274 	paddq	%xmm15,%xmm15
   2275 	pand	%xmm8,%xmm0
   2276 .byte	102,15,56,222,225
   2277 .byte	102,15,56,222,233
   2278 	pxor	%xmm0,%xmm15
   2279 	movups	(%r11),%xmm0
   2280 .byte	102,15,56,222,241
   2281 .byte	102,15,56,222,249
   2282 	movups	16(%r11),%xmm1
   2283 
   2284 	pxor	%xmm15,%xmm14
   2285 .byte	102,15,56,223,84,36,0
   2286 	psrad	$31,%xmm9
   2287 	paddq	%xmm15,%xmm15
   2288 .byte	102,15,56,223,92,36,16
   2289 .byte	102,15,56,223,100,36,32
   2290 	pand	%xmm8,%xmm9
   2291 	movq	%r10,%rax
   2292 .byte	102,15,56,223,108,36,48
   2293 .byte	102,15,56,223,116,36,64
   2294 .byte	102,15,56,223,124,36,80
   2295 	pxor	%xmm9,%xmm15
   2296 
   2297 	leaq	96(%rsi),%rsi
   2298 	movups	%xmm2,-96(%rsi)
   2299 	movups	%xmm3,-80(%rsi)
   2300 	movups	%xmm4,-64(%rsi)
   2301 	movups	%xmm5,-48(%rsi)
   2302 	movups	%xmm6,-32(%rsi)
   2303 	movups	%xmm7,-16(%rsi)
   2304 	subq	$96,%rdx
   2305 	jnc	.Lxts_dec_grandloop
   2306 
   2307 	movl	$16+96,%eax
   2308 	subl	%r10d,%eax
   2309 	movq	%r11,%rcx
   2310 	shrl	$4,%eax
   2311 
   2312 .Lxts_dec_short:
   2313 
   2314 	movl	%eax,%r10d
   2315 	pxor	%xmm0,%xmm10
   2316 	pxor	%xmm0,%xmm11
   2317 	addq	$96,%rdx
   2318 	jz	.Lxts_dec_done
   2319 
   2320 	pxor	%xmm0,%xmm12
   2321 	cmpq	$32,%rdx
   2322 	jb	.Lxts_dec_one
   2323 	pxor	%xmm0,%xmm13
   2324 	je	.Lxts_dec_two
   2325 
   2326 	pxor	%xmm0,%xmm14
   2327 	cmpq	$64,%rdx
   2328 	jb	.Lxts_dec_three
   2329 	je	.Lxts_dec_four
   2330 
   2331 	movdqu	(%rdi),%xmm2
   2332 	movdqu	16(%rdi),%xmm3
   2333 	movdqu	32(%rdi),%xmm4
   2334 	pxor	%xmm10,%xmm2
   2335 	movdqu	48(%rdi),%xmm5
   2336 	pxor	%xmm11,%xmm3
   2337 	movdqu	64(%rdi),%xmm6
   2338 	leaq	80(%rdi),%rdi
   2339 	pxor	%xmm12,%xmm4
   2340 	pxor	%xmm13,%xmm5
   2341 	pxor	%xmm14,%xmm6
   2342 
   2343 	call	_aesni_decrypt6
   2344 
   2345 	xorps	%xmm10,%xmm2
   2346 	xorps	%xmm11,%xmm3
   2347 	xorps	%xmm12,%xmm4
   2348 	movdqu	%xmm2,(%rsi)
   2349 	xorps	%xmm13,%xmm5
   2350 	movdqu	%xmm3,16(%rsi)
   2351 	xorps	%xmm14,%xmm6
   2352 	movdqu	%xmm4,32(%rsi)
   2353 	pxor	%xmm14,%xmm14
   2354 	movdqu	%xmm5,48(%rsi)
   2355 	pcmpgtd	%xmm15,%xmm14
   2356 	movdqu	%xmm6,64(%rsi)
   2357 	leaq	80(%rsi),%rsi
   2358 	pshufd	$19,%xmm14,%xmm11
   2359 	andq	$15,%r9
   2360 	jz	.Lxts_dec_ret
   2361 
   2362 	movdqa	%xmm15,%xmm10
   2363 	paddq	%xmm15,%xmm15
   2364 	pand	%xmm8,%xmm11
   2365 	pxor	%xmm15,%xmm11
   2366 	jmp	.Lxts_dec_done2
   2367 
   2368 .align	16
   2369 .Lxts_dec_one:
   2370 	movups	(%rdi),%xmm2
   2371 	leaq	16(%rdi),%rdi
   2372 	xorps	%xmm10,%xmm2
   2373 	movups	(%rcx),%xmm0
   2374 	movups	16(%rcx),%xmm1
   2375 	leaq	32(%rcx),%rcx
   2376 	xorps	%xmm0,%xmm2
   2377 .Loop_dec1_12:
   2378 .byte	102,15,56,222,209
   2379 	decl	%eax
   2380 	movups	(%rcx),%xmm1
   2381 	leaq	16(%rcx),%rcx
   2382 	jnz	.Loop_dec1_12
   2383 .byte	102,15,56,223,209
   2384 	xorps	%xmm10,%xmm2
   2385 	movdqa	%xmm11,%xmm10
   2386 	movups	%xmm2,(%rsi)
   2387 	movdqa	%xmm12,%xmm11
   2388 	leaq	16(%rsi),%rsi
   2389 	jmp	.Lxts_dec_done
   2390 
   2391 .align	16
   2392 .Lxts_dec_two:
   2393 	movups	(%rdi),%xmm2
   2394 	movups	16(%rdi),%xmm3
   2395 	leaq	32(%rdi),%rdi
   2396 	xorps	%xmm10,%xmm2
   2397 	xorps	%xmm11,%xmm3
   2398 
   2399 	call	_aesni_decrypt2
   2400 
   2401 	xorps	%xmm10,%xmm2
   2402 	movdqa	%xmm12,%xmm10
   2403 	xorps	%xmm11,%xmm3
   2404 	movdqa	%xmm13,%xmm11
   2405 	movups	%xmm2,(%rsi)
   2406 	movups	%xmm3,16(%rsi)
   2407 	leaq	32(%rsi),%rsi
   2408 	jmp	.Lxts_dec_done
   2409 
   2410 .align	16
   2411 .Lxts_dec_three:
   2412 	movups	(%rdi),%xmm2
   2413 	movups	16(%rdi),%xmm3
   2414 	movups	32(%rdi),%xmm4
   2415 	leaq	48(%rdi),%rdi
   2416 	xorps	%xmm10,%xmm2
   2417 	xorps	%xmm11,%xmm3
   2418 	xorps	%xmm12,%xmm4
   2419 
   2420 	call	_aesni_decrypt3
   2421 
   2422 	xorps	%xmm10,%xmm2
   2423 	movdqa	%xmm13,%xmm10
   2424 	xorps	%xmm11,%xmm3
   2425 	movdqa	%xmm14,%xmm11
   2426 	xorps	%xmm12,%xmm4
   2427 	movups	%xmm2,(%rsi)
   2428 	movups	%xmm3,16(%rsi)
   2429 	movups	%xmm4,32(%rsi)
   2430 	leaq	48(%rsi),%rsi
   2431 	jmp	.Lxts_dec_done
   2432 
   2433 .align	16
   2434 .Lxts_dec_four:
   2435 	movups	(%rdi),%xmm2
   2436 	movups	16(%rdi),%xmm3
   2437 	movups	32(%rdi),%xmm4
   2438 	xorps	%xmm10,%xmm2
   2439 	movups	48(%rdi),%xmm5
   2440 	leaq	64(%rdi),%rdi
   2441 	xorps	%xmm11,%xmm3
   2442 	xorps	%xmm12,%xmm4
   2443 	xorps	%xmm13,%xmm5
   2444 
   2445 	call	_aesni_decrypt4
   2446 
   2447 	pxor	%xmm10,%xmm2
   2448 	movdqa	%xmm14,%xmm10
   2449 	pxor	%xmm11,%xmm3
   2450 	movdqa	%xmm15,%xmm11
   2451 	pxor	%xmm12,%xmm4
   2452 	movdqu	%xmm2,(%rsi)
   2453 	pxor	%xmm13,%xmm5
   2454 	movdqu	%xmm3,16(%rsi)
   2455 	movdqu	%xmm4,32(%rsi)
   2456 	movdqu	%xmm5,48(%rsi)
   2457 	leaq	64(%rsi),%rsi
   2458 	jmp	.Lxts_dec_done
   2459 
   2460 .align	16
   2461 .Lxts_dec_done:
   2462 	andq	$15,%r9
   2463 	jz	.Lxts_dec_ret
   2464 .Lxts_dec_done2:
   2465 	movq	%r9,%rdx
   2466 	movq	%r11,%rcx
   2467 	movl	%r10d,%eax
   2468 
   2469 	movups	(%rdi),%xmm2
   2470 	xorps	%xmm11,%xmm2
   2471 	movups	(%rcx),%xmm0
   2472 	movups	16(%rcx),%xmm1
   2473 	leaq	32(%rcx),%rcx
   2474 	xorps	%xmm0,%xmm2
   2475 .Loop_dec1_13:
   2476 .byte	102,15,56,222,209
   2477 	decl	%eax
   2478 	movups	(%rcx),%xmm1
   2479 	leaq	16(%rcx),%rcx
   2480 	jnz	.Loop_dec1_13
   2481 .byte	102,15,56,223,209
   2482 	xorps	%xmm11,%xmm2
   2483 	movups	%xmm2,(%rsi)
   2484 
   2485 .Lxts_dec_steal:
   2486 	movzbl	16(%rdi),%eax
   2487 	movzbl	(%rsi),%ecx
   2488 	leaq	1(%rdi),%rdi
   2489 	movb	%al,(%rsi)
   2490 	movb	%cl,16(%rsi)
   2491 	leaq	1(%rsi),%rsi
   2492 	subq	$1,%rdx
   2493 	jnz	.Lxts_dec_steal
   2494 
   2495 	subq	%r9,%rsi
   2496 	movq	%r11,%rcx
   2497 	movl	%r10d,%eax
   2498 
   2499 	movups	(%rsi),%xmm2
   2500 	xorps	%xmm10,%xmm2
   2501 	movups	(%rcx),%xmm0
   2502 	movups	16(%rcx),%xmm1
   2503 	leaq	32(%rcx),%rcx
   2504 	xorps	%xmm0,%xmm2
   2505 .Loop_dec1_14:
   2506 .byte	102,15,56,222,209
   2507 	decl	%eax
   2508 	movups	(%rcx),%xmm1
   2509 	leaq	16(%rcx),%rcx
   2510 	jnz	.Loop_dec1_14
   2511 .byte	102,15,56,223,209
   2512 	xorps	%xmm10,%xmm2
   2513 	movups	%xmm2,(%rsi)
   2514 
   2515 .Lxts_dec_ret:
   2516 	xorps	%xmm0,%xmm0
   2517 	pxor	%xmm1,%xmm1
   2518 	pxor	%xmm2,%xmm2
   2519 	pxor	%xmm3,%xmm3
   2520 	pxor	%xmm4,%xmm4
   2521 	pxor	%xmm5,%xmm5
   2522 	pxor	%xmm6,%xmm6
   2523 	pxor	%xmm7,%xmm7
   2524 	movaps	%xmm0,0(%rsp)
   2525 	pxor	%xmm8,%xmm8
   2526 	movaps	%xmm0,16(%rsp)
   2527 	pxor	%xmm9,%xmm9
   2528 	movaps	%xmm0,32(%rsp)
   2529 	pxor	%xmm10,%xmm10
   2530 	movaps	%xmm0,48(%rsp)
   2531 	pxor	%xmm11,%xmm11
   2532 	movaps	%xmm0,64(%rsp)
   2533 	pxor	%xmm12,%xmm12
   2534 	movaps	%xmm0,80(%rsp)
   2535 	pxor	%xmm13,%xmm13
   2536 	movaps	%xmm0,96(%rsp)
   2537 	pxor	%xmm14,%xmm14
   2538 	pxor	%xmm15,%xmm15
   2539 	leaq	(%rbp),%rsp
   2540 	popq	%rbp
   2541 .Lxts_dec_epilogue:
   2542 	.byte	0xf3,0xc3
   2543 .size	aesni_xts_decrypt,.-aesni_xts_decrypt
#------------------------------------------------------------------------------
# void aesni_cbc_encrypt(const unsigned char *in, unsigned char *out,
#                        size_t length, const AES_KEY *key,
#                        unsigned char *ivec, int enc)
# SysV AMD64: rdi=in, rsi=out, rdx=length (bytes), rcx=key, r8=ivec, r9d=enc.
# Register roles inferred from usage below; 240(%rcx) is read as the round
# count, matching OpenSSL's AES_KEY layout -- TODO confirm against prototype.
# Note: the stored round count is used directly as the aesenc/aesdec loop
# trip count (i.e. number of middle rounds), with one *last round after it.
#------------------------------------------------------------------------------
.globl	aesni_cbc_encrypt
.hidden aesni_cbc_encrypt
.type	aesni_cbc_encrypt,@function
.align	16
aesni_cbc_encrypt:
	testq	%rdx,%rdx		# zero-length input: nothing to do
	jz	.Lcbc_ret

	movl	240(%rcx),%r10d		# r10d = round count from key schedule
	movq	%rcx,%r11		# r11 = saved key pointer (rcx is reused)
	testl	%r9d,%r9d		# enc == 0 selects the decrypt path
	jz	.Lcbc_decrypt

# ---------- CBC encryption: inherently serial, one block at a time ----------
	movups	(%r8),%xmm2		# xmm2 = IV / running chaining value
	movl	%r10d,%eax
	cmpq	$16,%rdx
	jb	.Lcbc_enc_tail		# less than one full block
	subq	$16,%rdx
	jmp	.Lcbc_enc_loop
.align	16
.Lcbc_enc_loop:
	movups	(%rdi),%xmm3		# load next plaintext block
	leaq	16(%rdi),%rdi

	movups	(%rcx),%xmm0		# round key 0
	movups	16(%rcx),%xmm1		# round key 1
	xorps	%xmm0,%xmm3		# whitening folded into the plaintext
	leaq	32(%rcx),%rcx
	xorps	%xmm3,%xmm2		# xmm2 = (pt ^ rk0) ^ chaining value
.Loop_enc1_15:
.byte	102,15,56,220,209	# aesenc	%xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_15
.byte	102,15,56,221,209	# aesenclast	%xmm1,%xmm2
	movl	%r10d,%eax		# reset round counter for next block
	movq	%r11,%rcx		# reset key pointer
	movups	%xmm2,0(%rsi)		# store ciphertext (= next chaining value)
	leaq	16(%rsi),%rsi
	subq	$16,%rdx
	jnc	.Lcbc_enc_loop
	addq	$16,%rdx
	jnz	.Lcbc_enc_tail		# partial final block remains
	pxor	%xmm0,%xmm0		# scrub key/state material from xmm regs
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%r8)		# write back final IV
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	jmp	.Lcbc_ret

.Lcbc_enc_tail:
	movq	%rdx,%rcx		# rcx = remaining bytes (< 16)
	xchgq	%rdi,%rsi		# rep movsb wants rsi=src, rdi=dst
.long	0x9066A4F3	# rep movsb; data16 nop: copy tail into out buffer
	movl	$16,%ecx
	subq	%rdx,%rcx		# rcx = 16 - remainder
	xorl	%eax,%eax
.long	0x9066AAF3	# rep stosb; data16 nop: zero-pad to a full block
	leaq	-16(%rdi),%rdi		# rdi -> start of the padded block
	movl	%r10d,%eax
	movq	%rdi,%rsi		# encrypt the padded block in place
	movq	%r11,%rcx
	xorq	%rdx,%rdx		# length 0: loop runs exactly once more
	jmp	.Lcbc_enc_loop

# ---------- CBC decryption ----------
.align	16
.Lcbc_decrypt:
	cmpq	$16,%rdx
	jne	.Lcbc_decrypt_bulk

# Exactly one block: lightweight path, no stack frame needed.

	movdqu	(%rdi),%xmm2
	movdqu	(%r8),%xmm3		# xmm3 = IV
	movdqa	%xmm2,%xmm4		# keep raw ciphertext: it becomes next IV
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_16:
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
	decl	%r10d
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_16
.byte	102,15,56,223,209	# aesdeclast	%xmm1,%xmm2
	pxor	%xmm0,%xmm0		# scrub round keys
	pxor	%xmm1,%xmm1
	movdqu	%xmm4,(%r8)		# store new IV (this block's ciphertext)
	xorps	%xmm3,%xmm2		# plaintext = D(ct) ^ IV
	pxor	%xmm3,%xmm3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	.Lcbc_ret
.align	16
.Lcbc_decrypt_bulk:
	leaq	(%rsp),%rax
	pushq	%rbp
	subq	$16,%rsp		# 16-byte scratch slot for the partial tail
	andq	$-16,%rsp		# keep scratch 16-byte aligned (movaps below)
	leaq	-8(%rax),%rbp		# rbp = frame anchor for epilogue restore
	movups	(%r8),%xmm10		# xmm10 = IV
	movl	%r10d,%eax
	cmpq	$80,%rdx
	jbe	.Lcbc_dec_tail		# at most 5 blocks: small-count path

	movups	(%rcx),%xmm0
	movdqu	0(%rdi),%xmm2		# xmm2..xmm7 = first 6 ciphertext blocks
	movdqu	16(%rdi),%xmm3
	movdqa	%xmm2,%xmm11		# xmm11..xmm15 keep raw ciphertext copies
	movdqu	32(%rdi),%xmm4		#   (needed later as CBC chaining values)
	movdqa	%xmm3,%xmm12
	movdqu	48(%rdi),%xmm5
	movdqa	%xmm4,%xmm13
	movdqu	64(%rdi),%xmm6
	movdqa	%xmm5,%xmm14
	movdqu	80(%rdi),%xmm7
	movdqa	%xmm6,%xmm15
	movl	OPENSSL_ia32cap_P+4(%rip),%r9d	# CPU capability word 1
	cmpq	$112,%rdx
	jbe	.Lcbc_dec_six_or_seven

	andl	$71303168,%r9d		# isolate bits 26|22 (XSAVE|MOVBE, presumably)
	subq	$80,%rdx
	cmpl	$4194304,%r9d		# MOVBE w/o XSAVE: looks like a Silvermont-
	je	.Lcbc_dec_loop6_enter	#   class Atom check; 6-wide loop is faster there
	subq	$32,%rdx
	leaq	112(%rcx),%rcx		# bias key ptr: loads below use N-112(%rcx)
	jmp	.Lcbc_dec_loop8_enter
.align	16
.Lcbc_dec_loop8:
	movups	%xmm9,(%rsi)		# flush 8th plaintext of previous batch
	leaq	16(%rsi),%rsi
.Lcbc_dec_loop8_enter:
	movdqu	96(%rdi),%xmm8		# blocks 7 and 8 of this batch
	pxor	%xmm0,%xmm2		# xor round key 0 into all 8 streams
	movdqu	112(%rdi),%xmm9
	pxor	%xmm0,%xmm3
	movups	16-112(%rcx),%xmm1
	pxor	%xmm0,%xmm4
	xorq	%r11,%r11
	cmpq	$112,%rdx		# CF consumed by setnc far below
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8

.byte	102,15,56,222,209	# aesdec %xmm1,%xmm2 (8 interleaved streams)
	pxor	%xmm0,%xmm9
	movups	32-112(%rcx),%xmm0
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193	# aesdec %xmm1,%xmm8
	setnc	%r11b			# r11 = 128 if a full batch follows, else 0
	shlq	$7,%r11
.byte	102,68,15,56,222,201	# aesdec %xmm1,%xmm9
	addq	%rdi,%r11		# r11 -> ciphertext used for next chaining
	movups	48-112(%rcx),%xmm1
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	64-112(%rcx),%xmm0
	nop				# alignment filler for the decode window
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	80-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	96-112(%rcx),%xmm0
	nop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	112-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	128-112(%rcx),%xmm0
	nop
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	144-112(%rcx),%xmm1
	cmpl	$11,%eax		# branch out early for 128/192-bit keys
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	160-112(%rcx),%xmm0
	jb	.Lcbc_dec_done		# < 11 rounds: AES-128 done with middle rounds
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	176-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	192-112(%rcx),%xmm0
	je	.Lcbc_dec_done		# == 11 rounds: AES-192 done
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movups	208-112(%rcx),%xmm1
	nop
.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
.byte	102,68,15,56,222,192
.byte	102,68,15,56,222,200
	movups	224-112(%rcx),%xmm0	# AES-256 final round key
	jmp	.Lcbc_dec_done
.align	16
.Lcbc_dec_done:
.byte	102,15,56,222,209	# penultimate aesdec round for all 8 streams,
.byte	102,15,56,222,217	#   interleaved with xoring the last round key
	pxor	%xmm0,%xmm10		#   into the chaining values (xmm10..15)
	pxor	%xmm0,%xmm11
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm12
	pxor	%xmm0,%xmm13
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	pxor	%xmm0,%xmm14
	pxor	%xmm0,%xmm15
.byte	102,68,15,56,222,193
.byte	102,68,15,56,222,201
	movdqu	80(%rdi),%xmm1

.byte	102,65,15,56,223,210	# aesdeclast %xmm10,%xmm2: last round fused
	movdqu	96(%rdi),%xmm10		#   with the CBC xor of the chaining value
	pxor	%xmm0,%xmm1
.byte	102,65,15,56,223,219	# aesdeclast %xmm11,%xmm3
	pxor	%xmm0,%xmm10
	movdqu	112(%rdi),%xmm0
.byte	102,65,15,56,223,228	# aesdeclast %xmm12,%xmm4
	leaq	128(%rdi),%rdi
	movdqu	0(%r11),%xmm11		# reload chaining ct from r11 (see setnc)
.byte	102,65,15,56,223,237	# aesdeclast %xmm13,%xmm5
.byte	102,65,15,56,223,246	# aesdeclast %xmm14,%xmm6
	movdqu	16(%r11),%xmm12
	movdqu	32(%r11),%xmm13
.byte	102,65,15,56,223,255	# aesdeclast %xmm15,%xmm7
.byte	102,68,15,56,223,193	# aesdeclast %xmm1,%xmm8
	movdqu	48(%r11),%xmm14
	movdqu	64(%r11),%xmm15
.byte	102,69,15,56,223,202	# aesdeclast %xmm10,%xmm9
	movdqa	%xmm0,%xmm10		# next IV = last ciphertext block
	movdqu	80(%r11),%xmm1
	movups	-112(%rcx),%xmm0	# reload round key 0 for next batch

	movups	%xmm2,(%rsi)		# store 7 plaintexts; 8th (xmm9->xmm2 slot)
	movdqa	%xmm11,%xmm2		#   is flushed at the top of the loop
	movups	%xmm3,16(%rsi)
	movdqa	%xmm12,%xmm3
	movups	%xmm4,32(%rsi)
	movdqa	%xmm13,%xmm4
	movups	%xmm5,48(%rsi)
	movdqa	%xmm14,%xmm5
	movups	%xmm6,64(%rsi)
	movdqa	%xmm15,%xmm6
	movups	%xmm7,80(%rsi)
	movdqa	%xmm1,%xmm7
	movups	%xmm8,96(%rsi)
	leaq	112(%rsi),%rsi

	subq	$128,%rdx
	ja	.Lcbc_dec_loop8

	movaps	%xmm9,%xmm2		# xmm2 = last decrypted block (collected)
	leaq	-112(%rcx),%rcx		# undo key-pointer bias
	addq	$112,%rdx
	jle	.Lcbc_dec_clear_tail_collected
	movups	%xmm9,(%rsi)
	leaq	16(%rsi),%rsi
	cmpq	$80,%rdx
	jbe	.Lcbc_dec_tail

	movaps	%xmm11,%xmm2
.Lcbc_dec_six_or_seven:
	cmpq	$96,%rdx
	ja	.Lcbc_dec_seven

	movaps	%xmm7,%xmm8		# save 6th ciphertext for IV update
	call	_aesni_decrypt6
	pxor	%xmm10,%xmm2		# xor chaining values -> plaintexts
	movaps	%xmm8,%xmm10		# next IV
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3		# scrub as we go
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	pxor	%xmm14,%xmm6
	movdqu	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	pxor	%xmm15,%xmm7
	movdqu	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	leaq	80(%rsi),%rsi
	movdqa	%xmm7,%xmm2		# last plaintext held for tail handling
	pxor	%xmm7,%xmm7
	jmp	.Lcbc_dec_tail_collected

.align	16
.Lcbc_dec_seven:
	movups	96(%rdi),%xmm8		# 7th ciphertext block
	xorps	%xmm9,%xmm9		# 8th stream unused: decrypt zeros
	call	_aesni_decrypt8
	movups	80(%rdi),%xmm9		# chaining value for block 7
	pxor	%xmm10,%xmm2
	movups	96(%rdi),%xmm10		# next IV = 7th ciphertext
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	pxor	%xmm14,%xmm6
	movdqu	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	pxor	%xmm15,%xmm7
	movdqu	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm9,%xmm8
	movdqu	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	leaq	96(%rsi),%rsi
	movdqa	%xmm8,%xmm2
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	.Lcbc_dec_tail_collected

# 6-blocks-wide main loop (taken on CPUs flagged above).
.align	16
.Lcbc_dec_loop6:
	movups	%xmm7,(%rsi)		# flush last plaintext of previous batch
	leaq	16(%rsi),%rsi
	movdqu	0(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqa	%xmm2,%xmm11		# keep ciphertexts for chaining
	movdqu	32(%rdi),%xmm4
	movdqa	%xmm3,%xmm12
	movdqu	48(%rdi),%xmm5
	movdqa	%xmm4,%xmm13
	movdqu	64(%rdi),%xmm6
	movdqa	%xmm5,%xmm14
	movdqu	80(%rdi),%xmm7
	movdqa	%xmm6,%xmm15
.Lcbc_dec_loop6_enter:
	leaq	96(%rdi),%rdi
	movdqa	%xmm7,%xmm8		# 6th ciphertext = next IV candidate

	call	_aesni_decrypt6

	pxor	%xmm10,%xmm2		# apply CBC chaining
	movdqa	%xmm8,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm14,%xmm6
	movq	%r11,%rcx		# restore key ptr clobbered by helper
	movdqu	%xmm5,48(%rsi)
	pxor	%xmm15,%xmm7
	movl	%r10d,%eax
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	subq	$96,%rdx
	ja	.Lcbc_dec_loop6

	movdqa	%xmm7,%xmm2
	addq	$80,%rdx
	jle	.Lcbc_dec_clear_tail_collected
	movups	%xmm7,(%rsi)
	leaq	16(%rsi),%rsi

# 1..5 remaining blocks: decide width by repeated subtraction.
.Lcbc_dec_tail:
	movups	(%rdi),%xmm2
	subq	$16,%rdx
	jbe	.Lcbc_dec_one

	movups	16(%rdi),%xmm3
	movaps	%xmm2,%xmm11
	subq	$16,%rdx
	jbe	.Lcbc_dec_two

	movups	32(%rdi),%xmm4
	movaps	%xmm3,%xmm12
	subq	$16,%rdx
	jbe	.Lcbc_dec_three

	movups	48(%rdi),%xmm5
	movaps	%xmm4,%xmm13
	subq	$16,%rdx
	jbe	.Lcbc_dec_four

	movups	64(%rdi),%xmm6		# exactly 5 blocks: use 6-wide helper
	movaps	%xmm5,%xmm14
	movaps	%xmm6,%xmm15
	xorps	%xmm7,%xmm7		# 6th stream is a dummy
	call	_aesni_decrypt6
	pxor	%xmm10,%xmm2
	movaps	%xmm15,%xmm10		# next IV = 5th ciphertext
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	pxor	%xmm14,%xmm6
	movdqu	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	leaq	64(%rsi),%rsi
	movdqa	%xmm6,%xmm2
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	subq	$16,%rdx
	jmp	.Lcbc_dec_tail_collected

.align	16
.Lcbc_dec_one:
	movaps	%xmm2,%xmm11		# single block: inline 1-block decrypt
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_17:
.byte	102,15,56,222,209	# aesdec	%xmm1,%xmm2
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_17
.byte	102,15,56,223,209	# aesdeclast	%xmm1,%xmm2
	xorps	%xmm10,%xmm2
	movaps	%xmm11,%xmm10
	jmp	.Lcbc_dec_tail_collected
.align	16
.Lcbc_dec_two:
	movaps	%xmm3,%xmm12
	call	_aesni_decrypt2
	pxor	%xmm10,%xmm2
	movaps	%xmm12,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	movdqa	%xmm3,%xmm2
	pxor	%xmm3,%xmm3
	leaq	16(%rsi),%rsi
	jmp	.Lcbc_dec_tail_collected
.align	16
.Lcbc_dec_three:
	movaps	%xmm4,%xmm13
	call	_aesni_decrypt3
	pxor	%xmm10,%xmm2
	movaps	%xmm13,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movdqa	%xmm4,%xmm2
	pxor	%xmm4,%xmm4
	leaq	32(%rsi),%rsi
	jmp	.Lcbc_dec_tail_collected
.align	16
.Lcbc_dec_four:
	movaps	%xmm5,%xmm14
	call	_aesni_decrypt4
	pxor	%xmm10,%xmm2
	movaps	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movdqa	%xmm5,%xmm2
	pxor	%xmm5,%xmm5
	leaq	48(%rsi),%rsi
	jmp	.Lcbc_dec_tail_collected

.align	16
.Lcbc_dec_clear_tail_collected:
	pxor	%xmm3,%xmm3		# scrub all sensitive registers
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
.Lcbc_dec_tail_collected:
	movups	%xmm10,(%r8)		# write back the updated IV
	andq	$15,%rdx
	jnz	.Lcbc_dec_tail_partial
	movups	%xmm2,(%rsi)		# full final block
	pxor	%xmm2,%xmm2
	jmp	.Lcbc_dec_ret
.align	16
.Lcbc_dec_tail_partial:
	movaps	%xmm2,(%rsp)		# spill plaintext to aligned scratch...
	pxor	%xmm2,%xmm2
	movq	$16,%rcx
	movq	%rsi,%rdi
	subq	%rdx,%rcx		# rcx = bytes actually requested
	leaq	(%rsp),%rsi
.long	0x9066A4F3	# rep movsb; data16 nop: ...and copy only that many
	movdqa	%xmm2,(%rsp)		# scrub the scratch slot
	
.Lcbc_dec_ret:
	xorps	%xmm0,%xmm0		# scrub remaining key material
	pxor	%xmm1,%xmm1
	leaq	(%rbp),%rsp		# restore caller's stack
	popq	%rbp
.Lcbc_ret:
	.byte	0xf3,0xc3	# ret (explicit bytes, perlasm convention)
.size	aesni_cbc_encrypt,.-aesni_cbc_encrypt
#------------------------------------------------------------------------------
# int aesni_set_decrypt_key(const unsigned char *userKey (%rdi),
#                           int bits (%esi), AES_KEY *key (%rdx))
# Builds the encryption schedule, then inverts it in place for the
# "Equivalent Inverse Cipher": round keys are reversed in order and the
# middle ones passed through aesimc (InvMixColumns).
# Returns whatever __aesni_set_encrypt_key returns (0 ok, negative on error).
#------------------------------------------------------------------------------
.globl	aesni_set_decrypt_key
.hidden aesni_set_decrypt_key
.type	aesni_set_decrypt_key,@function
.align	16
aesni_set_decrypt_key:
.byte	0x48,0x83,0xEC,0x08	# subq $8,%rsp (explicit bytes; realigns stack)
	call	__aesni_set_encrypt_key
	shll	$4,%esi			# esi = rounds*16 (key schedule byte span)
	testl	%eax,%eax
	jnz	.Ldec_key_ret		# propagate key-setup failure
	leaq	16(%rdx,%rsi,1),%rdi	# rdi -> last round key

	movups	(%rdx),%xmm0		# swap first and last round keys
	movups	(%rdi),%xmm1		#   (these two are NOT aesimc-transformed)
	movups	%xmm0,(%rdi)
	movups	%xmm1,(%rdx)
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi

.Ldec_key_inverse:
	movups	(%rdx),%xmm0		# walk inward from both ends,
	movups	(%rdi),%xmm1		#   swapping and InvMixColumns-ing
.byte	102,15,56,219,192	# aesimc	%xmm0,%xmm0
.byte	102,15,56,219,201	# aesimc	%xmm1,%xmm1
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi
	movups	%xmm0,16(%rdi)
	movups	%xmm1,-16(%rdx)
	cmpq	%rdx,%rdi
	ja	.Ldec_key_inverse

	movups	(%rdx),%xmm0		# middle round key: transform in place
.byte	102,15,56,219,192	# aesimc	%xmm0,%xmm0
	pxor	%xmm1,%xmm1		# scrub key material
	movups	%xmm0,(%rdi)
	pxor	%xmm0,%xmm0
.Ldec_key_ret:
	addq	$8,%rsp
	.byte	0xf3,0xc3	# ret
.LSEH_end_set_decrypt_key:
.size	aesni_set_decrypt_key,.-aesni_set_decrypt_key
#------------------------------------------------------------------------------
# int aesni_set_encrypt_key(const unsigned char *userKey (%rdi),
#                           int bits (%esi), AES_KEY *key (%rdx))
# Expands a 128/192/256-bit user key into the AES-NI round-key schedule.
# Returns 0 on success, -1 on NULL userKey/key, -2 on unsupported bit count.
# Note: the round count stored into the schedule is 9/11/13 (Nr-1); the
# cipher routines in this file use it directly as their aesenc/aesdec loop
# trip count before the final *last round.
# Each key size has two code paths: the classic aeskeygenassist-based one,
# and an "_alt" pshufb/aesenclast-based one selected by the CPUID check
# below (taken when only the AVX bit of the masked pair is set).
#------------------------------------------------------------------------------
.globl	aesni_set_encrypt_key
.hidden aesni_set_encrypt_key
.type	aesni_set_encrypt_key,@function
.align	16
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte	0x48,0x83,0xEC,0x08	# subq $8,%rsp (explicit bytes; realigns stack)
	movq	$-1,%rax		# default return: -1 (bad pointer)
	testq	%rdi,%rdi
	jz	.Lenc_key_ret
	testq	%rdx,%rdx
	jz	.Lenc_key_ret

	movl	$268437504,%r10d	# mask = bits 28|11 (AVX and, presumably, XOP)
	movups	(%rdi),%xmm0		# xmm0 = first 16 key bytes
	xorps	%xmm4,%xmm4		# xmm4 = 0, scratch for shufps mixing
	andl	OPENSSL_ia32cap_P+4(%rip),%r10d
	leaq	16(%rdx),%rax		# rax = write cursor (past round key 0)
	cmpl	$256,%esi
	je	.L14rounds
	cmpl	$192,%esi
	je	.L12rounds
	cmpl	$128,%esi
	jne	.Lbad_keybits

.L10rounds:
	movl	$9,%esi			# stored round count for AES-128
	cmpl	$268435456,%r10d	# only AVX bit set -> take the alt path
	je	.L10rounds_alt

	movups	%xmm0,(%rdx)		# round key 0 = raw user key
.byte	102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1 (rcon=0x01)
	call	.Lkey_expansion_128_cold
.byte	102,15,58,223,200,2	# aeskeygenassist $2,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,4	# ...rcon doubles each round: 4,8,16,...
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,64
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,128
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,27	# rcon wraps: 0x80*2 mod x^8+x^4+x^3+x+1 = 0x1b
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,54	# rcon 0x36
	call	.Lkey_expansion_128
	movups	%xmm0,(%rax)		# final (10th) round key
	movl	%esi,80(%rax)		# rounds field at key+240
	xorl	%eax,%eax		# return 0
	jmp	.Lenc_key_ret

.align	16
.L10rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5	# pshufb mask: RotWord+SubWord prep
	movl	$8,%r10d
	movdqa	.Lkey_rcon1(%rip),%xmm4		# running round constant
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key128

.align	16
.Loop_key128:
.byte	102,15,56,0,197		# pshufb	%xmm5,%xmm0
.byte	102,15,56,221,196	# aesenclast	%xmm4,%xmm0 (SubWord via AES round)
	pslld	$1,%xmm4		# double the round constant
	leaq	16(%rax),%rax

	movdqa	%xmm2,%xmm3		# xmm3 = prev key ^ its 3 left-shifts
	pslldq	$4,%xmm2		#   (the standard key-schedule word chain)
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)		# emit this round key
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	.Loop_key128

	movdqa	.Lkey_rcon1b(%rip),%xmm4	# rcon for round 9 (0x1b pattern)

.byte	102,15,56,0,197		# pshufb	%xmm5,%xmm0
.byte	102,15,56,221,196	# aesenclast	%xmm4,%xmm0
	pslld	$1,%xmm4

	movdqa	%xmm2,%xmm3		# same word-chain mix as the loop
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)		# round key 9

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197		# final expansion for round key 10
.byte	102,15,56,221,196

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)

	movl	%esi,96(%rax)		# rounds field at key+240
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L12rounds:
	movq	16(%rdi),%xmm2		# remaining 8 key bytes (192-bit key)
	movl	$11,%esi		# stored round count for AES-192
	cmpl	$268435456,%r10d
	je	.L12rounds_alt

	movups	%xmm0,(%rdx)
.byte	102,15,58,223,202,1	# aeskeygenassist $1,%xmm2,%xmm1
	call	.Lkey_expansion_192a_cold
.byte	102,15,58,223,202,2
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,4
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,8
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,16
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,32
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,64
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,128
	call	.Lkey_expansion_192b
	movups	%xmm0,(%rax)
	movl	%esi,48(%rax)		# rounds field at key+240
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L12rounds_alt:
	movdqa	.Lkey_rotate192(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$8,%r10d
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key192

.align	16
.Loop_key192:
	movq	%xmm2,0(%rax)		# emit low 8 bytes of the 24-byte chunk
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213		# pshufb	%xmm5,%xmm2
.byte	102,15,56,221,212	# aesenclast	%xmm4,%xmm2
	pslld	$1,%xmm4		# next round constant
	leaq	24(%rax),%rax		# 192-bit schedule advances 24 bytes/iter

	movdqa	%xmm0,%xmm3		# word-chain mix of the previous 16 bytes
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0

	pshufd	$255,%xmm0,%xmm3	# broadcast last word
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3

	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%rax)

	decl	%r10d
	jnz	.Loop_key192

	movl	%esi,32(%rax)		# rounds field at key+240
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.L14rounds:
	movups	16(%rdi),%xmm2		# second 16 key bytes (256-bit key)
	movl	$13,%esi		# stored round count for AES-256
	leaq	16(%rax),%rax
	cmpl	$268435456,%r10d
	je	.L14rounds_alt

	movups	%xmm0,(%rdx)		# round keys 0 and 1 = raw user key
	movups	%xmm2,16(%rdx)
.byte	102,15,58,223,202,1	# aeskeygenassist $1,%xmm2,%xmm1
	call	.Lkey_expansion_256a_cold
.byte	102,15,58,223,200,1	# aeskeygenassist $1,%xmm0,%xmm1 (no rcon step)
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,2
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,2
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,4
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,4
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,8
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,8
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,16
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,16
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,32
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,32
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,64
	call	.Lkey_expansion_256a
	movups	%xmm0,(%rax)
	movl	%esi,16(%rax)		# rounds field at key+240
	xorq	%rax,%rax
	jmp	.Lenc_key_ret

.align	16
.L14rounds_alt:
	movdqa	.Lkey_rotate(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$7,%r10d		# 7 iterations produce the 13 middle keys
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	.Loop_key256

.align	16
.Loop_key256:
.byte	102,15,56,0,213		# pshufb	%xmm5,%xmm2
.byte	102,15,56,221,212	# aesenclast	%xmm4,%xmm2

	movdqa	%xmm0,%xmm3		# word-chain mix (even round key)
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4		# next round constant

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	.Ldone_key256

	pshufd	$255,%xmm0,%xmm2	# odd round key: SubWord only, no rcon
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211	# aesenclast	%xmm3,%xmm2

	movdqa	%xmm1,%xmm3		# word-chain mix (odd round key)
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	.Loop_key256

.Ldone_key256:
	movl	%esi,16(%rax)		# rounds field at key+240
	xorl	%eax,%eax
	jmp	.Lenc_key_ret

.align	16
.Lbad_keybits:
	movq	$-2,%rax		# unsupported key size
.Lenc_key_ret:
	pxor	%xmm0,%xmm0		# scrub key material before returning
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp
	.byte	0xf3,0xc3	# ret
.LSEH_end_set_encrypt_key:
   3472 
   3473 .align	16
   3474 .Lkey_expansion_128:
   3475 	movups	%xmm0,(%rax)
   3476 	leaq	16(%rax),%rax
   3477 .Lkey_expansion_128_cold:
   3478 	shufps	$16,%xmm0,%xmm4
   3479 	xorps	%xmm4,%xmm0
   3480 	shufps	$140,%xmm0,%xmm4
   3481 	xorps	%xmm4,%xmm0
   3482 	shufps	$255,%xmm1,%xmm1
   3483 	xorps	%xmm1,%xmm0
   3484 	.byte	0xf3,0xc3
   3485 
.align	16
# AES-192 key-expansion step "a": stores the previous round key, saves the
# 64-bit key tail (xmm2) into xmm5 for later repacking, then performs the
# expansion.  _cold skips the store; _192b_warm is the shared expansion
# body also reached from .Lkey_expansion_192b.
# In:  xmm0 = previous 128 bits of key stream, xmm2 = 64-bit tail (low qword),
#      xmm1 = aeskeygenassist result, xmm4 = scratch.
.Lkey_expansion_192a:
	movups	%xmm0,(%rax)		# emit previous round key
	leaq	16(%rax),%rax
.Lkey_expansion_192a_cold:
	movaps	%xmm2,%xmm5		# preserve tail for the "b" repack step
.Lkey_expansion_192b_warm:
	# Interleaved: shufps/xorps prefix-XOR for xmm0 (as in the 128-bit
	# path) plus pslldq/pxor to advance the 64-bit tail in xmm2.
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1		# broadcast dword1: RotWord(SubWord)^rcon (Intel SDM)
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0		# xmm0 = next 128 bits of key stream
	pshufd	$255,%xmm0,%xmm3	# broadcast its last word ...
	pxor	%xmm3,%xmm2		# ... into the updated 64-bit tail
	.byte	0xf3,0xc3		# rep ret
   3505 
.align	16
# AES-192 key-expansion step "b": repacks the 192-bit key stream into two
# 128-bit round-key slots before expanding.
# xmm5 holds the tail saved by step "a"; shufps $68/$78 stitch
# {xmm5,xmm0,xmm2} into two aligned round keys, then fall into the shared
# expansion body.
.Lkey_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5		# slot = {xmm5[0],xmm5[1],xmm0[0],xmm0[1]}
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3		# slot = {xmm0[2],xmm0[3],xmm2[0],xmm2[1]}
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax
	jmp	.Lkey_expansion_192b_warm
   3515 
.align	16
# AES-256 key-expansion, even step: stores the previous odd round key
# (xmm2) and computes the next even round key into xmm0.
# Same shufps/xorps prefix-XOR construction as the 128-bit path;
# dword 3 of xmm1 carries RotWord(SubWord(w7)) ^ rcon (Intel SDM).
# _cold skips the store (first step).
.Lkey_expansion_256a:
	movups	%xmm2,(%rax)		# emit previous (odd) round key
	leaq	16(%rax),%rax
.Lkey_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4		# propagate words (prefix-XOR), part 1
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4	# propagate words, part 2
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	# broadcast RotWord(SubWord)^rcon
	xorps	%xmm1,%xmm0		# xmm0 = next even round key
	.byte	0xf3,0xc3		# rep ret
   3528 
.align	16
# AES-256 key-expansion, odd step: stores the previous even round key
# (xmm0) and computes the next odd round key into xmm2.  Uses shufps $170
# to broadcast dword 2 of the aeskeygenassist result — SubWord only, no
# RotWord and no rcon (Intel SDM), as the AES-256 schedule requires for
# these steps.
.Lkey_expansion_256b:
	movups	%xmm0,(%rax)		# emit previous (even) round key
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4		# propagate words (prefix-XOR), part 1
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4	# propagate words, part 2
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1	# broadcast SubWord(w) (no rotate, no rcon)
	xorps	%xmm1,%xmm2		# xmm2 = next odd round key
	.byte	0xf3,0xc3		# rep ret
.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
.align	64
# Constant pool shared by the AES-NI routines in this file.  Uses of the
# CTR/XTS constants are in code above this chunk — NOTE(review): confirm
# against the full file.
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	# pshufb mask: reverse all 16 bytes
.Lincrement32:
.long	6,6,6,0			# presumably CTR32 per-block increment (6-way interleave)
.Lincrement64:
.long	1,0,0,0			# add 1 to a little-endian 64-bit counter lane
.Lxts_magic:
.long	0x87,0,1,0		# XTS tweak constants (GF(2^128) feedback 0x87)
.Lincrement1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1		# big-endian 128-bit +1 (after bswap)
.Lkey_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	# pshufb mask: RotWord (256-bit schedule)
.Lkey_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605	# pshufb mask: word shuffle for 192-bit schedule
.Lkey_rcon1:
.long	1,1,1,1			# initial AES round constant
.Lkey_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b	# round constant after rcon wraps (x^4+x^3+x+1)

# ASCII banner: "AES for Intel AES-NI, CRYPTOGAMS by <appro(at)openssl.org>"
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
   3565 #endif
   3566