Home | History | Annotate | Download | only in aes
      1 default	rel
      2 %define XMMWORD
      3 %define YMMWORD
      4 %define ZMMWORD
      5 section	.text code align=64
      6 
      7 EXTERN	OPENSSL_ia32cap_P
      8 global	aesni_encrypt
      9 
     10 ALIGN	16
     11 aesni_encrypt:
        ; Encrypt a single 16-byte block with AES-NI.
        ; In (first three Win64 argument registers):
        ;   rcx = pointer to the 16-byte input block
        ;   rdx = pointer to the 16-byte output block
        ;   r8  = pointer to the expanded key; the dword at offset 240 is
        ;         used as the number of AESENC iterations before AESENCLAST
        ; Clobbers: eax, r8, flags; xmm0-xmm2 are zeroed before returning.
     12 	movups	xmm2,XMMWORD[rcx]
     13 	mov	eax,DWORD[240+r8]
     14 	movups	xmm0,XMMWORD[r8]
     15 	movups	xmm1,XMMWORD[16+r8]
     16 	lea	r8,[32+r8]
        ; initial whitening: state ^= first 16 bytes of the key schedule
     17 	xorps	xmm2,xmm0
     18 $L$oop_enc1_1:
        ; aesenc xmm2,xmm1 (hand-encoded)
     19 DB	102,15,56,220,209
        ; movups/lea leave flags alone, so the jnz below tests this dec
     20 	dec	eax
     21 	movups	xmm1,XMMWORD[r8]
     22 	lea	r8,[16+r8]
     23 	jnz	NEAR $L$oop_enc1_1
        ; aesenclast xmm2,xmm1
     24 DB	102,15,56,221,209
        ; wipe key material and the result from the XMM registers
     25 	pxor	xmm0,xmm0
     26 	pxor	xmm1,xmm1
     27 	movups	XMMWORD[rdx],xmm2
     28 	pxor	xmm2,xmm2
     29 	DB	0F3h,0C3h		;repret
     30 
     31 
     32 global	aesni_decrypt
     33 
     34 ALIGN	16
     35 aesni_decrypt:
        ; Decrypt a single 16-byte block with AES-NI.
        ; In (first three Win64 argument registers):
        ;   rcx = pointer to the 16-byte input block
        ;   rdx = pointer to the 16-byte output block
        ;   r8  = pointer to the expanded key; the dword at offset 240 is
        ;         used as the number of AESDEC iterations before AESDECLAST
        ; Clobbers: eax, r8, flags; xmm0-xmm2 are zeroed before returning.
     36 	movups	xmm2,XMMWORD[rcx]
     37 	mov	eax,DWORD[240+r8]
     38 	movups	xmm0,XMMWORD[r8]
     39 	movups	xmm1,XMMWORD[16+r8]
     40 	lea	r8,[32+r8]
        ; initial whitening: state ^= first 16 bytes of the key schedule
     41 	xorps	xmm2,xmm0
     42 $L$oop_dec1_2:
        ; aesdec xmm2,xmm1 (hand-encoded)
     43 DB	102,15,56,222,209
        ; movups/lea leave flags alone, so the jnz below tests this dec
     44 	dec	eax
     45 	movups	xmm1,XMMWORD[r8]
     46 	lea	r8,[16+r8]
     47 	jnz	NEAR $L$oop_dec1_2
        ; aesdeclast xmm2,xmm1
     48 DB	102,15,56,223,209
        ; wipe key material and the result from the XMM registers
     49 	pxor	xmm0,xmm0
     50 	pxor	xmm1,xmm1
     51 	movups	XMMWORD[rdx],xmm2
     52 	pxor	xmm2,xmm2
     53 	DB	0F3h,0C3h		;repret
     54 
     55 
     56 ALIGN	16
     57 _aesni_encrypt2:
        ; File-local helper (no `global`): encrypt two blocks in parallel.
        ; In:  rcx = expanded key, eax = AESENC iteration count,
        ;      xmm2, xmm3 = input blocks
        ; Out: xmm2, xmm3 = encrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
        ; Round keys are consumed two per loop pass, alternating xmm1/xmm0.
     58 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16 (bytes of round key); the 32-bit write zero-extends into rax
     59 	shl	eax,4
     60 	movups	xmm1,XMMWORD[16+rcx]
     61 	xorps	xmm2,xmm0
     62 	xorps	xmm3,xmm0
     63 	movups	xmm0,XMMWORD[32+rcx]
        ; rcx -> key + 32 + 16*count; rax becomes a negative offset from it
        ; that steps by 32 toward zero (it only hits zero for an odd count)
     64 	lea	rcx,[32+rax*1+rcx]
     65 	neg	rax
     66 	add	rax,16
     67 
     68 $L$enc_loop2:
        ; aesenc xmm2,xmm1 / aesenc xmm3,xmm1
     69 DB	102,15,56,220,209
     70 DB	102,15,56,220,217
     71 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
     72 	add	rax,32
        ; aesenc xmm2,xmm0 / aesenc xmm3,xmm0
     73 DB	102,15,56,220,208
     74 DB	102,15,56,220,216
     75 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
     76 	jnz	NEAR $L$enc_loop2
     77 
        ; last aesenc with xmm1, then aesenclast with xmm0
     78 DB	102,15,56,220,209
     79 DB	102,15,56,220,217
     80 DB	102,15,56,221,208
     81 DB	102,15,56,221,216
     82 	DB	0F3h,0C3h		;repret
     83 
     84 
     85 ALIGN	16
     86 _aesni_decrypt2:
        ; File-local helper (no `global`): decrypt two blocks in parallel.
        ; In:  rcx = expanded key, eax = AESDEC iteration count,
        ;      xmm2, xmm3 = input blocks
        ; Out: xmm2, xmm3 = decrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
        ; Round keys are consumed two per loop pass, alternating xmm1/xmm0.
     87 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16 (bytes of round key); the 32-bit write zero-extends into rax
     88 	shl	eax,4
     89 	movups	xmm1,XMMWORD[16+rcx]
     90 	xorps	xmm2,xmm0
     91 	xorps	xmm3,xmm0
     92 	movups	xmm0,XMMWORD[32+rcx]
        ; rcx -> key + 32 + 16*count; rax becomes a negative offset from it
        ; that steps by 32 toward zero (it only hits zero for an odd count)
     93 	lea	rcx,[32+rax*1+rcx]
     94 	neg	rax
     95 	add	rax,16
     96 
     97 $L$dec_loop2:
        ; aesdec xmm2,xmm1 / aesdec xmm3,xmm1
     98 DB	102,15,56,222,209
     99 DB	102,15,56,222,217
    100 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    101 	add	rax,32
        ; aesdec xmm2,xmm0 / aesdec xmm3,xmm0
    102 DB	102,15,56,222,208
    103 DB	102,15,56,222,216
    104 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    105 	jnz	NEAR $L$dec_loop2
    106 
        ; last aesdec with xmm1, then aesdeclast with xmm0
    107 DB	102,15,56,222,209
    108 DB	102,15,56,222,217
    109 DB	102,15,56,223,208
    110 DB	102,15,56,223,216
    111 	DB	0F3h,0C3h		;repret
    112 
    113 
    114 ALIGN	16
    115 _aesni_encrypt3:
        ; File-local helper (no `global`): encrypt three blocks in parallel.
        ; In:  rcx = expanded key, eax = AESENC iteration count,
        ;      xmm2-xmm4 = input blocks
        ; Out: xmm2-xmm4 = encrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
    116 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16; the 32-bit write zero-extends into rax
    117 	shl	eax,4
    118 	movups	xmm1,XMMWORD[16+rcx]
        ; whitening with the first round key
    119 	xorps	xmm2,xmm0
    120 	xorps	xmm3,xmm0
    121 	xorps	xmm4,xmm0
    122 	movups	xmm0,XMMWORD[32+rcx]
        ; rcx -> key + 32 + 16*count; rax = negative offset counting up to 0
    123 	lea	rcx,[32+rax*1+rcx]
    124 	neg	rax
    125 	add	rax,16
    126 
    127 $L$enc_loop3:
        ; aesenc xmm2..xmm4, xmm1
    128 DB	102,15,56,220,209
    129 DB	102,15,56,220,217
    130 DB	102,15,56,220,225
    131 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    132 	add	rax,32
        ; aesenc xmm2..xmm4, xmm0
    133 DB	102,15,56,220,208
    134 DB	102,15,56,220,216
    135 DB	102,15,56,220,224
    136 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    137 	jnz	NEAR $L$enc_loop3
    138 
        ; last aesenc with xmm1, then aesenclast with xmm0
    139 DB	102,15,56,220,209
    140 DB	102,15,56,220,217
    141 DB	102,15,56,220,225
    142 DB	102,15,56,221,208
    143 DB	102,15,56,221,216
    144 DB	102,15,56,221,224
    145 	DB	0F3h,0C3h		;repret
    146 
    147 
    148 ALIGN	16
    149 _aesni_decrypt3:
        ; File-local helper (no `global`): decrypt three blocks in parallel.
        ; In:  rcx = expanded key, eax = AESDEC iteration count,
        ;      xmm2-xmm4 = input blocks
        ; Out: xmm2-xmm4 = decrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
    150 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16; the 32-bit write zero-extends into rax
    151 	shl	eax,4
    152 	movups	xmm1,XMMWORD[16+rcx]
        ; whitening with the first round key
    153 	xorps	xmm2,xmm0
    154 	xorps	xmm3,xmm0
    155 	xorps	xmm4,xmm0
    156 	movups	xmm0,XMMWORD[32+rcx]
        ; rcx -> key + 32 + 16*count; rax = negative offset counting up to 0
    157 	lea	rcx,[32+rax*1+rcx]
    158 	neg	rax
    159 	add	rax,16
    160 
    161 $L$dec_loop3:
        ; aesdec xmm2..xmm4, xmm1
    162 DB	102,15,56,222,209
    163 DB	102,15,56,222,217
    164 DB	102,15,56,222,225
    165 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    166 	add	rax,32
        ; aesdec xmm2..xmm4, xmm0
    167 DB	102,15,56,222,208
    168 DB	102,15,56,222,216
    169 DB	102,15,56,222,224
    170 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    171 	jnz	NEAR $L$dec_loop3
    172 
        ; last aesdec with xmm1, then aesdeclast with xmm0
    173 DB	102,15,56,222,209
    174 DB	102,15,56,222,217
    175 DB	102,15,56,222,225
    176 DB	102,15,56,223,208
    177 DB	102,15,56,223,216
    178 DB	102,15,56,223,224
    179 	DB	0F3h,0C3h		;repret
    180 
    181 
    182 ALIGN	16
    183 _aesni_encrypt4:
        ; File-local helper (no `global`): encrypt four blocks in parallel.
        ; In:  rcx = expanded key, eax = AESENC iteration count,
        ;      xmm2-xmm5 = input blocks
        ; Out: xmm2-xmm5 = encrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
    184 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16; the 32-bit write zero-extends into rax
    185 	shl	eax,4
    186 	movups	xmm1,XMMWORD[16+rcx]
        ; whitening with the first round key
    187 	xorps	xmm2,xmm0
    188 	xorps	xmm3,xmm0
    189 	xorps	xmm4,xmm0
    190 	xorps	xmm5,xmm0
    191 	movups	xmm0,XMMWORD[32+rcx]
        ; rcx -> key + 32 + 16*count; rax = negative offset counting up to 0
    192 	lea	rcx,[32+rax*1+rcx]
    193 	neg	rax
        ; 3-byte NOP (nop dword [rax]); presumably padding to place the loop head
    194 DB	0x0f,0x1f,0x00
    195 	add	rax,16
    196 
    197 $L$enc_loop4:
        ; aesenc xmm2..xmm5, xmm1
    198 DB	102,15,56,220,209
    199 DB	102,15,56,220,217
    200 DB	102,15,56,220,225
    201 DB	102,15,56,220,233
    202 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    203 	add	rax,32
        ; aesenc xmm2..xmm5, xmm0
    204 DB	102,15,56,220,208
    205 DB	102,15,56,220,216
    206 DB	102,15,56,220,224
    207 DB	102,15,56,220,232
    208 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    209 	jnz	NEAR $L$enc_loop4
    210 
        ; last aesenc with xmm1, then aesenclast with xmm0
    211 DB	102,15,56,220,209
    212 DB	102,15,56,220,217
    213 DB	102,15,56,220,225
    214 DB	102,15,56,220,233
    215 DB	102,15,56,221,208
    216 DB	102,15,56,221,216
    217 DB	102,15,56,221,224
    218 DB	102,15,56,221,232
    219 	DB	0F3h,0C3h		;repret
    220 
    221 
    222 ALIGN	16
    223 _aesni_decrypt4:
        ; File-local helper (no `global`): decrypt four blocks in parallel.
        ; In:  rcx = expanded key, eax = AESDEC iteration count,
        ;      xmm2-xmm5 = input blocks
        ; Out: xmm2-xmm5 = decrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
    224 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16; the 32-bit write zero-extends into rax
    225 	shl	eax,4
    226 	movups	xmm1,XMMWORD[16+rcx]
        ; whitening with the first round key
    227 	xorps	xmm2,xmm0
    228 	xorps	xmm3,xmm0
    229 	xorps	xmm4,xmm0
    230 	xorps	xmm5,xmm0
    231 	movups	xmm0,XMMWORD[32+rcx]
        ; rcx -> key + 32 + 16*count; rax = negative offset counting up to 0
    232 	lea	rcx,[32+rax*1+rcx]
    233 	neg	rax
        ; 3-byte NOP (nop dword [rax]); presumably padding to place the loop head
    234 DB	0x0f,0x1f,0x00
    235 	add	rax,16
    236 
    237 $L$dec_loop4:
        ; aesdec xmm2..xmm5, xmm1
    238 DB	102,15,56,222,209
    239 DB	102,15,56,222,217
    240 DB	102,15,56,222,225
    241 DB	102,15,56,222,233
    242 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    243 	add	rax,32
        ; aesdec xmm2..xmm5, xmm0
    244 DB	102,15,56,222,208
    245 DB	102,15,56,222,216
    246 DB	102,15,56,222,224
    247 DB	102,15,56,222,232
    248 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    249 	jnz	NEAR $L$dec_loop4
    250 
        ; last aesdec with xmm1, then aesdeclast with xmm0
    251 DB	102,15,56,222,209
    252 DB	102,15,56,222,217
    253 DB	102,15,56,222,225
    254 DB	102,15,56,222,233
    255 DB	102,15,56,223,208
    256 DB	102,15,56,223,216
    257 DB	102,15,56,223,224
    258 DB	102,15,56,223,232
    259 	DB	0F3h,0C3h		;repret
    260 
    261 
    262 ALIGN	16
    263 _aesni_encrypt6:
        ; File-local helper (no `global`): encrypt six blocks in parallel.
        ; In:  rcx = expanded key, eax = AESENC iteration count,
        ;      xmm2-xmm7 = input blocks
        ; Out: xmm2-xmm7 = encrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
        ; The whitening XORs are interleaved with the first AESENC of the
        ; earlier blocks, so the loop is entered part-way through a round.
    264 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16; the 32-bit write zero-extends into rax
    265 	shl	eax,4
    266 	movups	xmm1,XMMWORD[16+rcx]
    267 	xorps	xmm2,xmm0
    268 	pxor	xmm3,xmm0
    269 	pxor	xmm4,xmm0
        ; aesenc xmm2,xmm1
    270 DB	102,15,56,220,209
        ; rcx -> key + 32 + 16*count; rax = -16*count
    271 	lea	rcx,[32+rax*1+rcx]
    272 	neg	rax
        ; aesenc xmm3,xmm1
    273 DB	102,15,56,220,217
    274 	pxor	xmm5,xmm0
    275 	pxor	xmm6,xmm0
        ; aesenc xmm4,xmm1
    276 DB	102,15,56,220,225
    277 	pxor	xmm7,xmm0
        ; rax+rcx == key+32 here: load the next round key into xmm0
    278 	movups	xmm0,XMMWORD[rax*1+rcx]
    279 	add	rax,16
        ; xmm2-xmm4 already had this round applied; finish it for xmm5-xmm7
    280 	jmp	NEAR $L$enc_loop6_enter
    281 ALIGN	16
    282 $L$enc_loop6:
        ; aesenc xmm2..xmm4, xmm1
    283 DB	102,15,56,220,209
    284 DB	102,15,56,220,217
    285 DB	102,15,56,220,225
    286 $L$enc_loop6_enter:
        ; aesenc xmm5..xmm7, xmm1
    287 DB	102,15,56,220,233
    288 DB	102,15,56,220,241
    289 DB	102,15,56,220,249
    290 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    291 	add	rax,32
        ; aesenc xmm2..xmm7, xmm0
    292 DB	102,15,56,220,208
    293 DB	102,15,56,220,216
    294 DB	102,15,56,220,224
    295 DB	102,15,56,220,232
    296 DB	102,15,56,220,240
    297 DB	102,15,56,220,248
    298 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    299 	jnz	NEAR $L$enc_loop6
    300 
        ; last aesenc xmm2..xmm7 with xmm1
    301 DB	102,15,56,220,209
    302 DB	102,15,56,220,217
    303 DB	102,15,56,220,225
    304 DB	102,15,56,220,233
    305 DB	102,15,56,220,241
    306 DB	102,15,56,220,249
        ; aesenclast xmm2..xmm7 with xmm0
    307 DB	102,15,56,221,208
    308 DB	102,15,56,221,216
    309 DB	102,15,56,221,224
    310 DB	102,15,56,221,232
    311 DB	102,15,56,221,240
    312 DB	102,15,56,221,248
    313 	DB	0F3h,0C3h		;repret
    314 
    315 
    316 ALIGN	16
    317 _aesni_decrypt6:
        ; File-local helper (no `global`): decrypt six blocks in parallel.
        ; In:  rcx = expanded key, eax = AESDEC iteration count,
        ;      xmm2-xmm7 = input blocks
        ; Out: xmm2-xmm7 = decrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
        ; The whitening XORs are interleaved with the first AESDEC of the
        ; earlier blocks, so the loop is entered part-way through a round.
    318 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16; the 32-bit write zero-extends into rax
    319 	shl	eax,4
    320 	movups	xmm1,XMMWORD[16+rcx]
    321 	xorps	xmm2,xmm0
    322 	pxor	xmm3,xmm0
    323 	pxor	xmm4,xmm0
        ; aesdec xmm2,xmm1
    324 DB	102,15,56,222,209
        ; rcx -> key + 32 + 16*count; rax = -16*count
    325 	lea	rcx,[32+rax*1+rcx]
    326 	neg	rax
        ; aesdec xmm3,xmm1
    327 DB	102,15,56,222,217
    328 	pxor	xmm5,xmm0
    329 	pxor	xmm6,xmm0
        ; aesdec xmm4,xmm1
    330 DB	102,15,56,222,225
    331 	pxor	xmm7,xmm0
        ; rax+rcx == key+32 here: load the next round key into xmm0
    332 	movups	xmm0,XMMWORD[rax*1+rcx]
    333 	add	rax,16
        ; xmm2-xmm4 already had this round applied; finish it for xmm5-xmm7
    334 	jmp	NEAR $L$dec_loop6_enter
    335 ALIGN	16
    336 $L$dec_loop6:
        ; aesdec xmm2..xmm4, xmm1
    337 DB	102,15,56,222,209
    338 DB	102,15,56,222,217
    339 DB	102,15,56,222,225
    340 $L$dec_loop6_enter:
        ; aesdec xmm5..xmm7, xmm1
    341 DB	102,15,56,222,233
    342 DB	102,15,56,222,241
    343 DB	102,15,56,222,249
    344 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    345 	add	rax,32
        ; aesdec xmm2..xmm7, xmm0
    346 DB	102,15,56,222,208
    347 DB	102,15,56,222,216
    348 DB	102,15,56,222,224
    349 DB	102,15,56,222,232
    350 DB	102,15,56,222,240
    351 DB	102,15,56,222,248
    352 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    353 	jnz	NEAR $L$dec_loop6
    354 
        ; last aesdec xmm2..xmm7 with xmm1
    355 DB	102,15,56,222,209
    356 DB	102,15,56,222,217
    357 DB	102,15,56,222,225
    358 DB	102,15,56,222,233
    359 DB	102,15,56,222,241
    360 DB	102,15,56,222,249
        ; aesdeclast xmm2..xmm7 with xmm0
    361 DB	102,15,56,223,208
    362 DB	102,15,56,223,216
    363 DB	102,15,56,223,224
    364 DB	102,15,56,223,232
    365 DB	102,15,56,223,240
    366 DB	102,15,56,223,248
    367 	DB	0F3h,0C3h		;repret
    368 
    369 
    370 ALIGN	16
    371 _aesni_encrypt8:
        ; File-local helper (no `global`): encrypt eight blocks in parallel.
        ; In:  rcx = expanded key, eax = AESENC iteration count,
        ;      xmm2-xmm9 = input blocks
        ; Out: xmm2-xmm9 = encrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
        ; The whitening XORs are interleaved with the first AESENC of xmm2
        ; and xmm3, so the loop is entered at $L$enc_loop8_inner.
    372 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16; the 32-bit write zero-extends into rax
    373 	shl	eax,4
    374 	movups	xmm1,XMMWORD[16+rcx]
    375 	xorps	xmm2,xmm0
    376 	xorps	xmm3,xmm0
    377 	pxor	xmm4,xmm0
    378 	pxor	xmm5,xmm0
    379 	pxor	xmm6,xmm0
        ; rcx -> key + 32 + 16*count; rax = -16*count
    380 	lea	rcx,[32+rax*1+rcx]
    381 	neg	rax
        ; aesenc xmm2,xmm1
    382 DB	102,15,56,220,209
    383 	pxor	xmm7,xmm0
    384 	pxor	xmm8,xmm0
        ; aesenc xmm3,xmm1
    385 DB	102,15,56,220,217
    386 	pxor	xmm9,xmm0
        ; rax+rcx == key+32 here: load the next round key into xmm0
    387 	movups	xmm0,XMMWORD[rax*1+rcx]
    388 	add	rax,16
    389 	jmp	NEAR $L$enc_loop8_inner
    390 ALIGN	16
    391 $L$enc_loop8:
        ; aesenc xmm2,xmm1 / aesenc xmm3,xmm1
    392 DB	102,15,56,220,209
    393 DB	102,15,56,220,217
    394 $L$enc_loop8_inner:
        ; aesenc xmm4..xmm9, xmm1 (the 68 byte is the REX prefix for xmm8/xmm9)
    395 DB	102,15,56,220,225
    396 DB	102,15,56,220,233
    397 DB	102,15,56,220,241
    398 DB	102,15,56,220,249
    399 DB	102,68,15,56,220,193
    400 DB	102,68,15,56,220,201
    401 $L$enc_loop8_enter:
    402 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    403 	add	rax,32
        ; aesenc xmm2..xmm9, xmm0
    404 DB	102,15,56,220,208
    405 DB	102,15,56,220,216
    406 DB	102,15,56,220,224
    407 DB	102,15,56,220,232
    408 DB	102,15,56,220,240
    409 DB	102,15,56,220,248
    410 DB	102,68,15,56,220,192
    411 DB	102,68,15,56,220,200
    412 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    413 	jnz	NEAR $L$enc_loop8
    414 
        ; last aesenc xmm2..xmm9 with xmm1
    415 DB	102,15,56,220,209
    416 DB	102,15,56,220,217
    417 DB	102,15,56,220,225
    418 DB	102,15,56,220,233
    419 DB	102,15,56,220,241
    420 DB	102,15,56,220,249
    421 DB	102,68,15,56,220,193
    422 DB	102,68,15,56,220,201
        ; aesenclast xmm2..xmm9 with xmm0
    423 DB	102,15,56,221,208
    424 DB	102,15,56,221,216
    425 DB	102,15,56,221,224
    426 DB	102,15,56,221,232
    427 DB	102,15,56,221,240
    428 DB	102,15,56,221,248
    429 DB	102,68,15,56,221,192
    430 DB	102,68,15,56,221,200
    431 	DB	0F3h,0C3h		;repret
    432 
    433 
    434 ALIGN	16
    435 _aesni_decrypt8:
        ; File-local helper (no `global`): decrypt eight blocks in parallel.
        ; In:  rcx = expanded key, eax = AESDEC iteration count,
        ;      xmm2-xmm9 = input blocks
        ; Out: xmm2-xmm9 = decrypted blocks (in place)
        ; Clobbers: rax, rcx, xmm0, xmm1, flags.
        ; The whitening XORs are interleaved with the first AESDEC of xmm2
        ; and xmm3, so the loop is entered at $L$dec_loop8_inner.
    436 	movups	xmm0,XMMWORD[rcx]
        ; eax *= 16; the 32-bit write zero-extends into rax
    437 	shl	eax,4
    438 	movups	xmm1,XMMWORD[16+rcx]
    439 	xorps	xmm2,xmm0
    440 	xorps	xmm3,xmm0
    441 	pxor	xmm4,xmm0
    442 	pxor	xmm5,xmm0
    443 	pxor	xmm6,xmm0
        ; rcx -> key + 32 + 16*count; rax = -16*count
    444 	lea	rcx,[32+rax*1+rcx]
    445 	neg	rax
        ; aesdec xmm2,xmm1
    446 DB	102,15,56,222,209
    447 	pxor	xmm7,xmm0
    448 	pxor	xmm8,xmm0
        ; aesdec xmm3,xmm1
    449 DB	102,15,56,222,217
    450 	pxor	xmm9,xmm0
        ; rax+rcx == key+32 here: load the next round key into xmm0
    451 	movups	xmm0,XMMWORD[rax*1+rcx]
    452 	add	rax,16
    453 	jmp	NEAR $L$dec_loop8_inner
    454 ALIGN	16
    455 $L$dec_loop8:
        ; aesdec xmm2,xmm1 / aesdec xmm3,xmm1
    456 DB	102,15,56,222,209
    457 DB	102,15,56,222,217
    458 $L$dec_loop8_inner:
        ; aesdec xmm4..xmm9, xmm1 (the 68 byte is the REX prefix for xmm8/xmm9)
    459 DB	102,15,56,222,225
    460 DB	102,15,56,222,233
    461 DB	102,15,56,222,241
    462 DB	102,15,56,222,249
    463 DB	102,68,15,56,222,193
    464 DB	102,68,15,56,222,201
    465 $L$dec_loop8_enter:
    466 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    467 	add	rax,32
        ; aesdec xmm2..xmm9, xmm0
    468 DB	102,15,56,222,208
    469 DB	102,15,56,222,216
    470 DB	102,15,56,222,224
    471 DB	102,15,56,222,232
    472 DB	102,15,56,222,240
    473 DB	102,15,56,222,248
    474 DB	102,68,15,56,222,192
    475 DB	102,68,15,56,222,200
    476 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    477 	jnz	NEAR $L$dec_loop8
    478 
        ; last aesdec xmm2..xmm9 with xmm1
    479 DB	102,15,56,222,209
    480 DB	102,15,56,222,217
    481 DB	102,15,56,222,225
    482 DB	102,15,56,222,233
    483 DB	102,15,56,222,241
    484 DB	102,15,56,222,249
    485 DB	102,68,15,56,222,193
    486 DB	102,68,15,56,222,201
        ; aesdeclast xmm2..xmm9 with xmm0
    487 DB	102,15,56,223,208
    488 DB	102,15,56,223,216
    489 DB	102,15,56,223,224
    490 DB	102,15,56,223,232
    491 DB	102,15,56,223,240
    492 DB	102,15,56,223,248
    493 DB	102,68,15,56,223,192
    494 DB	102,68,15,56,223,200
    495 	DB	0F3h,0C3h		;repret
    496 
    497 global	aesni_ecb_encrypt
    498 
    499 ALIGN	16
    500 aesni_ecb_encrypt:
        ; ECB-mode bulk encrypt/decrypt over whole 16-byte blocks.
        ; In (Win64): rcx = input, rdx = output, r8 = length in bytes,
        ;             r9 = expanded key, [rsp+40] = fifth argument
        ;             (non-zero selects encryption, zero selects decryption).
        ; The length is rounded down to a multiple of 16; nothing is done
        ; if that leaves zero. Full groups of 8 blocks go through
        ; _aesni_encrypt8/_aesni_decrypt8; a 1..7 block remainder is
        ; dispatched to the narrower helpers.
        ; Preserves rdi, rsi and xmm6-xmm9 (saved and restored below).
        ; Stash rdi/rsi in the caller-provided slots above the return address.
    501 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    502 	mov	QWORD[16+rsp],rsi
    503 	mov	rax,rsp
    504 $L$SEH_begin_aesni_ecb_encrypt:
        ; Remap the arguments: rdi=in, rsi=out, rdx=len, rcx=key, r8=enc flag
    505 	mov	rdi,rcx
    506 	mov	rsi,rdx
    507 	mov	rdx,r8
    508 	mov	rcx,r9
    509 	mov	r8,QWORD[40+rsp]
    510 
    511 
        ; 88-byte frame; its first 64 bytes hold the saved xmm6-xmm9
    512 	lea	rsp,[((-88))+rsp]
    513 	movaps	XMMWORD[rsp],xmm6
    514 	movaps	XMMWORD[16+rsp],xmm7
    515 	movaps	XMMWORD[32+rsp],xmm8
    516 	movaps	XMMWORD[48+rsp],xmm9
    517 $L$ecb_enc_body:
        ; drop any partial trailing block
    518 	and	rdx,-16
    519 	jz	NEAR $L$ecb_ret
    520 
        ; r11/r10d keep the key pointer and round count; helpers clobber rcx/rax
    521 	mov	eax,DWORD[240+rcx]
    522 	movups	xmm0,XMMWORD[rcx]
    523 	mov	r11,rcx
    524 	mov	r10d,eax
    525 	test	r8d,r8d
    526 	jz	NEAR $L$ecb_decrypt
    527 
        ; ---- encryption ----
    528 	cmp	rdx,0x80
    529 	jb	NEAR $L$ecb_enc_tail
    530 
        ; preload the first 8 blocks, then enter the loop at the helper call
    531 	movdqu	xmm2,XMMWORD[rdi]
    532 	movdqu	xmm3,XMMWORD[16+rdi]
    533 	movdqu	xmm4,XMMWORD[32+rdi]
    534 	movdqu	xmm5,XMMWORD[48+rdi]
    535 	movdqu	xmm6,XMMWORD[64+rdi]
    536 	movdqu	xmm7,XMMWORD[80+rdi]
    537 	movdqu	xmm8,XMMWORD[96+rdi]
    538 	movdqu	xmm9,XMMWORD[112+rdi]
    539 	lea	rdi,[128+rdi]
    540 	sub	rdx,0x80
    541 	jmp	NEAR $L$ecb_enc_loop8_enter
    542 ALIGN	16
    543 $L$ecb_enc_loop8:
        ; store the previous 8 results while loading the next 8 inputs;
        ; rcx/eax are reloaded because the helper consumed them
    544 	movups	XMMWORD[rsi],xmm2
    545 	mov	rcx,r11
    546 	movdqu	xmm2,XMMWORD[rdi]
    547 	mov	eax,r10d
    548 	movups	XMMWORD[16+rsi],xmm3
    549 	movdqu	xmm3,XMMWORD[16+rdi]
    550 	movups	XMMWORD[32+rsi],xmm4
    551 	movdqu	xmm4,XMMWORD[32+rdi]
    552 	movups	XMMWORD[48+rsi],xmm5
    553 	movdqu	xmm5,XMMWORD[48+rdi]
    554 	movups	XMMWORD[64+rsi],xmm6
    555 	movdqu	xmm6,XMMWORD[64+rdi]
    556 	movups	XMMWORD[80+rsi],xmm7
    557 	movdqu	xmm7,XMMWORD[80+rdi]
    558 	movups	XMMWORD[96+rsi],xmm8
    559 	movdqu	xmm8,XMMWORD[96+rdi]
    560 	movups	XMMWORD[112+rsi],xmm9
    561 	lea	rsi,[128+rsi]
    562 	movdqu	xmm9,XMMWORD[112+rdi]
    563 	lea	rdi,[128+rdi]
    564 $L$ecb_enc_loop8_enter:
    565 
    566 	call	_aesni_encrypt8
    567 
        ; loop while at least another 128 bytes remain (no borrow)
    568 	sub	rdx,0x80
    569 	jnc	NEAR $L$ecb_enc_loop8
    570 
        ; flush the last full group and restore rcx/eax for the tail
    571 	movups	XMMWORD[rsi],xmm2
    572 	mov	rcx,r11
    573 	movups	XMMWORD[16+rsi],xmm3
    574 	mov	eax,r10d
    575 	movups	XMMWORD[32+rsi],xmm4
    576 	movups	XMMWORD[48+rsi],xmm5
    577 	movups	XMMWORD[64+rsi],xmm6
    578 	movups	XMMWORD[80+rsi],xmm7
    579 	movups	XMMWORD[96+rsi],xmm8
    580 	movups	XMMWORD[112+rsi],xmm9
    581 	lea	rsi,[128+rsi]
        ; undo the over-subtraction; rdx = leftover bytes (0..112)
    582 	add	rdx,0x80
    583 	jz	NEAR $L$ecb_ret
    584 
    585 $L$ecb_enc_tail:
        ; 1..7 blocks remain. Each cmp feeds two branches (jb, then je);
        ; the movups between them does not modify flags.
    586 	movups	xmm2,XMMWORD[rdi]
    587 	cmp	rdx,0x20
    588 	jb	NEAR $L$ecb_enc_one
    589 	movups	xmm3,XMMWORD[16+rdi]
    590 	je	NEAR $L$ecb_enc_two
    591 	movups	xmm4,XMMWORD[32+rdi]
    592 	cmp	rdx,0x40
    593 	jb	NEAR $L$ecb_enc_three
    594 	movups	xmm5,XMMWORD[48+rdi]
    595 	je	NEAR $L$ecb_enc_four
    596 	movups	xmm6,XMMWORD[64+rdi]
    597 	cmp	rdx,0x60
    598 	jb	NEAR $L$ecb_enc_five
    599 	movups	xmm7,XMMWORD[80+rdi]
    600 	je	NEAR $L$ecb_enc_six
        ; seven blocks: run the 8-wide helper with a zeroed eighth lane
    601 	movdqu	xmm8,XMMWORD[96+rdi]
    602 	xorps	xmm9,xmm9
    603 	call	_aesni_encrypt8
    604 	movups	XMMWORD[rsi],xmm2
    605 	movups	XMMWORD[16+rsi],xmm3
    606 	movups	XMMWORD[32+rsi],xmm4
    607 	movups	XMMWORD[48+rsi],xmm5
    608 	movups	XMMWORD[64+rsi],xmm6
    609 	movups	XMMWORD[80+rsi],xmm7
    610 	movups	XMMWORD[96+rsi],xmm8
    611 	jmp	NEAR $L$ecb_ret
    612 ALIGN	16
    613 $L$ecb_enc_one:
        ; single block: inline one-block round loop (aesenc/aesenclast xmm2,xmm1)
    614 	movups	xmm0,XMMWORD[rcx]
    615 	movups	xmm1,XMMWORD[16+rcx]
    616 	lea	rcx,[32+rcx]
    617 	xorps	xmm2,xmm0
    618 $L$oop_enc1_3:
    619 DB	102,15,56,220,209
    620 	dec	eax
    621 	movups	xmm1,XMMWORD[rcx]
    622 	lea	rcx,[16+rcx]
    623 	jnz	NEAR $L$oop_enc1_3
    624 DB	102,15,56,221,209
    625 	movups	XMMWORD[rsi],xmm2
    626 	jmp	NEAR $L$ecb_ret
    627 ALIGN	16
    628 $L$ecb_enc_two:
    629 	call	_aesni_encrypt2
    630 	movups	XMMWORD[rsi],xmm2
    631 	movups	XMMWORD[16+rsi],xmm3
    632 	jmp	NEAR $L$ecb_ret
    633 ALIGN	16
    634 $L$ecb_enc_three:
    635 	call	_aesni_encrypt3
    636 	movups	XMMWORD[rsi],xmm2
    637 	movups	XMMWORD[16+rsi],xmm3
    638 	movups	XMMWORD[32+rsi],xmm4
    639 	jmp	NEAR $L$ecb_ret
    640 ALIGN	16
    641 $L$ecb_enc_four:
    642 	call	_aesni_encrypt4
    643 	movups	XMMWORD[rsi],xmm2
    644 	movups	XMMWORD[16+rsi],xmm3
    645 	movups	XMMWORD[32+rsi],xmm4
    646 	movups	XMMWORD[48+rsi],xmm5
    647 	jmp	NEAR $L$ecb_ret
    648 ALIGN	16
    649 $L$ecb_enc_five:
        ; five blocks: run the 6-wide helper with a zeroed sixth lane
    650 	xorps	xmm7,xmm7
    651 	call	_aesni_encrypt6
    652 	movups	XMMWORD[rsi],xmm2
    653 	movups	XMMWORD[16+rsi],xmm3
    654 	movups	XMMWORD[32+rsi],xmm4
    655 	movups	XMMWORD[48+rsi],xmm5
    656 	movups	XMMWORD[64+rsi],xmm6
    657 	jmp	NEAR $L$ecb_ret
    658 ALIGN	16
    659 $L$ecb_enc_six:
    660 	call	_aesni_encrypt6
    661 	movups	XMMWORD[rsi],xmm2
    662 	movups	XMMWORD[16+rsi],xmm3
    663 	movups	XMMWORD[32+rsi],xmm4
    664 	movups	XMMWORD[48+rsi],xmm5
    665 	movups	XMMWORD[64+rsi],xmm6
    666 	movups	XMMWORD[80+rsi],xmm7
    667 	jmp	NEAR $L$ecb_ret
    668 
    669 ALIGN	16
    670 $L$ecb_decrypt:
        ; ---- decryption: same shape as the encrypt path, but each
        ; register holding a decrypted block is zeroed after it is stored
    671 	cmp	rdx,0x80
    672 	jb	NEAR $L$ecb_dec_tail
    673 
    674 	movdqu	xmm2,XMMWORD[rdi]
    675 	movdqu	xmm3,XMMWORD[16+rdi]
    676 	movdqu	xmm4,XMMWORD[32+rdi]
    677 	movdqu	xmm5,XMMWORD[48+rdi]
    678 	movdqu	xmm6,XMMWORD[64+rdi]
    679 	movdqu	xmm7,XMMWORD[80+rdi]
    680 	movdqu	xmm8,XMMWORD[96+rdi]
    681 	movdqu	xmm9,XMMWORD[112+rdi]
    682 	lea	rdi,[128+rdi]
    683 	sub	rdx,0x80
    684 	jmp	NEAR $L$ecb_dec_loop8_enter
    685 ALIGN	16
    686 $L$ecb_dec_loop8:
        ; store the previous 8 results while loading the next 8 inputs
    687 	movups	XMMWORD[rsi],xmm2
    688 	mov	rcx,r11
    689 	movdqu	xmm2,XMMWORD[rdi]
    690 	mov	eax,r10d
    691 	movups	XMMWORD[16+rsi],xmm3
    692 	movdqu	xmm3,XMMWORD[16+rdi]
    693 	movups	XMMWORD[32+rsi],xmm4
    694 	movdqu	xmm4,XMMWORD[32+rdi]
    695 	movups	XMMWORD[48+rsi],xmm5
    696 	movdqu	xmm5,XMMWORD[48+rdi]
    697 	movups	XMMWORD[64+rsi],xmm6
    698 	movdqu	xmm6,XMMWORD[64+rdi]
    699 	movups	XMMWORD[80+rsi],xmm7
    700 	movdqu	xmm7,XMMWORD[80+rdi]
    701 	movups	XMMWORD[96+rsi],xmm8
    702 	movdqu	xmm8,XMMWORD[96+rdi]
    703 	movups	XMMWORD[112+rsi],xmm9
    704 	lea	rsi,[128+rsi]
    705 	movdqu	xmm9,XMMWORD[112+rdi]
    706 	lea	rdi,[128+rdi]
    707 $L$ecb_dec_loop8_enter:
    708 
    709 	call	_aesni_decrypt8
    710 
        ; reload the first round key into xmm0 (the helper overwrote it)
    711 	movups	xmm0,XMMWORD[r11]
    712 	sub	rdx,0x80
    713 	jnc	NEAR $L$ecb_dec_loop8
    714 
        ; flush the last full group, scrubbing each register after its store
    715 	movups	XMMWORD[rsi],xmm2
    716 	pxor	xmm2,xmm2
    717 	mov	rcx,r11
    718 	movups	XMMWORD[16+rsi],xmm3
    719 	pxor	xmm3,xmm3
    720 	mov	eax,r10d
    721 	movups	XMMWORD[32+rsi],xmm4
    722 	pxor	xmm4,xmm4
    723 	movups	XMMWORD[48+rsi],xmm5
    724 	pxor	xmm5,xmm5
    725 	movups	XMMWORD[64+rsi],xmm6
    726 	pxor	xmm6,xmm6
    727 	movups	XMMWORD[80+rsi],xmm7
    728 	pxor	xmm7,xmm7
    729 	movups	XMMWORD[96+rsi],xmm8
    730 	pxor	xmm8,xmm8
    731 	movups	XMMWORD[112+rsi],xmm9
    732 	pxor	xmm9,xmm9
    733 	lea	rsi,[128+rsi]
        ; undo the over-subtraction; rdx = leftover bytes (0..112)
    734 	add	rdx,0x80
    735 	jz	NEAR $L$ecb_ret
    736 
    737 $L$ecb_dec_tail:
        ; 1..7 blocks remain; same cmp/jb/je dispatch as the encrypt tail
    738 	movups	xmm2,XMMWORD[rdi]
    739 	cmp	rdx,0x20
    740 	jb	NEAR $L$ecb_dec_one
    741 	movups	xmm3,XMMWORD[16+rdi]
    742 	je	NEAR $L$ecb_dec_two
    743 	movups	xmm4,XMMWORD[32+rdi]
    744 	cmp	rdx,0x40
    745 	jb	NEAR $L$ecb_dec_three
    746 	movups	xmm5,XMMWORD[48+rdi]
    747 	je	NEAR $L$ecb_dec_four
    748 	movups	xmm6,XMMWORD[64+rdi]
    749 	cmp	rdx,0x60
    750 	jb	NEAR $L$ecb_dec_five
    751 	movups	xmm7,XMMWORD[80+rdi]
    752 	je	NEAR $L$ecb_dec_six
        ; seven blocks: run the 8-wide helper with a zeroed eighth lane
    753 	movups	xmm8,XMMWORD[96+rdi]
    754 	movups	xmm0,XMMWORD[rcx]
    755 	xorps	xmm9,xmm9
    756 	call	_aesni_decrypt8
    757 	movups	XMMWORD[rsi],xmm2
    758 	pxor	xmm2,xmm2
    759 	movups	XMMWORD[16+rsi],xmm3
    760 	pxor	xmm3,xmm3
    761 	movups	XMMWORD[32+rsi],xmm4
    762 	pxor	xmm4,xmm4
    763 	movups	XMMWORD[48+rsi],xmm5
    764 	pxor	xmm5,xmm5
    765 	movups	XMMWORD[64+rsi],xmm6
    766 	pxor	xmm6,xmm6
    767 	movups	XMMWORD[80+rsi],xmm7
    768 	pxor	xmm7,xmm7
    769 	movups	XMMWORD[96+rsi],xmm8
    770 	pxor	xmm8,xmm8
    771 	pxor	xmm9,xmm9
    772 	jmp	NEAR $L$ecb_ret
    773 ALIGN	16
    774 $L$ecb_dec_one:
        ; single block: inline one-block round loop (aesdec/aesdeclast xmm2,xmm1)
    775 	movups	xmm0,XMMWORD[rcx]
    776 	movups	xmm1,XMMWORD[16+rcx]
    777 	lea	rcx,[32+rcx]
    778 	xorps	xmm2,xmm0
    779 $L$oop_dec1_4:
    780 DB	102,15,56,222,209
    781 	dec	eax
    782 	movups	xmm1,XMMWORD[rcx]
    783 	lea	rcx,[16+rcx]
    784 	jnz	NEAR $L$oop_dec1_4
    785 DB	102,15,56,223,209
    786 	movups	XMMWORD[rsi],xmm2
    787 	pxor	xmm2,xmm2
    788 	jmp	NEAR $L$ecb_ret
    789 ALIGN	16
    790 $L$ecb_dec_two:
    791 	call	_aesni_decrypt2
    792 	movups	XMMWORD[rsi],xmm2
    793 	pxor	xmm2,xmm2
    794 	movups	XMMWORD[16+rsi],xmm3
    795 	pxor	xmm3,xmm3
    796 	jmp	NEAR $L$ecb_ret
    797 ALIGN	16
    798 $L$ecb_dec_three:
    799 	call	_aesni_decrypt3
    800 	movups	XMMWORD[rsi],xmm2
    801 	pxor	xmm2,xmm2
    802 	movups	XMMWORD[16+rsi],xmm3
    803 	pxor	xmm3,xmm3
    804 	movups	XMMWORD[32+rsi],xmm4
    805 	pxor	xmm4,xmm4
    806 	jmp	NEAR $L$ecb_ret
    807 ALIGN	16
    808 $L$ecb_dec_four:
    809 	call	_aesni_decrypt4
    810 	movups	XMMWORD[rsi],xmm2
    811 	pxor	xmm2,xmm2
    812 	movups	XMMWORD[16+rsi],xmm3
    813 	pxor	xmm3,xmm3
    814 	movups	XMMWORD[32+rsi],xmm4
    815 	pxor	xmm4,xmm4
    816 	movups	XMMWORD[48+rsi],xmm5
    817 	pxor	xmm5,xmm5
    818 	jmp	NEAR $L$ecb_ret
    819 ALIGN	16
    820 $L$ecb_dec_five:
        ; five blocks: run the 6-wide helper with a zeroed sixth lane
    821 	xorps	xmm7,xmm7
    822 	call	_aesni_decrypt6
    823 	movups	XMMWORD[rsi],xmm2
    824 	pxor	xmm2,xmm2
    825 	movups	XMMWORD[16+rsi],xmm3
    826 	pxor	xmm3,xmm3
    827 	movups	XMMWORD[32+rsi],xmm4
    828 	pxor	xmm4,xmm4
    829 	movups	XMMWORD[48+rsi],xmm5
    830 	pxor	xmm5,xmm5
    831 	movups	XMMWORD[64+rsi],xmm6
    832 	pxor	xmm6,xmm6
    833 	pxor	xmm7,xmm7
    834 	jmp	NEAR $L$ecb_ret
    835 ALIGN	16
    836 $L$ecb_dec_six:
        ; the six-block case falls through into $L$ecb_ret
    837 	call	_aesni_decrypt6
    838 	movups	XMMWORD[rsi],xmm2
    839 	pxor	xmm2,xmm2
    840 	movups	XMMWORD[16+rsi],xmm3
    841 	pxor	xmm3,xmm3
    842 	movups	XMMWORD[32+rsi],xmm4
    843 	pxor	xmm4,xmm4
    844 	movups	XMMWORD[48+rsi],xmm5
    845 	pxor	xmm5,xmm5
    846 	movups	XMMWORD[64+rsi],xmm6
    847 	pxor	xmm6,xmm6
    848 	movups	XMMWORD[80+rsi],xmm7
    849 	pxor	xmm7,xmm7
    850 
    851 $L$ecb_ret:
        ; Common exit: clear xmm0/xmm1, restore xmm6-xmm9 and overwrite
        ; each stack save slot with zero right after it is read back.
    852 	xorps	xmm0,xmm0
    853 	pxor	xmm1,xmm1
    854 	movaps	xmm6,XMMWORD[rsp]
    855 	movaps	XMMWORD[rsp],xmm0
    856 	movaps	xmm7,XMMWORD[16+rsp]
    857 	movaps	XMMWORD[16+rsp],xmm0
    858 	movaps	xmm8,XMMWORD[32+rsp]
    859 	movaps	XMMWORD[32+rsp],xmm0
    860 	movaps	xmm9,XMMWORD[48+rsp]
    861 	movaps	XMMWORD[48+rsp],xmm0
    862 	lea	rsp,[88+rsp]
    863 $L$ecb_enc_ret:
    864 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    865 	mov	rsi,QWORD[16+rsp]
    866 	DB	0F3h,0C3h		;repret
    867 $L$SEH_end_aesni_ecb_encrypt:
    868 global	aesni_ccm64_encrypt_blocks
    869 
    870 ALIGN	16
    871 aesni_ccm64_encrypt_blocks:
        ; Per 16-byte block: encrypts a counter block and a running MAC
        ; block side by side, XORs the encrypted counter into the input to
        ; form the output, and folds the input block into the MAC.
        ; In (Win64): rcx = input, rdx = output, r8 = number of blocks,
        ;             r9 = expanded key, [rsp+40] = pointer to the counter
        ;             block, [rsp+48] = pointer to the MAC block (read on
        ;             entry, written back on exit).
        ; NOTE(review): the block count is not checked for zero up front;
        ; the loop body runs once before the count is first tested.
        ; Preserves rdi, rsi and xmm6-xmm9 (saved and restored below).
    872 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    873 	mov	QWORD[16+rsp],rsi
    874 	mov	rax,rsp
    875 $L$SEH_begin_aesni_ccm64_encrypt_blocks:
        ; Remap: rdi=in, rsi=out, rdx=blocks, rcx=key, r8=counter, r9=MAC
    876 	mov	rdi,rcx
    877 	mov	rsi,rdx
    878 	mov	rdx,r8
    879 	mov	rcx,r9
    880 	mov	r8,QWORD[40+rsp]
    881 	mov	r9,QWORD[48+rsp]
    882 
    883 
        ; 88-byte frame; its first 64 bytes hold the saved xmm6-xmm9
    884 	lea	rsp,[((-88))+rsp]
    885 	movaps	XMMWORD[rsp],xmm6
    886 	movaps	XMMWORD[16+rsp],xmm7
    887 	movaps	XMMWORD[32+rsp],xmm8
    888 	movaps	XMMWORD[48+rsp],xmm9
    889 $L$ccm64_enc_body:
        ; xmm6 = counter block, xmm9/xmm7 = constants defined outside this view
    890 	mov	eax,DWORD[240+rcx]
    891 	movdqu	xmm6,XMMWORD[r8]
    892 	movdqa	xmm9,XMMWORD[$L$increment64]
    893 	movdqa	xmm7,XMMWORD[$L$bswap_mask]
    894 
        ; r10 = 16 - 16*count (loop start offset), r11 = key base,
        ; rcx = key + 32 + 16*count, xmm3 = running MAC, xmm2 = counter copy
    895 	shl	eax,4
    896 	mov	r10d,16
    897 	lea	r11,[rcx]
    898 	movdqu	xmm3,XMMWORD[r9]
    899 	movdqa	xmm2,xmm6
    900 	lea	rcx,[32+rax*1+rcx]
        ; pshufb xmm6,xmm7 - presumably byte-swaps the counter so paddq can step it
    901 DB	102,15,56,0,247
    902 	sub	r10,rax
    903 	jmp	NEAR $L$ccm64_enc_outer
    904 ALIGN	16
    905 $L$ccm64_enc_outer:
    906 	movups	xmm0,XMMWORD[r11]
    907 	mov	rax,r10
    908 	movups	xmm8,XMMWORD[rdi]
    909 
        ; xmm2 = counter ^ key0; xmm3 = MAC ^ input ^ key0
    910 	xorps	xmm2,xmm0
    911 	movups	xmm1,XMMWORD[16+r11]
    912 	xorps	xmm0,xmm8
    913 	xorps	xmm3,xmm0
    914 	movups	xmm0,XMMWORD[32+r11]
    915 
    916 $L$ccm64_enc2_loop:
        ; aesenc xmm2,xmm1 / aesenc xmm3,xmm1
    917 DB	102,15,56,220,209
    918 DB	102,15,56,220,217
    919 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
    920 	add	rax,32
        ; aesenc xmm2,xmm0 / aesenc xmm3,xmm0
    921 DB	102,15,56,220,208
    922 DB	102,15,56,220,216
    923 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
    924 	jnz	NEAR $L$ccm64_enc2_loop
        ; last aesenc xmm2/xmm3 with xmm1
    925 DB	102,15,56,220,209
    926 DB	102,15,56,220,217
        ; advance the counter by $L$increment64
    927 	paddq	xmm6,xmm9
        ; ZF from this dec survives to the jnz at the bottom of the loop:
        ; nothing in between (SSE ops, lea, DB-encoded AES/pshufb) writes flags
    928 	dec	rdx
        ; aesenclast xmm2,xmm0 / aesenclast xmm3,xmm0
    929 DB	102,15,56,221,208
    930 DB	102,15,56,221,216
    931 
    932 	lea	rdi,[16+rdi]
        ; output = input ^ encrypted counter
    933 	xorps	xmm8,xmm2
    934 	movdqa	xmm2,xmm6
    935 	movups	XMMWORD[rsi],xmm8
        ; pshufb xmm2,xmm7 - shuffle the new counter copy for the next block
    936 DB	102,15,56,0,215
    937 	lea	rsi,[16+rsi]
    938 	jnz	NEAR $L$ccm64_enc_outer
    939 
        ; write the MAC back and scrub the working registers
    940 	pxor	xmm0,xmm0
    941 	pxor	xmm1,xmm1
    942 	pxor	xmm2,xmm2
    943 	movups	XMMWORD[r9],xmm3
    944 	pxor	xmm3,xmm3
    945 	pxor	xmm8,xmm8
    946 	pxor	xmm6,xmm6
        ; restore xmm6-xmm9 and zero their stack save slots
    947 	movaps	xmm6,XMMWORD[rsp]
    948 	movaps	XMMWORD[rsp],xmm0
    949 	movaps	xmm7,XMMWORD[16+rsp]
    950 	movaps	XMMWORD[16+rsp],xmm0
    951 	movaps	xmm8,XMMWORD[32+rsp]
    952 	movaps	XMMWORD[32+rsp],xmm0
    953 	movaps	xmm9,XMMWORD[48+rsp]
    954 	movaps	XMMWORD[48+rsp],xmm0
    955 	lea	rsp,[88+rsp]
    956 $L$ccm64_enc_ret:
    957 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    958 	mov	rsi,QWORD[16+rsp]
    959 	DB	0F3h,0C3h		;repret
    960 $L$SEH_end_aesni_ccm64_encrypt_blocks:
    961 global	aesni_ccm64_decrypt_blocks
    962 
    963 ALIGN	16
    964 aesni_ccm64_decrypt_blocks:
        ; Decrypting counterpart of the CCM64 encrypt routine: the first
        ; counter block is encrypted on its own, then each pass XORs that
        ; keystream into an input block, stores the result, and folds the
        ; result into the running MAC while the next counter is encrypted.
        ; In (Win64): rcx = input, rdx = output, r8 = number of blocks,
        ;             r9 = expanded key, [rsp+40] = pointer to the counter
        ;             block, [rsp+48] = pointer to the MAC block (read on
        ;             entry, written back on exit).
        ; NOTE(review): the block count is not checked for zero up front;
        ; one block is produced before the count is first tested.
        ; Preserves rdi, rsi and xmm6-xmm9 (saved and restored below).
    965 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    966 	mov	QWORD[16+rsp],rsi
    967 	mov	rax,rsp
    968 $L$SEH_begin_aesni_ccm64_decrypt_blocks:
        ; Remap: rdi=in, rsi=out, rdx=blocks, rcx=key, r8=counter, r9=MAC
    969 	mov	rdi,rcx
    970 	mov	rsi,rdx
    971 	mov	rdx,r8
    972 	mov	rcx,r9
    973 	mov	r8,QWORD[40+rsp]
    974 	mov	r9,QWORD[48+rsp]
    975 
    976 
        ; 88-byte frame; its first 64 bytes hold the saved xmm6-xmm9
    977 	lea	rsp,[((-88))+rsp]
    978 	movaps	XMMWORD[rsp],xmm6
    979 	movaps	XMMWORD[16+rsp],xmm7
    980 	movaps	XMMWORD[32+rsp],xmm8
    981 	movaps	XMMWORD[48+rsp],xmm9
    982 $L$ccm64_dec_body:
        ; xmm6 = counter block, xmm3 = running MAC,
        ; xmm9/xmm7 = constants defined outside this view
    983 	mov	eax,DWORD[240+rcx]
    984 	movups	xmm6,XMMWORD[r8]
    985 	movdqu	xmm3,XMMWORD[r9]
    986 	movdqa	xmm9,XMMWORD[$L$increment64]
    987 	movdqa	xmm7,XMMWORD[$L$bswap_mask]
    988 
        ; keep the round count (r10d) and key base (r11) for later passes
    989 	movaps	xmm2,xmm6
    990 	mov	r10d,eax
    991 	mov	r11,rcx
        ; pshufb xmm6,xmm7 - presumably byte-swaps the counter so paddq can step it
    992 DB	102,15,56,0,247
        ; encrypt the first counter block (xmm2) with the one-block loop
    993 	movups	xmm0,XMMWORD[rcx]
    994 	movups	xmm1,XMMWORD[16+rcx]
    995 	lea	rcx,[32+rcx]
    996 	xorps	xmm2,xmm0
    997 $L$oop_enc1_5:
        ; aesenc xmm2,xmm1
    998 DB	102,15,56,220,209
    999 	dec	eax
   1000 	movups	xmm1,XMMWORD[rcx]
   1001 	lea	rcx,[16+rcx]
   1002 	jnz	NEAR $L$oop_enc1_5
        ; aesenclast xmm2,xmm1
   1003 DB	102,15,56,221,209
        ; set up the two-lane loop: r10 = 16 - 16*count (start offset),
        ; rcx = key + 32 + 16*count; also load the first input block
   1004 	shl	r10d,4
   1005 	mov	eax,16
   1006 	movups	xmm8,XMMWORD[rdi]
   1007 	paddq	xmm6,xmm9
   1008 	lea	rdi,[16+rdi]
   1009 	sub	rax,r10
   1010 	lea	rcx,[32+r10*1+r11]
   1011 	mov	r10,rax
   1012 	jmp	NEAR $L$ccm64_dec_outer
   1013 ALIGN	16
   1014 $L$ccm64_dec_outer:
        ; output = input ^ keystream; xmm8 now holds the output block
   1015 	xorps	xmm8,xmm2
   1016 	movdqa	xmm2,xmm6
   1017 	movups	XMMWORD[rsi],xmm8
   1018 	lea	rsi,[16+rsi]
        ; pshufb xmm2,xmm7 - shuffle the next counter copy
   1019 DB	102,15,56,0,215
   1020 
   1021 	sub	rdx,1
   1022 	jz	NEAR $L$ccm64_dec_break
   1023 
        ; xmm2 = next counter ^ key0; xmm3 = MAC ^ output block ^ key0
   1024 	movups	xmm0,XMMWORD[r11]
   1025 	mov	rax,r10
   1026 	movups	xmm1,XMMWORD[16+r11]
   1027 	xorps	xmm8,xmm0
   1028 	xorps	xmm2,xmm0
   1029 	xorps	xmm3,xmm8
   1030 	movups	xmm0,XMMWORD[32+r11]
   1031 	jmp	NEAR $L$ccm64_dec2_loop
   1032 ALIGN	16
   1033 $L$ccm64_dec2_loop:
        ; aesenc xmm2,xmm1 / aesenc xmm3,xmm1
   1034 DB	102,15,56,220,209
   1035 DB	102,15,56,220,217
   1036 	movups	xmm1,XMMWORD[rax*1+rcx]
        ; sets ZF for the jnz below; the DB/movups in between keep flags
   1037 	add	rax,32
        ; aesenc xmm2,xmm0 / aesenc xmm3,xmm0
   1038 DB	102,15,56,220,208
   1039 DB	102,15,56,220,216
   1040 	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
   1041 	jnz	NEAR $L$ccm64_dec2_loop
        ; fetch the next input block and step the counter, then finish the rounds
   1042 	movups	xmm8,XMMWORD[rdi]
   1043 	paddq	xmm6,xmm9
        ; aesenc xmm2/xmm3 with xmm1, then aesenclast xmm2/xmm3 with xmm0
   1044 DB	102,15,56,220,209
   1045 DB	102,15,56,220,217
   1046 DB	102,15,56,221,208
   1047 DB	102,15,56,221,216
   1048 	lea	rdi,[16+rdi]
   1049 	jmp	NEAR $L$ccm64_dec_outer
   1050 
   1051 ALIGN	16
   1052 $L$ccm64_dec_break:
   1053 
        ; last block: fold it into the MAC and encrypt the MAC alone,
        ; walking the key schedule through r11
   1054 	mov	eax,DWORD[240+r11]
   1055 	movups	xmm0,XMMWORD[r11]
   1056 	movups	xmm1,XMMWORD[16+r11]
   1057 	xorps	xmm8,xmm0
   1058 	lea	r11,[32+r11]
   1059 	xorps	xmm3,xmm8
   1060 $L$oop_enc1_6:
        ; aesenc xmm3,xmm1
   1061 DB	102,15,56,220,217
   1062 	dec	eax
   1063 	movups	xmm1,XMMWORD[r11]
   1064 	lea	r11,[16+r11]
   1065 	jnz	NEAR $L$oop_enc1_6
        ; aesenclast xmm3,xmm1
   1066 DB	102,15,56,221,217
        ; write the MAC back and scrub the working registers
   1067 	pxor	xmm0,xmm0
   1068 	pxor	xmm1,xmm1
   1069 	pxor	xmm2,xmm2
   1070 	movups	XMMWORD[r9],xmm3
   1071 	pxor	xmm3,xmm3
   1072 	pxor	xmm8,xmm8
   1073 	pxor	xmm6,xmm6
        ; restore xmm6-xmm9 and zero their stack save slots
   1074 	movaps	xmm6,XMMWORD[rsp]
   1075 	movaps	XMMWORD[rsp],xmm0
   1076 	movaps	xmm7,XMMWORD[16+rsp]
   1077 	movaps	XMMWORD[16+rsp],xmm0
   1078 	movaps	xmm8,XMMWORD[32+rsp]
   1079 	movaps	XMMWORD[32+rsp],xmm0
   1080 	movaps	xmm9,XMMWORD[48+rsp]
   1081 	movaps	XMMWORD[48+rsp],xmm0
   1082 	lea	rsp,[88+rsp]
   1083 $L$ccm64_dec_ret:
   1084 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   1085 	mov	rsi,QWORD[16+rsp]
   1086 	DB	0F3h,0C3h		;repret
   1087 $L$SEH_end_aesni_ccm64_decrypt_blocks:
   1088 global	aesni_ctr32_encrypt_blocks
        ;-----------------------------------------------------------------------
        ; aesni_ctr32_encrypt_blocks -- AES counter-mode encryption of whole
        ; 16-byte blocks (Win64 entry point).
        ;
        ; The prologue moves the Win64 arguments into these working registers:
        ;   rdi <- rcx       input pointer  (16 bytes read per block)
        ;   rsi <- rdx       output pointer (16 bytes written per block)
        ;   rdx <- r8        number of 16-byte blocks
        ;   rcx <- r9        key schedule; dword at offset 240 is the round
        ;                    count used by the dec/jnz round loops
        ;   r8  <- [40+rsp]  16-byte counter block; the dword at offset 12 is
        ;                    treated as a big-endian 32-bit counter
        ;
        ; Only that 32-bit word is incremented (no carry into bytes 0..11) and
        ; the caller's counter block is never written back.
        ;
        ; Paths:
        ;   count == 1        : single-block fast path, no stack frame
        ;   count >= 8        : 8-blocks-per-iteration loop, or a 6-block loop
        ;                       when the OPENSSL_ia32cap_P test below selects it
        ;   remaining 1..7    : $L$ctr32_tail (3-, 4- or 5..7-block code)
        ; NOTE(review): count == 0 is not special-cased; it takes the bulk path
        ; and reaches $L$ctr32_loop3, which reads one block from [rdi] and
        ; writes one block to [rsi] before leaving. Confirm callers never pass 0.
        ;
        ; DB-encoded instructions used below:
        ;   102,15,56,220,modrm      aesenc     xmmA,xmmB
        ;   102,15,56,221,modrm      aesenclast xmmA,xmmB
        ;   (an extra 65/68/69 byte after 102 is a REX prefix selecting xmm8+)
        ;   102,15,58,34,modrm,3     pinsrd     xmmA,r32,3
        ;   0x0f,0x38,0xf1,0x44,0x24,d   movbe  DWORD[d+rsp],eax
        ;   0x66,0x90                two-byte nop
        ;   0F3h,0C3h                rep ret
        ;
        ; Callee-saved state: rdi/rsi are spilled to the caller's home slots,
        ; rbp is pushed and xmm6-xmm15 are saved on the stack in the bulk path.
        ; All key/counter material in xmm registers and stack scratch is zeroed
        ; before returning.
        ;-----------------------------------------------------------------------
   1089 
   1090 ALIGN	16
   1091 aesni_ctr32_encrypt_blocks:
   1092 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
   1093 	mov	QWORD[16+rsp],rsi
   1094 	mov	rax,rsp
   1095 $L$SEH_begin_aesni_ctr32_encrypt_blocks:
   1096 	mov	rdi,rcx
   1097 	mov	rsi,rdx
   1098 	mov	rdx,r8
   1099 	mov	rcx,r9
   1100 	mov	r8,QWORD[40+rsp]	; fifth argument comes from the stack
   1101 
   1102 
   1103 	cmp	rdx,1
   1104 	jne	NEAR $L$ctr32_bulk	; anything other than exactly one block
   1105 
   1106 
   1107 
        ; ---- single-block path: out = AES(counter block) XOR in ----
   1108 	movups	xmm2,XMMWORD[r8]	; xmm2 = counter block
   1109 	movups	xmm3,XMMWORD[rdi]	; xmm3 = input block
   1110 	mov	edx,DWORD[240+rcx]	; edx = round count
   1111 	movups	xmm0,XMMWORD[rcx]	; round key 0
   1112 	movups	xmm1,XMMWORD[16+rcx]	; round key 1
   1113 	lea	rcx,[32+rcx]
   1114 	xorps	xmm2,xmm0	; initial AddRoundKey
   1115 $L$oop_enc1_7:
   1116 DB	102,15,56,220,209	; aesenc xmm2,xmm1
   1117 	dec	edx	; sets ZF for the jnz below (movups/lea leave flags alone)
   1118 	movups	xmm1,XMMWORD[rcx]
   1119 	lea	rcx,[16+rcx]
   1120 	jnz	NEAR $L$oop_enc1_7
   1121 DB	102,15,56,221,209	; aesenclast xmm2,xmm1
   1122 	pxor	xmm0,xmm0	; scrub round keys from registers
   1123 	pxor	xmm1,xmm1
   1124 	xorps	xmm2,xmm3	; keystream XOR input
   1125 	pxor	xmm3,xmm3
   1126 	movups	XMMWORD[rsi],xmm2
   1127 	xorps	xmm2,xmm2
   1128 	jmp	NEAR $L$ctr32_epilogue	; no frame was built on this path
   1129 
   1130 ALIGN	16
   1131 $L$ctr32_bulk:
        ; Build the frame: rax keeps the entry rsp, rbp is pushed at rax-8,
        ; xmm6-xmm15 go to rax-168..rax-9 and rsp is re-aligned to 16 so the
        ; movdqa/movaps accesses to the scratch area at rsp+0..rsp+127 are legal.
        ; The movaps saves relative to rax need rax-168 to be 16-byte aligned,
        ; i.e. the usual "rsp % 16 == 8" state at function entry.
   1132 	lea	rax,[rsp]
   1133 	push	rbp
   1134 	sub	rsp,288
   1135 	and	rsp,-16
   1136 	movaps	XMMWORD[(-168)+rax],xmm6
   1137 	movaps	XMMWORD[(-152)+rax],xmm7
   1138 	movaps	XMMWORD[(-136)+rax],xmm8
   1139 	movaps	XMMWORD[(-120)+rax],xmm9
   1140 	movaps	XMMWORD[(-104)+rax],xmm10
   1141 	movaps	XMMWORD[(-88)+rax],xmm11
   1142 	movaps	XMMWORD[(-72)+rax],xmm12
   1143 	movaps	XMMWORD[(-56)+rax],xmm13
   1144 	movaps	XMMWORD[(-40)+rax],xmm14
   1145 	movaps	XMMWORD[(-24)+rax],xmm15
   1146 $L$ctr32_body:
   1147 	lea	rbp,[((-8))+rax]	; rbp -> saved rbp slot; used to unwind in $L$ctr32_done
   1148 
   1149 
   1150 
   1151 
        ; Prepare eight counter blocks, each already XORed with round key 0:
        ;   xmm2..xmm5 and stack slots 0,16,32,48  = counter+0..+3
        ;   stack slots 64,80,96,112               = counter+4..+7
        ; r8d holds the counter in host byte order, r11d the last dword of
        ; round key 0 (XORed into each byte-swapped counter word).
   1152 	movdqu	xmm2,XMMWORD[r8]
   1153 	movdqu	xmm0,XMMWORD[rcx]
   1154 	mov	r8d,DWORD[12+r8]	; r8d = counter word (still big-endian)
   1155 	pxor	xmm2,xmm0
   1156 	mov	r11d,DWORD[12+rcx]	; r11d = dword 3 of round key 0
   1157 	movdqa	XMMWORD[rsp],xmm2
   1158 	bswap	r8d	; counter to host order so it can be incremented
   1159 	movdqa	xmm3,xmm2
   1160 	movdqa	xmm4,xmm2
   1161 	movdqa	xmm5,xmm2
   1162 	movdqa	XMMWORD[64+rsp],xmm2
   1163 	movdqa	XMMWORD[80+rsp],xmm2
   1164 	movdqa	XMMWORD[96+rsp],xmm2
   1165 	mov	r10,rdx	; park the block count; rdx is used as a temporary below
   1166 	movdqa	XMMWORD[112+rsp],xmm2
   1167 
   1168 	lea	rax,[1+r8]
   1169 	lea	rdx,[2+r8]
   1170 	bswap	eax
   1171 	bswap	edx
   1172 	xor	eax,r11d
   1173 	xor	edx,r11d
   1174 DB	102,15,58,34,216,3	; pinsrd xmm3,eax,3  (counter+1)
   1175 	lea	rax,[3+r8]
   1176 	movdqa	XMMWORD[16+rsp],xmm3
   1177 DB	102,15,58,34,226,3	; pinsrd xmm4,edx,3  (counter+2)
   1178 	bswap	eax
   1179 	mov	rdx,r10	; restore the block count
   1180 	lea	r10,[4+r8]
   1181 	movdqa	XMMWORD[32+rsp],xmm4
   1182 	xor	eax,r11d
   1183 	bswap	r10d
   1184 DB	102,15,58,34,232,3	; pinsrd xmm5,eax,3  (counter+3)
   1185 	xor	r10d,r11d
   1186 	movdqa	XMMWORD[48+rsp],xmm5
   1187 	lea	r9,[5+r8]
   1188 	mov	DWORD[((64+12))+rsp],r10d	; counter+4
   1189 	bswap	r9d
   1190 	lea	r10,[6+r8]
   1191 	mov	eax,DWORD[240+rcx]	; eax = round count
   1192 	xor	r9d,r11d
   1193 	bswap	r10d
   1194 	mov	DWORD[((80+12))+rsp],r9d	; counter+5
   1195 	xor	r10d,r11d
   1196 	lea	r9,[7+r8]
   1197 	mov	DWORD[((96+12))+rsp],r10d	; counter+6
   1198 	bswap	r9d
   1199 	mov	r10d,DWORD[((OPENSSL_ia32cap_P+4))]	; second capability dword
   1200 	xor	r9d,r11d
   1201 	and	r10d,71303168	; keep bits 22 and 26 (0x4400000)
   1202 	mov	DWORD[((112+12))+rsp],r9d	; counter+7
   1203 
   1204 	movups	xmm1,XMMWORD[16+rcx]	; round key 1
   1205 
   1206 	movdqa	xmm6,XMMWORD[64+rsp]
   1207 	movdqa	xmm7,XMMWORD[80+rsp]
   1208 
   1209 	cmp	rdx,8
   1210 	jb	NEAR $L$ctr32_tail	; fewer than 8 blocks: straight to the tail code
   1211 
        ; Choose the 6-block loop only when bit 22 is set and bit 26 is clear.
        ; NOTE(review): presumably bit 22 = MOVBE and bit 26 = XSAVE from
        ; CPUID.1:ECX (the 6x loop uses movbe) -- confirm against the
        ; OPENSSL_ia32cap_P layout, which is defined outside this file.
   1212 	sub	rdx,6
   1213 	cmp	r10d,4194304
   1214 	je	NEAR $L$ctr32_6x
   1215 
   1216 	lea	rcx,[128+rcx]	; bias key pointer so loop8 can use short displacements
   1217 	sub	rdx,2	; rdx = count-8 on entry to loop8
   1218 	jmp	NEAR $L$ctr32_loop8
   1219 
   1220 ALIGN	16
   1221 $L$ctr32_6x:
        ; Set up for the 6-block loop: rcx points past the schedule
        ; (key+32+16*rounds) and r10 = 48-16*rounds is the negative offset used
        ; to reach round keys from it. r11d is byte-swapped because movbe swaps
        ; again on store: movbe(ctr ^ bswap(k)) == bswap(ctr) ^ k.
   1222 	shl	eax,4
   1223 	mov	r10d,48
   1224 	bswap	r11d
   1225 	lea	rcx,[32+rax*1+rcx]
   1226 	sub	r10,rax
   1227 	jmp	NEAR $L$ctr32_loop6
   1228 
   1229 ALIGN	16
   1230 $L$ctr32_loop6:
        ; Each iteration encrypts the six counter blocks in xmm2..xmm7 while
        ; writing the next six counter words (r8+0..r8+5 after the add) into the
        ; stack slots at rsp+12, +28, +44, +60, +76, +92.
   1231 	add	r8d,6
   1232 	movups	xmm0,XMMWORD[((-48))+r10*1+rcx]	; round key 2
   1233 DB	102,15,56,220,209
   1234 	mov	eax,r8d
   1235 	xor	eax,r11d
   1236 DB	102,15,56,220,217
   1237 DB	0x0f,0x38,0xf1,0x44,0x24,12	; movbe [12+rsp],eax
   1238 	lea	eax,[1+r8]
   1239 DB	102,15,56,220,225
   1240 	xor	eax,r11d
   1241 DB	0x0f,0x38,0xf1,0x44,0x24,28	; movbe [28+rsp],eax
   1242 DB	102,15,56,220,233
   1243 	lea	eax,[2+r8]
   1244 	xor	eax,r11d
   1245 DB	102,15,56,220,241
   1246 DB	0x0f,0x38,0xf1,0x44,0x24,44	; movbe [44+rsp],eax
   1247 	lea	eax,[3+r8]
   1248 DB	102,15,56,220,249
   1249 	movups	xmm1,XMMWORD[((-32))+r10*1+rcx]	; round key 3
   1250 	xor	eax,r11d
   1251 
   1252 DB	102,15,56,220,208
   1253 DB	0x0f,0x38,0xf1,0x44,0x24,60	; movbe [60+rsp],eax
   1254 	lea	eax,[4+r8]
   1255 DB	102,15,56,220,216
   1256 	xor	eax,r11d
   1257 DB	0x0f,0x38,0xf1,0x44,0x24,76	; movbe [76+rsp],eax
   1258 DB	102,15,56,220,224
   1259 	lea	eax,[5+r8]
   1260 	xor	eax,r11d
   1261 DB	102,15,56,220,232
   1262 DB	0x0f,0x38,0xf1,0x44,0x24,92	; movbe [92+rsp],eax
   1263 	mov	rax,r10	; rax = key offset expected by the shared round loop
   1264 DB	102,15,56,220,240
   1265 DB	102,15,56,220,248
   1266 	movups	xmm0,XMMWORD[((-16))+r10*1+rcx]	; round key 4
   1267 
        ; $L$enc_loop6 is defined outside this view; presumably it runs the
        ; remaining rounds (including the last) on xmm2..xmm7 -- TODO confirm.
   1268 	call	$L$enc_loop6
   1269 
   1270 	movdqu	xmm8,XMMWORD[rdi]
   1271 	movdqu	xmm9,XMMWORD[16+rdi]
   1272 	movdqu	xmm10,XMMWORD[32+rdi]
   1273 	movdqu	xmm11,XMMWORD[48+rdi]
   1274 	movdqu	xmm12,XMMWORD[64+rdi]
   1275 	movdqu	xmm13,XMMWORD[80+rdi]
   1276 	lea	rdi,[96+rdi]
   1277 	movups	xmm1,XMMWORD[((-64))+r10*1+rcx]	; round key 1 for the next iteration
        ; XOR input with keystream and reload xmm2..xmm7 with the next counters.
   1278 	pxor	xmm8,xmm2
   1279 	movaps	xmm2,XMMWORD[rsp]
   1280 	pxor	xmm9,xmm3
   1281 	movaps	xmm3,XMMWORD[16+rsp]
   1282 	pxor	xmm10,xmm4
   1283 	movaps	xmm4,XMMWORD[32+rsp]
   1284 	pxor	xmm11,xmm5
   1285 	movaps	xmm5,XMMWORD[48+rsp]
   1286 	pxor	xmm12,xmm6
   1287 	movaps	xmm6,XMMWORD[64+rsp]
   1288 	pxor	xmm13,xmm7
   1289 	movaps	xmm7,XMMWORD[80+rsp]
   1290 	movdqu	XMMWORD[rsi],xmm8
   1291 	movdqu	XMMWORD[16+rsi],xmm9
   1292 	movdqu	XMMWORD[32+rsi],xmm10
   1293 	movdqu	XMMWORD[48+rsi],xmm11
   1294 	movdqu	XMMWORD[64+rsi],xmm12
   1295 	movdqu	XMMWORD[80+rsi],xmm13
   1296 	lea	rsi,[96+rsi]
   1297 
   1298 	sub	rdx,6
   1299 	jnc	NEAR $L$ctr32_loop6	; at least six more blocks remained
   1300 
   1301 	add	rdx,6	; rdx = leftover blocks (0..5)
   1302 	jz	NEAR $L$ctr32_done
   1303 
        ; Undo the 6x setup for the tail code: eax = (48-r10)/16 = round count,
        ; rcx = start of the key schedule again.
   1304 	lea	eax,[((-48))+r10]
   1305 	lea	rcx,[((-80))+r10*1+rcx]
   1306 	neg	eax
   1307 	shr	eax,4
   1308 	jmp	NEAR $L$ctr32_tail
   1309 
   1310 ALIGN	32
   1311 $L$ctr32_loop8:
        ; Eight blocks per iteration (xmm2..xmm9). rcx is the key schedule
        ; biased by +128, so [((16*k)-128)+rcx] is round key k. While rounds
        ; 1..8 run, r9d computes the next batch's counter words (r8+0..r8+7
        ; after the add) and stores them into the eight stack slots.
   1312 	add	r8d,8
   1313 	movdqa	xmm8,XMMWORD[96+rsp]	; seventh counter block
   1314 DB	102,15,56,220,209
   1315 	mov	r9d,r8d
   1316 	movdqa	xmm9,XMMWORD[112+rsp]	; eighth counter block
   1317 DB	102,15,56,220,217
   1318 	bswap	r9d
   1319 	movups	xmm0,XMMWORD[((32-128))+rcx]
   1320 DB	102,15,56,220,225
   1321 	xor	r9d,r11d
   1322 	nop
   1323 DB	102,15,56,220,233
   1324 	mov	DWORD[((0+12))+rsp],r9d
   1325 	lea	r9,[1+r8]
   1326 DB	102,15,56,220,241
   1327 DB	102,15,56,220,249
   1328 DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
   1329 DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
   1330 	movups	xmm1,XMMWORD[((48-128))+rcx]
   1331 	bswap	r9d
   1332 DB	102,15,56,220,208
   1333 DB	102,15,56,220,216
   1334 	xor	r9d,r11d
   1335 DB	0x66,0x90
   1336 DB	102,15,56,220,224
   1337 DB	102,15,56,220,232
   1338 	mov	DWORD[((16+12))+rsp],r9d
   1339 	lea	r9,[2+r8]
   1340 DB	102,15,56,220,240
   1341 DB	102,15,56,220,248
   1342 DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
   1343 DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
   1344 	movups	xmm0,XMMWORD[((64-128))+rcx]
   1345 	bswap	r9d
   1346 DB	102,15,56,220,209
   1347 DB	102,15,56,220,217
   1348 	xor	r9d,r11d
   1349 DB	0x66,0x90
   1350 DB	102,15,56,220,225
   1351 DB	102,15,56,220,233
   1352 	mov	DWORD[((32+12))+rsp],r9d
   1353 	lea	r9,[3+r8]
   1354 DB	102,15,56,220,241
   1355 DB	102,15,56,220,249
   1356 DB	102,68,15,56,220,193
   1357 DB	102,68,15,56,220,201
   1358 	movups	xmm1,XMMWORD[((80-128))+rcx]
   1359 	bswap	r9d
   1360 DB	102,15,56,220,208
   1361 DB	102,15,56,220,216
   1362 	xor	r9d,r11d
   1363 DB	0x66,0x90
   1364 DB	102,15,56,220,224
   1365 DB	102,15,56,220,232
   1366 	mov	DWORD[((48+12))+rsp],r9d
   1367 	lea	r9,[4+r8]
   1368 DB	102,15,56,220,240
   1369 DB	102,15,56,220,248
   1370 DB	102,68,15,56,220,192
   1371 DB	102,68,15,56,220,200
   1372 	movups	xmm0,XMMWORD[((96-128))+rcx]
   1373 	bswap	r9d
   1374 DB	102,15,56,220,209
   1375 DB	102,15,56,220,217
   1376 	xor	r9d,r11d
   1377 DB	0x66,0x90
   1378 DB	102,15,56,220,225
   1379 DB	102,15,56,220,233
   1380 	mov	DWORD[((64+12))+rsp],r9d
   1381 	lea	r9,[5+r8]
   1382 DB	102,15,56,220,241
   1383 DB	102,15,56,220,249
   1384 DB	102,68,15,56,220,193
   1385 DB	102,68,15,56,220,201
   1386 	movups	xmm1,XMMWORD[((112-128))+rcx]
   1387 	bswap	r9d
   1388 DB	102,15,56,220,208
   1389 DB	102,15,56,220,216
   1390 	xor	r9d,r11d
   1391 DB	0x66,0x90
   1392 DB	102,15,56,220,224
   1393 DB	102,15,56,220,232
   1394 	mov	DWORD[((80+12))+rsp],r9d
   1395 	lea	r9,[6+r8]
   1396 DB	102,15,56,220,240
   1397 DB	102,15,56,220,248
   1398 DB	102,68,15,56,220,192
   1399 DB	102,68,15,56,220,200
   1400 	movups	xmm0,XMMWORD[((128-128))+rcx]
   1401 	bswap	r9d
   1402 DB	102,15,56,220,209
   1403 DB	102,15,56,220,217
   1404 	xor	r9d,r11d
   1405 DB	0x66,0x90
   1406 DB	102,15,56,220,225
   1407 DB	102,15,56,220,233
   1408 	mov	DWORD[((96+12))+rsp],r9d
   1409 	lea	r9,[7+r8]
   1410 DB	102,15,56,220,241
   1411 DB	102,15,56,220,249
   1412 DB	102,68,15,56,220,193
   1413 DB	102,68,15,56,220,201
   1414 	movups	xmm1,XMMWORD[((144-128))+rcx]
   1415 	bswap	r9d
   1416 DB	102,15,56,220,208
   1417 DB	102,15,56,220,216
   1418 DB	102,15,56,220,224
   1419 	xor	r9d,r11d
   1420 	movdqu	xmm10,XMMWORD[rdi]	; start fetching input block 0
   1421 DB	102,15,56,220,232
   1422 	mov	DWORD[((112+12))+rsp],r9d
        ; The flags from this compare feed BOTH the jb and the later je; every
        ; instruction in between is an SSE/AES op or a load, which leaves the
        ; flags untouched.
   1423 	cmp	eax,11
   1424 DB	102,15,56,220,240
   1425 DB	102,15,56,220,248
   1426 DB	102,68,15,56,220,192
   1427 DB	102,68,15,56,220,200
   1428 	movups	xmm0,XMMWORD[((160-128))+rcx]
   1429 
   1430 	jb	NEAR $L$ctr32_enc_done	; round count < 11: only the final two rounds remain
   1431 
   1432 DB	102,15,56,220,209
   1433 DB	102,15,56,220,217
   1434 DB	102,15,56,220,225
   1435 DB	102,15,56,220,233
   1436 DB	102,15,56,220,241
   1437 DB	102,15,56,220,249
   1438 DB	102,68,15,56,220,193
   1439 DB	102,68,15,56,220,201
   1440 	movups	xmm1,XMMWORD[((176-128))+rcx]
   1441 
   1442 DB	102,15,56,220,208
   1443 DB	102,15,56,220,216
   1444 DB	102,15,56,220,224
   1445 DB	102,15,56,220,232
   1446 DB	102,15,56,220,240
   1447 DB	102,15,56,220,248
   1448 DB	102,68,15,56,220,192
   1449 DB	102,68,15,56,220,200
   1450 	movups	xmm0,XMMWORD[((192-128))+rcx]
   1451 	je	NEAR $L$ctr32_enc_done	; round count == 11
   1452 
   1453 DB	102,15,56,220,209
   1454 DB	102,15,56,220,217
   1455 DB	102,15,56,220,225
   1456 DB	102,15,56,220,233
   1457 DB	102,15,56,220,241
   1458 DB	102,15,56,220,249
   1459 DB	102,68,15,56,220,193
   1460 DB	102,68,15,56,220,201
   1461 	movups	xmm1,XMMWORD[((208-128))+rcx]
   1462 
   1463 DB	102,15,56,220,208
   1464 DB	102,15,56,220,216
   1465 DB	102,15,56,220,224
   1466 DB	102,15,56,220,232
   1467 DB	102,15,56,220,240
   1468 DB	102,15,56,220,248
   1469 DB	102,68,15,56,220,192
   1470 DB	102,68,15,56,220,200
   1471 	movups	xmm0,XMMWORD[((224-128))+rcx]
   1472 	jmp	NEAR $L$ctr32_enc_done
   1473 
   1474 ALIGN	16
   1475 $L$ctr32_enc_done:
        ; Here xmm1 holds the next-to-last round key and xmm0 the last one.
        ; Each input block is XORed with the last round key first, so the
        ; aesenclast below (which XORs its second operand in) yields
        ; keystream XOR input directly.
   1476 	movdqu	xmm11,XMMWORD[16+rdi]
   1477 	pxor	xmm10,xmm0
   1478 	movdqu	xmm12,XMMWORD[32+rdi]
   1479 	pxor	xmm11,xmm0
   1480 	movdqu	xmm13,XMMWORD[48+rdi]
   1481 	pxor	xmm12,xmm0
   1482 	movdqu	xmm14,XMMWORD[64+rdi]
   1483 	pxor	xmm13,xmm0
   1484 	movdqu	xmm15,XMMWORD[80+rdi]
   1485 	pxor	xmm14,xmm0
   1486 	pxor	xmm15,xmm0
   1487 DB	102,15,56,220,209
   1488 DB	102,15,56,220,217
   1489 DB	102,15,56,220,225
   1490 DB	102,15,56,220,233
   1491 DB	102,15,56,220,241
   1492 DB	102,15,56,220,249
   1493 DB	102,68,15,56,220,193
   1494 DB	102,68,15,56,220,201
   1495 	movdqu	xmm1,XMMWORD[96+rdi]	; xmm1 is free now: reuse it for input block 6
   1496 	lea	rdi,[128+rdi]
   1497 
   1498 DB	102,65,15,56,221,210	; aesenclast xmm2,xmm10
   1499 	pxor	xmm1,xmm0
   1500 	movdqu	xmm10,XMMWORD[((112-128))+rdi]	; input block 7
   1501 DB	102,65,15,56,221,219	; aesenclast xmm3,xmm11
   1502 	pxor	xmm10,xmm0
   1503 	movdqa	xmm11,XMMWORD[rsp]	; xmm11..xmm15,xmm0 <- next counter blocks 0..5
   1504 DB	102,65,15,56,221,228	; aesenclast xmm4,xmm12
   1505 DB	102,65,15,56,221,237	; aesenclast xmm5,xmm13
   1506 	movdqa	xmm12,XMMWORD[16+rsp]
   1507 	movdqa	xmm13,XMMWORD[32+rsp]
   1508 DB	102,65,15,56,221,246	; aesenclast xmm6,xmm14
   1509 DB	102,65,15,56,221,255	; aesenclast xmm7,xmm15
   1510 	movdqa	xmm14,XMMWORD[48+rsp]
   1511 	movdqa	xmm15,XMMWORD[64+rsp]
   1512 DB	102,68,15,56,221,193	; aesenclast xmm8,xmm1
   1513 	movdqa	xmm0,XMMWORD[80+rsp]
   1514 	movups	xmm1,XMMWORD[((16-128))+rcx]	; round key 1 for the next iteration / tail
   1515 DB	102,69,15,56,221,202	; aesenclast xmm9,xmm10
   1516 
        ; Store eight output blocks and move the next counters into xmm2..xmm7.
   1517 	movups	XMMWORD[rsi],xmm2
   1518 	movdqa	xmm2,xmm11
   1519 	movups	XMMWORD[16+rsi],xmm3
   1520 	movdqa	xmm3,xmm12
   1521 	movups	XMMWORD[32+rsi],xmm4
   1522 	movdqa	xmm4,xmm13
   1523 	movups	XMMWORD[48+rsi],xmm5
   1524 	movdqa	xmm5,xmm14
   1525 	movups	XMMWORD[64+rsi],xmm6
   1526 	movdqa	xmm6,xmm15
   1527 	movups	XMMWORD[80+rsi],xmm7
   1528 	movdqa	xmm7,xmm0
   1529 	movups	XMMWORD[96+rsi],xmm8
   1530 	movups	XMMWORD[112+rsi],xmm9
   1531 	lea	rsi,[128+rsi]
   1532 
   1533 	sub	rdx,8
   1534 	jnc	NEAR $L$ctr32_loop8	; at least eight more blocks remained
   1535 
   1536 	add	rdx,8	; rdx = leftover blocks (0..7)
   1537 	jz	NEAR $L$ctr32_done
   1538 	lea	rcx,[((-128))+rcx]	; remove the +128 bias before the tail
   1539 
   1540 $L$ctr32_tail:
        ; 1..7 blocks left (0 is also possible straight from the entry check,
        ; see the header note). On entry: rcx = key schedule, eax = round count,
        ; xmm1 = round key 1, xmm2..xmm7 = counter blocks +0..+5 (already XORed
        ; with round key 0), stack slot 96 = counter block +6.
   1541 
   1542 
   1543 	lea	rcx,[16+rcx]
   1544 	cmp	rdx,4
   1545 	jb	NEAR $L$ctr32_loop3	; 1..3 blocks
   1546 	je	NEAR $L$ctr32_loop4	; exactly 4 blocks
   1547 
   1548 
        ; 5..7 blocks: run eight lanes (xmm9 is a zero dummy) through the shared
        ; 8-way round loop.
   1549 	shl	eax,4
   1550 	movdqa	xmm8,XMMWORD[96+rsp]
   1551 	pxor	xmm9,xmm9
   1552 
   1553 	movups	xmm0,XMMWORD[16+rcx]	; round key 2
   1554 DB	102,15,56,220,209
   1555 DB	102,15,56,220,217
   1556 	lea	rcx,[((32-16))+rax*1+rcx]
   1557 	neg	rax
   1558 DB	102,15,56,220,225
   1559 	add	rax,16
   1560 	movups	xmm10,XMMWORD[rdi]
   1561 DB	102,15,56,220,233
   1562 DB	102,15,56,220,241
   1563 	movups	xmm11,XMMWORD[16+rdi]
   1564 	movups	xmm12,XMMWORD[32+rdi]
   1565 DB	102,15,56,220,249
   1566 DB	102,68,15,56,220,193
   1567 
        ; $L$enc_loop8_enter is defined outside this view; presumably it joins
        ; the 8-block helper after its first round on the first seven lanes and
        ; completes the remaining rounds -- TODO confirm.
   1568 	call	$L$enc_loop8_enter
   1569 
   1570 	movdqu	xmm13,XMMWORD[48+rdi]
   1571 	pxor	xmm2,xmm10
   1572 	movdqu	xmm10,XMMWORD[64+rdi]
   1573 	pxor	xmm3,xmm11
   1574 	movdqu	XMMWORD[rsi],xmm2
   1575 	pxor	xmm4,xmm12
   1576 	movdqu	XMMWORD[16+rsi],xmm3
   1577 	pxor	xmm5,xmm13
   1578 	movdqu	XMMWORD[32+rsi],xmm4
   1579 	pxor	xmm6,xmm10
   1580 	movdqu	XMMWORD[48+rsi],xmm5
   1581 	movdqu	XMMWORD[64+rsi],xmm6
   1582 	cmp	rdx,6	; flags reused by the je after the sixth block's store
   1583 	jb	NEAR $L$ctr32_done	; exactly 5 blocks
   1584 
   1585 	movups	xmm11,XMMWORD[80+rdi]
   1586 	xorps	xmm7,xmm11
   1587 	movups	XMMWORD[80+rsi],xmm7
   1588 	je	NEAR $L$ctr32_done	; exactly 6 blocks
   1589 
   1590 	movups	xmm12,XMMWORD[96+rdi]
   1591 	xorps	xmm8,xmm12
   1592 	movups	XMMWORD[96+rsi],xmm8
   1593 	jmp	NEAR $L$ctr32_done
   1594 
   1595 ALIGN	32
   1596 $L$ctr32_loop4:
        ; Exactly four blocks: one aesenc per lane per iteration; eax counts
        ; the rounds. ZF from dec survives the DB/movups lines up to the jnz.
   1597 DB	102,15,56,220,209
   1598 	lea	rcx,[16+rcx]
   1599 	dec	eax
   1600 DB	102,15,56,220,217
   1601 DB	102,15,56,220,225
   1602 DB	102,15,56,220,233
   1603 	movups	xmm1,XMMWORD[rcx]
   1604 	jnz	NEAR $L$ctr32_loop4
   1605 DB	102,15,56,221,209	; aesenclast xmm2,xmm1
   1606 DB	102,15,56,221,217	; aesenclast xmm3,xmm1
   1607 	movups	xmm10,XMMWORD[rdi]
   1608 	movups	xmm11,XMMWORD[16+rdi]
   1609 DB	102,15,56,221,225	; aesenclast xmm4,xmm1
   1610 DB	102,15,56,221,233	; aesenclast xmm5,xmm1
   1611 	movups	xmm12,XMMWORD[32+rdi]
   1612 	movups	xmm13,XMMWORD[48+rdi]
   1613 
   1614 	xorps	xmm2,xmm10
   1615 	movups	XMMWORD[rsi],xmm2
   1616 	xorps	xmm3,xmm11
   1617 	movups	XMMWORD[16+rsi],xmm3
   1618 	pxor	xmm4,xmm12
   1619 	movdqu	XMMWORD[32+rsi],xmm4
   1620 	pxor	xmm5,xmm13
   1621 	movdqu	XMMWORD[48+rsi],xmm5
   1622 	jmp	NEAR $L$ctr32_done
   1623 
   1624 ALIGN	32
   1625 $L$ctr32_loop3:
        ; Fewer than four blocks: three lanes are always encrypted, but only as
        ; many blocks as rdx says are read from [rdi] and written to [rsi]
        ; (the first block is transferred unconditionally).
   1626 DB	102,15,56,220,209
   1627 	lea	rcx,[16+rcx]
   1628 	dec	eax
   1629 DB	102,15,56,220,217
   1630 DB	102,15,56,220,225
   1631 	movups	xmm1,XMMWORD[rcx]
   1632 	jnz	NEAR $L$ctr32_loop3
   1633 DB	102,15,56,221,209
   1634 DB	102,15,56,221,217
   1635 DB	102,15,56,221,225
   1636 
   1637 	movups	xmm10,XMMWORD[rdi]
   1638 	xorps	xmm2,xmm10
   1639 	movups	XMMWORD[rsi],xmm2
   1640 	cmp	rdx,2	; flags reused by the je after the second block's store
   1641 	jb	NEAR $L$ctr32_done
   1642 
   1643 	movups	xmm11,XMMWORD[16+rdi]
   1644 	xorps	xmm3,xmm11
   1645 	movups	XMMWORD[16+rsi],xmm3
   1646 	je	NEAR $L$ctr32_done
   1647 
   1648 	movups	xmm12,XMMWORD[32+rdi]
   1649 	xorps	xmm4,xmm12
   1650 	movups	XMMWORD[32+rsi],xmm4
   1651 
   1652 $L$ctr32_done:
        ; Wipe key/counter material from volatile registers, restore
        ; xmm6-xmm15 from the save area (zeroing each slot after reading it),
        ; zero the 128-byte counter scratch area and tear the frame down.
   1653 	xorps	xmm0,xmm0
   1654 	xor	r11d,r11d
   1655 	pxor	xmm1,xmm1
   1656 	pxor	xmm2,xmm2
   1657 	pxor	xmm3,xmm3
   1658 	pxor	xmm4,xmm4
   1659 	pxor	xmm5,xmm5
   1660 	movaps	xmm6,XMMWORD[((-160))+rbp]
   1661 	movaps	XMMWORD[(-160)+rbp],xmm0
   1662 	movaps	xmm7,XMMWORD[((-144))+rbp]
   1663 	movaps	XMMWORD[(-144)+rbp],xmm0
   1664 	movaps	xmm8,XMMWORD[((-128))+rbp]
   1665 	movaps	XMMWORD[(-128)+rbp],xmm0
   1666 	movaps	xmm9,XMMWORD[((-112))+rbp]
   1667 	movaps	XMMWORD[(-112)+rbp],xmm0
   1668 	movaps	xmm10,XMMWORD[((-96))+rbp]
   1669 	movaps	XMMWORD[(-96)+rbp],xmm0
   1670 	movaps	xmm11,XMMWORD[((-80))+rbp]
   1671 	movaps	XMMWORD[(-80)+rbp],xmm0
   1672 	movaps	xmm12,XMMWORD[((-64))+rbp]
   1673 	movaps	XMMWORD[(-64)+rbp],xmm0
   1674 	movaps	xmm13,XMMWORD[((-48))+rbp]
   1675 	movaps	XMMWORD[(-48)+rbp],xmm0
   1676 	movaps	xmm14,XMMWORD[((-32))+rbp]
   1677 	movaps	XMMWORD[(-32)+rbp],xmm0
   1678 	movaps	xmm15,XMMWORD[((-16))+rbp]
   1679 	movaps	XMMWORD[(-16)+rbp],xmm0
   1680 	movaps	XMMWORD[rsp],xmm0
   1681 	movaps	XMMWORD[16+rsp],xmm0
   1682 	movaps	XMMWORD[32+rsp],xmm0
   1683 	movaps	XMMWORD[48+rsp],xmm0
   1684 	movaps	XMMWORD[64+rsp],xmm0
   1685 	movaps	XMMWORD[80+rsp],xmm0
   1686 	movaps	XMMWORD[96+rsp],xmm0
   1687 	movaps	XMMWORD[112+rsp],xmm0
   1688 	lea	rsp,[rbp]	; rsp -> saved rbp slot
   1689 	pop	rbp
   1690 $L$ctr32_epilogue:
   1691 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   1692 	mov	rsi,QWORD[16+rsp]
   1693 	DB	0F3h,0C3h		;repret
   1694 $L$SEH_end_aesni_ctr32_encrypt_blocks:
   1695 global	aesni_xts_encrypt
        ;-----------------------------------------------------------------------
        ; aesni_xts_encrypt -- tweaked block encryption with ciphertext
        ; stealing for a trailing partial block (Win64 entry point).
        ;
        ; The prologue moves the Win64 arguments into these working registers:
        ;   rdi <- rcx       input pointer
        ;   rsi <- rdx       output pointer
        ;   rdx <- r8        length in bytes (full-block part is len & -16;
        ;                    len & 15 trailing bytes are handled by stealing)
        ;   rcx <- r9        key schedule used on the data blocks
        ;   r8  <- [40+rsp]  key schedule used once, to encrypt the initial tweak
        ;   r9  <- [48+rsp]  16-byte value that is encrypted to form the tweak
        ; Both key schedules keep their round count in the dword at offset 240.
        ;
        ; Register roles after setup:
        ;   r11 = data key schedule base, r10 = round count (short paths) or
        ;   key offset (grand loop), r9 = original length, xmm10..xmm14 = five
        ;   consecutive tweaks, xmm15 = the tweak after those,
        ;   [96+rsp] = round key 0 XOR last round key.
        ;
        ; Each "next tweak" step below is paddq (double both 64-bit halves) plus
        ; a conditional XOR with $L$xts_magic selected by the sign bits that
        ; psrad extracts from the pshufd'ed copy. $L$xts_magic is defined
        ; outside this view; presumably this is the XTS multiply-by-x in
        ; GF(2^128) -- TODO confirm against the constant.
        ;
        ; NOTE(review): a length below 16 bytes is not special-cased here; the
        ; stealing code would then touch [((-16))+rsi]. Confirm callers
        ; guarantee len >= 16.
        ;
        ; DB-encoded instructions used below:
        ;   102,15,56,220,modrm          aesenc     xmmA,xmmB
        ;   102,15,56,221,modrm          aesenclast xmmA,xmmB
        ;   102,15,56,221,modrm,36,d     aesenclast xmmA,XMMWORD[d+rsp]
        ;   0F3h,0C3h                    rep ret
        ;-----------------------------------------------------------------------
   1696 
   1697 ALIGN	16
   1698 aesni_xts_encrypt:
   1699 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
   1700 	mov	QWORD[16+rsp],rsi
   1701 	mov	rax,rsp
   1702 $L$SEH_begin_aesni_xts_encrypt:
   1703 	mov	rdi,rcx
   1704 	mov	rsi,rdx
   1705 	mov	rdx,r8
   1706 	mov	rcx,r9
   1707 	mov	r8,QWORD[40+rsp]	; fifth argument
   1708 	mov	r9,QWORD[48+rsp]	; sixth argument
   1709 
   1710 
        ; Frame: rax keeps the entry rsp, rbp is pushed at rax-8, xmm6-xmm15 are
        ; saved at rax-168..rax-9, and rsp is aligned to 16 for the movdqa
        ; scratch slots at rsp+0..rsp+111.
   1711 	lea	rax,[rsp]
   1712 	push	rbp
   1713 	sub	rsp,272
   1714 	and	rsp,-16
   1715 	movaps	XMMWORD[(-168)+rax],xmm6
   1716 	movaps	XMMWORD[(-152)+rax],xmm7
   1717 	movaps	XMMWORD[(-136)+rax],xmm8
   1718 	movaps	XMMWORD[(-120)+rax],xmm9
   1719 	movaps	XMMWORD[(-104)+rax],xmm10
   1720 	movaps	XMMWORD[(-88)+rax],xmm11
   1721 	movaps	XMMWORD[(-72)+rax],xmm12
   1722 	movaps	XMMWORD[(-56)+rax],xmm13
   1723 	movaps	XMMWORD[(-40)+rax],xmm14
   1724 	movaps	XMMWORD[(-24)+rax],xmm15
   1725 $L$xts_enc_body:
   1726 	lea	rbp,[((-8))+rax]	; rbp -> saved rbp slot; used to unwind in $L$xts_enc_ret
        ; Encrypt the 16 bytes at [r9] with the key schedule at r8 to obtain
        ; the initial tweak in xmm2.
   1727 	movups	xmm2,XMMWORD[r9]
   1728 	mov	eax,DWORD[240+r8]	; round count of the tweak key
   1729 	mov	r10d,DWORD[240+rcx]	; round count of the data key
   1730 	movups	xmm0,XMMWORD[r8]
   1731 	movups	xmm1,XMMWORD[16+r8]
   1732 	lea	r8,[32+r8]
   1733 	xorps	xmm2,xmm0
   1734 $L$oop_enc1_8:
   1735 DB	102,15,56,220,209	; aesenc xmm2,xmm1
   1736 	dec	eax
   1737 	movups	xmm1,XMMWORD[r8]
   1738 	lea	r8,[16+r8]
   1739 	jnz	NEAR $L$oop_enc1_8
   1740 DB	102,15,56,221,209	; aesenclast xmm2,xmm1
   1741 	movups	xmm0,XMMWORD[rcx]	; xmm0 = data round key 0
   1742 	mov	r11,rcx	; r11 = data key schedule base for the rest of the function
   1743 	mov	eax,r10d	; eax = data round count
   1744 	shl	r10d,4	; r10 = 16*rounds
   1745 	mov	r9,rdx	; r9 = original byte length (low 4 bits drive stealing)
   1746 	and	rdx,-16	; rdx = bytes in whole blocks
   1747 
   1748 	movups	xmm1,XMMWORD[16+r10*1+rcx]	; last round key
   1749 
        ; Derive xmm10..xmm14 = tweak i XOR round key 0 (i = 0..4), leave the
        ; sixth tweak in xmm15, and store (round key 0 XOR last round key) at
        ; [96+rsp] for the grand loop.
   1750 	movdqa	xmm8,XMMWORD[$L$xts_magic]
   1751 	movdqa	xmm15,xmm2
   1752 	pshufd	xmm9,xmm2,0x5f
   1753 	pxor	xmm1,xmm0
   1754 	movdqa	xmm14,xmm9
   1755 	paddd	xmm9,xmm9
   1756 	movdqa	xmm10,xmm15
   1757 	psrad	xmm14,31
   1758 	paddq	xmm15,xmm15
   1759 	pand	xmm14,xmm8
   1760 	pxor	xmm10,xmm0
   1761 	pxor	xmm15,xmm14
   1762 	movdqa	xmm14,xmm9
   1763 	paddd	xmm9,xmm9
   1764 	movdqa	xmm11,xmm15
   1765 	psrad	xmm14,31
   1766 	paddq	xmm15,xmm15
   1767 	pand	xmm14,xmm8
   1768 	pxor	xmm11,xmm0
   1769 	pxor	xmm15,xmm14
   1770 	movdqa	xmm14,xmm9
   1771 	paddd	xmm9,xmm9
   1772 	movdqa	xmm12,xmm15
   1773 	psrad	xmm14,31
   1774 	paddq	xmm15,xmm15
   1775 	pand	xmm14,xmm8
   1776 	pxor	xmm12,xmm0
   1777 	pxor	xmm15,xmm14
   1778 	movdqa	xmm14,xmm9
   1779 	paddd	xmm9,xmm9
   1780 	movdqa	xmm13,xmm15
   1781 	psrad	xmm14,31
   1782 	paddq	xmm15,xmm15
   1783 	pand	xmm14,xmm8
   1784 	pxor	xmm13,xmm0
   1785 	pxor	xmm15,xmm14
   1786 	movdqa	xmm14,xmm15
   1787 	psrad	xmm9,31
   1788 	paddq	xmm15,xmm15
   1789 	pand	xmm9,xmm8
   1790 	pxor	xmm14,xmm0
   1791 	pxor	xmm15,xmm9
   1792 	movaps	XMMWORD[96+rsp],xmm1
   1793 
   1794 	sub	rdx,16*6
   1795 	jc	NEAR $L$xts_enc_short	; fewer than six whole blocks
   1796 
        ; Grand-loop setup: rcx = key+32+16*rounds, rax = r10 = 112-16*rounds
        ; (offset that reaches zero when the inner round loop is finished).
   1797 	mov	eax,16+96
   1798 	lea	rcx,[32+r10*1+r11]
   1799 	sub	rax,r10
   1800 	movups	xmm1,XMMWORD[16+r11]	; round key 1
   1801 	mov	r10,rax
   1802 	lea	r8,[$L$xts_magic]
   1803 	jmp	NEAR $L$xts_enc_grandloop
   1804 
   1805 ALIGN	32
   1806 $L$xts_enc_grandloop:
        ; Six blocks per iteration. Inputs are XORed with (tweak XOR round key
        ; 0), which folds the first AddRoundKey into the tweak XOR. While the
        ; first rounds run, each tweak is turned into (tweak XOR last round
        ; key) and parked in stack slots 0..80 for the final aesenclast.
   1807 	movdqu	xmm2,XMMWORD[rdi]
   1808 	movdqa	xmm8,xmm0	; xmm8 = round key 0, becomes the sixth tweak mask
   1809 	movdqu	xmm3,XMMWORD[16+rdi]
   1810 	pxor	xmm2,xmm10
   1811 	movdqu	xmm4,XMMWORD[32+rdi]
   1812 	pxor	xmm3,xmm11
   1813 DB	102,15,56,220,209
   1814 	movdqu	xmm5,XMMWORD[48+rdi]
   1815 	pxor	xmm4,xmm12
   1816 DB	102,15,56,220,217
   1817 	movdqu	xmm6,XMMWORD[64+rdi]
   1818 	pxor	xmm5,xmm13
   1819 DB	102,15,56,220,225
   1820 	movdqu	xmm7,XMMWORD[80+rdi]
   1821 	pxor	xmm8,xmm15
   1822 	movdqa	xmm9,XMMWORD[96+rsp]	; round key 0 XOR last round key
   1823 	pxor	xmm6,xmm14
   1824 DB	102,15,56,220,233
   1825 	movups	xmm0,XMMWORD[32+r11]	; round key 2
   1826 	lea	rdi,[96+rdi]
   1827 	pxor	xmm7,xmm8
   1828 
   1829 	pxor	xmm10,xmm9
   1830 DB	102,15,56,220,241
   1831 	pxor	xmm11,xmm9
   1832 	movdqa	XMMWORD[rsp],xmm10
   1833 DB	102,15,56,220,249
   1834 	movups	xmm1,XMMWORD[48+r11]	; round key 3
   1835 	pxor	xmm12,xmm9
   1836 
   1837 DB	102,15,56,220,208
   1838 	pxor	xmm13,xmm9
   1839 	movdqa	XMMWORD[16+rsp],xmm11
   1840 DB	102,15,56,220,216
   1841 	pxor	xmm14,xmm9
   1842 	movdqa	XMMWORD[32+rsp],xmm12
   1843 DB	102,15,56,220,224
   1844 DB	102,15,56,220,232
   1845 	pxor	xmm8,xmm9
   1846 	movdqa	XMMWORD[64+rsp],xmm14
   1847 DB	102,15,56,220,240
   1848 DB	102,15,56,220,248
   1849 	movups	xmm0,XMMWORD[64+r11]	; round key 4
   1850 	movdqa	XMMWORD[80+rsp],xmm8
   1851 	pshufd	xmm9,xmm15,0x5f	; start generating the next batch of tweaks
   1852 	jmp	NEAR $L$xts_enc_loop6
   1853 ALIGN	32
   1854 $L$xts_enc_loop6:
        ; Two rounds per iteration on all six lanes; rax climbs by 32 towards
        ; zero and the jnz consumes the flags of that add (the DB and movups
        ; lines in between do not modify flags).
   1855 DB	102,15,56,220,209
   1856 DB	102,15,56,220,217
   1857 DB	102,15,56,220,225
   1858 DB	102,15,56,220,233
   1859 DB	102,15,56,220,241
   1860 DB	102,15,56,220,249
   1861 	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
   1862 	add	rax,32
   1863 
   1864 DB	102,15,56,220,208
   1865 DB	102,15,56,220,216
   1866 DB	102,15,56,220,224
   1867 DB	102,15,56,220,232
   1868 DB	102,15,56,220,240
   1869 DB	102,15,56,220,248
   1870 	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
   1871 	jnz	NEAR $L$xts_enc_loop6
   1872 
        ; Last five rounds, interleaved with computing the next six tweaks:
        ; xmm10..xmm14 are rebuilt as (round key 0 XOR tweak) and xmm15 ends up
        ; as the tweak following them.
   1873 	movdqa	xmm8,XMMWORD[r8]	; reload $L$xts_magic
   1874 	movdqa	xmm14,xmm9
   1875 	paddd	xmm9,xmm9
   1876 DB	102,15,56,220,209
   1877 	paddq	xmm15,xmm15
   1878 	psrad	xmm14,31
   1879 DB	102,15,56,220,217
   1880 	pand	xmm14,xmm8
   1881 	movups	xmm10,XMMWORD[r11]	; round key 0
   1882 DB	102,15,56,220,225
   1883 DB	102,15,56,220,233
   1884 DB	102,15,56,220,241
   1885 	pxor	xmm15,xmm14
   1886 	movaps	xmm11,xmm10
   1887 DB	102,15,56,220,249
   1888 	movups	xmm1,XMMWORD[((-64))+rcx]
   1889 
   1890 	movdqa	xmm14,xmm9
   1891 DB	102,15,56,220,208
   1892 	paddd	xmm9,xmm9
   1893 	pxor	xmm10,xmm15
   1894 DB	102,15,56,220,216
   1895 	psrad	xmm14,31
   1896 	paddq	xmm15,xmm15
   1897 DB	102,15,56,220,224
   1898 DB	102,15,56,220,232
   1899 	pand	xmm14,xmm8
   1900 	movaps	xmm12,xmm11
   1901 DB	102,15,56,220,240
   1902 	pxor	xmm15,xmm14
   1903 	movdqa	xmm14,xmm9
   1904 DB	102,15,56,220,248
   1905 	movups	xmm0,XMMWORD[((-48))+rcx]
   1906 
   1907 	paddd	xmm9,xmm9
   1908 DB	102,15,56,220,209
   1909 	pxor	xmm11,xmm15
   1910 	psrad	xmm14,31
   1911 DB	102,15,56,220,217
   1912 	paddq	xmm15,xmm15
   1913 	pand	xmm14,xmm8
   1914 DB	102,15,56,220,225
   1915 DB	102,15,56,220,233
   1916 	movdqa	XMMWORD[48+rsp],xmm13	; fourth (tweak XOR last key) slot, stored late
   1917 	pxor	xmm15,xmm14
   1918 DB	102,15,56,220,241
   1919 	movaps	xmm13,xmm12
   1920 	movdqa	xmm14,xmm9
   1921 DB	102,15,56,220,249
   1922 	movups	xmm1,XMMWORD[((-32))+rcx]
   1923 
   1924 	paddd	xmm9,xmm9
   1925 DB	102,15,56,220,208
   1926 	pxor	xmm12,xmm15
   1927 	psrad	xmm14,31
   1928 DB	102,15,56,220,216
   1929 	paddq	xmm15,xmm15
   1930 	pand	xmm14,xmm8
   1931 DB	102,15,56,220,224
   1932 DB	102,15,56,220,232
   1933 DB	102,15,56,220,240
   1934 	pxor	xmm15,xmm14
   1935 	movaps	xmm14,xmm13
   1936 DB	102,15,56,220,248
   1937 
   1938 	movdqa	xmm0,xmm9	; xmm0 is free: borrow it as the carry mask
   1939 	paddd	xmm9,xmm9
   1940 DB	102,15,56,220,209
   1941 	pxor	xmm13,xmm15
   1942 	psrad	xmm0,31
   1943 DB	102,15,56,220,217
   1944 	paddq	xmm15,xmm15
   1945 	pand	xmm0,xmm8
   1946 DB	102,15,56,220,225
   1947 DB	102,15,56,220,233
   1948 	pxor	xmm15,xmm0
   1949 	movups	xmm0,XMMWORD[r11]	; round key 0 for the next iteration / short path
   1950 DB	102,15,56,220,241
   1951 DB	102,15,56,220,249
   1952 	movups	xmm1,XMMWORD[16+r11]	; round key 1 for the next iteration
   1953 
        ; Final round: the memory operand is (tweak XOR last round key), so the
        ; output tweak XOR comes for free.
   1954 	pxor	xmm14,xmm15
   1955 DB	102,15,56,221,84,36,0	; aesenclast xmm2,[rsp]
   1956 	psrad	xmm9,31
   1957 	paddq	xmm15,xmm15
   1958 DB	102,15,56,221,92,36,16	; aesenclast xmm3,[16+rsp]
   1959 DB	102,15,56,221,100,36,32	; aesenclast xmm4,[32+rsp]
   1960 	pand	xmm9,xmm8
   1961 	mov	rax,r10	; reset the round offset for the next iteration
   1962 DB	102,15,56,221,108,36,48	; aesenclast xmm5,[48+rsp]
   1963 DB	102,15,56,221,116,36,64	; aesenclast xmm6,[64+rsp]
   1964 DB	102,15,56,221,124,36,80	; aesenclast xmm7,[80+rsp]
   1965 	pxor	xmm15,xmm9
   1966 
   1967 	lea	rsi,[96+rsi]
   1968 	movups	XMMWORD[(-96)+rsi],xmm2
   1969 	movups	XMMWORD[(-80)+rsi],xmm3
   1970 	movups	XMMWORD[(-64)+rsi],xmm4
   1971 	movups	XMMWORD[(-48)+rsi],xmm5
   1972 	movups	XMMWORD[(-32)+rsi],xmm6
   1973 	movups	XMMWORD[(-16)+rsi],xmm7
   1974 	sub	rdx,16*6
   1975 	jnc	NEAR $L$xts_enc_grandloop
   1976 
        ; Leave the grand loop: recover eax = round count from r10 and point
        ; rcx back at the start of the key schedule.
   1977 	mov	eax,16+96
   1978 	sub	eax,r10d
   1979 	mov	rcx,r11
   1980 	shr	eax,4
   1981 
   1982 $L$xts_enc_short:
        ; 0..5 whole blocks left. xmm0 = round key 0, so each pxor below strips
        ; the folded-in key from a tweak that is about to be used. The flags of
        ; each cmp are consumed by both the jb and the following je (pxor does
        ; not modify flags).
   1983 
   1984 	mov	r10d,eax	; keep the round count for the stealing step
   1985 	pxor	xmm10,xmm0
   1986 	add	rdx,16*6	; rdx = remaining whole-block bytes (0..80)
   1987 	jz	NEAR $L$xts_enc_done
   1988 
   1989 	pxor	xmm11,xmm0
   1990 	cmp	rdx,0x20
   1991 	jb	NEAR $L$xts_enc_one
   1992 	pxor	xmm12,xmm0
   1993 	je	NEAR $L$xts_enc_two
   1994 
   1995 	pxor	xmm13,xmm0
   1996 	cmp	rdx,0x40
   1997 	jb	NEAR $L$xts_enc_three
   1998 	pxor	xmm14,xmm0
   1999 	je	NEAR $L$xts_enc_four
   2000 
        ; Five blocks: use the 6-way helper with a zero dummy in xmm7.
        ; _aesni_encrypt6 is defined outside this view.
   2001 	movdqu	xmm2,XMMWORD[rdi]
   2002 	movdqu	xmm3,XMMWORD[16+rdi]
   2003 	movdqu	xmm4,XMMWORD[32+rdi]
   2004 	pxor	xmm2,xmm10
   2005 	movdqu	xmm5,XMMWORD[48+rdi]
   2006 	pxor	xmm3,xmm11
   2007 	movdqu	xmm6,XMMWORD[64+rdi]
   2008 	lea	rdi,[80+rdi]
   2009 	pxor	xmm4,xmm12
   2010 	pxor	xmm5,xmm13
   2011 	pxor	xmm6,xmm14
   2012 	pxor	xmm7,xmm7
   2013 
   2014 	call	_aesni_encrypt6
   2015 
   2016 	xorps	xmm2,xmm10
   2017 	movdqa	xmm10,xmm15	; xmm10 = tweak for a possible stolen block
   2018 	xorps	xmm3,xmm11
   2019 	xorps	xmm4,xmm12
   2020 	movdqu	XMMWORD[rsi],xmm2
   2021 	xorps	xmm5,xmm13
   2022 	movdqu	XMMWORD[16+rsi],xmm3
   2023 	xorps	xmm6,xmm14
   2024 	movdqu	XMMWORD[32+rsi],xmm4
   2025 	movdqu	XMMWORD[48+rsi],xmm5
   2026 	movdqu	XMMWORD[64+rsi],xmm6
   2027 	lea	rsi,[80+rsi]
   2028 	jmp	NEAR $L$xts_enc_done
   2029 
   2030 ALIGN	16
   2031 $L$xts_enc_one:
        ; One block: tweak XOR, inline single-block encryption, tweak XOR.
   2032 	movups	xmm2,XMMWORD[rdi]
   2033 	lea	rdi,[16+rdi]
   2034 	xorps	xmm2,xmm10
   2035 	movups	xmm0,XMMWORD[rcx]
   2036 	movups	xmm1,XMMWORD[16+rcx]
   2037 	lea	rcx,[32+rcx]
   2038 	xorps	xmm2,xmm0
   2039 $L$oop_enc1_9:
   2040 DB	102,15,56,220,209
   2041 	dec	eax
   2042 	movups	xmm1,XMMWORD[rcx]
   2043 	lea	rcx,[16+rcx]
   2044 	jnz	NEAR $L$oop_enc1_9
   2045 DB	102,15,56,221,209
   2046 	xorps	xmm2,xmm10
   2047 	movdqa	xmm10,xmm11	; next tweak, in case stealing follows
   2048 	movups	XMMWORD[rsi],xmm2
   2049 	lea	rsi,[16+rsi]
   2050 	jmp	NEAR $L$xts_enc_done
   2051 
   2052 ALIGN	16
   2053 $L$xts_enc_two:
        ; Two blocks via _aesni_encrypt2 (see HEAD: it takes rcx = key schedule
        ; and eax = round count, and encrypts xmm2/xmm3 in place).
   2054 	movups	xmm2,XMMWORD[rdi]
   2055 	movups	xmm3,XMMWORD[16+rdi]
   2056 	lea	rdi,[32+rdi]
   2057 	xorps	xmm2,xmm10
   2058 	xorps	xmm3,xmm11
   2059 
   2060 	call	_aesni_encrypt2
   2061 
   2062 	xorps	xmm2,xmm10
   2063 	movdqa	xmm10,xmm12	; next tweak, in case stealing follows
   2064 	xorps	xmm3,xmm11
   2065 	movups	XMMWORD[rsi],xmm2
   2066 	movups	XMMWORD[16+rsi],xmm3
   2067 	lea	rsi,[32+rsi]
   2068 	jmp	NEAR $L$xts_enc_done
   2069 
   2070 ALIGN	16
   2071 $L$xts_enc_three:
        ; Three blocks via _aesni_encrypt3 (defined outside this view).
   2072 	movups	xmm2,XMMWORD[rdi]
   2073 	movups	xmm3,XMMWORD[16+rdi]
   2074 	movups	xmm4,XMMWORD[32+rdi]
   2075 	lea	rdi,[48+rdi]
   2076 	xorps	xmm2,xmm10
   2077 	xorps	xmm3,xmm11
   2078 	xorps	xmm4,xmm12
   2079 
   2080 	call	_aesni_encrypt3
   2081 
   2082 	xorps	xmm2,xmm10
   2083 	movdqa	xmm10,xmm13	; next tweak, in case stealing follows
   2084 	xorps	xmm3,xmm11
   2085 	xorps	xmm4,xmm12
   2086 	movups	XMMWORD[rsi],xmm2
   2087 	movups	XMMWORD[16+rsi],xmm3
   2088 	movups	XMMWORD[32+rsi],xmm4
   2089 	lea	rsi,[48+rsi]
   2090 	jmp	NEAR $L$xts_enc_done
   2091 
   2092 ALIGN	16
   2093 $L$xts_enc_four:
        ; Four blocks via _aesni_encrypt4 (defined outside this view).
   2094 	movups	xmm2,XMMWORD[rdi]
   2095 	movups	xmm3,XMMWORD[16+rdi]
   2096 	movups	xmm4,XMMWORD[32+rdi]
   2097 	xorps	xmm2,xmm10
   2098 	movups	xmm5,XMMWORD[48+rdi]
   2099 	lea	rdi,[64+rdi]
   2100 	xorps	xmm3,xmm11
   2101 	xorps	xmm4,xmm12
   2102 	xorps	xmm5,xmm13
   2103 
   2104 	call	_aesni_encrypt4
   2105 
   2106 	pxor	xmm2,xmm10
   2107 	movdqa	xmm10,xmm14	; next tweak, in case stealing follows
   2108 	pxor	xmm3,xmm11
   2109 	pxor	xmm4,xmm12
   2110 	movdqu	XMMWORD[rsi],xmm2
   2111 	pxor	xmm5,xmm13
   2112 	movdqu	XMMWORD[16+rsi],xmm3
   2113 	movdqu	XMMWORD[32+rsi],xmm4
   2114 	movdqu	XMMWORD[48+rsi],xmm5
   2115 	lea	rsi,[64+rsi]
   2116 	jmp	NEAR $L$xts_enc_done
   2117 
   2118 ALIGN	16
   2119 $L$xts_enc_done:
        ; All whole blocks are written. If len & 15 bytes remain, do
        ; ciphertext stealing with the tweak left in xmm10.
   2120 	and	r9,15
   2121 	jz	NEAR $L$xts_enc_ret
   2122 	mov	rdx,r9
   2123 
   2124 $L$xts_enc_steal:
        ; Per trailing byte: the input byte replaces the matching byte of the
        ; last ciphertext block, and the displaced ciphertext byte becomes the
        ; output tail byte.
   2125 	movzx	eax,BYTE[rdi]
   2126 	movzx	ecx,BYTE[((-16))+rsi]
   2127 	lea	rdi,[1+rdi]
   2128 	mov	BYTE[((-16))+rsi],al
   2129 	mov	BYTE[rsi],cl
   2130 	lea	rsi,[1+rsi]
   2131 	sub	rdx,1
   2132 	jnz	NEAR $L$xts_enc_steal
   2133 
   2134 	sub	rsi,r9	; rsi back to the end of the last whole block
   2135 	mov	rcx,r11	; key schedule base
   2136 	mov	eax,r10d	; round count
   2137 
        ; Re-encrypt the patched last block in place under the tweak in xmm10.
   2138 	movups	xmm2,XMMWORD[((-16))+rsi]
   2139 	xorps	xmm2,xmm10
   2140 	movups	xmm0,XMMWORD[rcx]
   2141 	movups	xmm1,XMMWORD[16+rcx]
   2142 	lea	rcx,[32+rcx]
   2143 	xorps	xmm2,xmm0
   2144 $L$oop_enc1_10:
   2145 DB	102,15,56,220,209
   2146 	dec	eax
   2147 	movups	xmm1,XMMWORD[rcx]
   2148 	lea	rcx,[16+rcx]
   2149 	jnz	NEAR $L$oop_enc1_10
   2150 DB	102,15,56,221,209
   2151 	xorps	xmm2,xmm10
   2152 	movups	XMMWORD[(-16)+rsi],xmm2
   2153 
   2154 $L$xts_enc_ret:
        ; Wipe volatile xmm registers, restore xmm6-xmm15 (zeroing each save
        ; slot after reading it), zero the 112-byte scratch area and tear the
        ; frame down.
   2155 	xorps	xmm0,xmm0
   2156 	pxor	xmm1,xmm1
   2157 	pxor	xmm2,xmm2
   2158 	pxor	xmm3,xmm3
   2159 	pxor	xmm4,xmm4
   2160 	pxor	xmm5,xmm5
   2161 	movaps	xmm6,XMMWORD[((-160))+rbp]
   2162 	movaps	XMMWORD[(-160)+rbp],xmm0
   2163 	movaps	xmm7,XMMWORD[((-144))+rbp]
   2164 	movaps	XMMWORD[(-144)+rbp],xmm0
   2165 	movaps	xmm8,XMMWORD[((-128))+rbp]
   2166 	movaps	XMMWORD[(-128)+rbp],xmm0
   2167 	movaps	xmm9,XMMWORD[((-112))+rbp]
   2168 	movaps	XMMWORD[(-112)+rbp],xmm0
   2169 	movaps	xmm10,XMMWORD[((-96))+rbp]
   2170 	movaps	XMMWORD[(-96)+rbp],xmm0
   2171 	movaps	xmm11,XMMWORD[((-80))+rbp]
   2172 	movaps	XMMWORD[(-80)+rbp],xmm0
   2173 	movaps	xmm12,XMMWORD[((-64))+rbp]
   2174 	movaps	XMMWORD[(-64)+rbp],xmm0
   2175 	movaps	xmm13,XMMWORD[((-48))+rbp]
   2176 	movaps	XMMWORD[(-48)+rbp],xmm0
   2177 	movaps	xmm14,XMMWORD[((-32))+rbp]
   2178 	movaps	XMMWORD[(-32)+rbp],xmm0
   2179 	movaps	xmm15,XMMWORD[((-16))+rbp]
   2180 	movaps	XMMWORD[(-16)+rbp],xmm0
   2181 	movaps	XMMWORD[rsp],xmm0
   2182 	movaps	XMMWORD[16+rsp],xmm0
   2183 	movaps	XMMWORD[32+rsp],xmm0
   2184 	movaps	XMMWORD[48+rsp],xmm0
   2185 	movaps	XMMWORD[64+rsp],xmm0
   2186 	movaps	XMMWORD[80+rsp],xmm0
   2187 	movaps	XMMWORD[96+rsp],xmm0
   2188 	lea	rsp,[rbp]	; rsp -> saved rbp slot
   2189 	pop	rbp
   2190 $L$xts_enc_epilogue:
   2191 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   2192 	mov	rsi,QWORD[16+rsp]
   2193 	DB	0F3h,0C3h		;repret
   2194 $L$SEH_end_aesni_xts_encrypt:
   2195 global	aesni_xts_decrypt
   2196 
   2197 ALIGN	16
   2198 aesni_xts_decrypt:
   2199 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
   2200 	mov	QWORD[16+rsp],rsi
   2201 	mov	rax,rsp
   2202 $L$SEH_begin_aesni_xts_decrypt:
   2203 	mov	rdi,rcx
   2204 	mov	rsi,rdx
   2205 	mov	rdx,r8
   2206 	mov	rcx,r9
   2207 	mov	r8,QWORD[40+rsp]
   2208 	mov	r9,QWORD[48+rsp]
   2209 
   2210 
   2211 	lea	rax,[rsp]
   2212 	push	rbp
   2213 	sub	rsp,272
   2214 	and	rsp,-16
   2215 	movaps	XMMWORD[(-168)+rax],xmm6
   2216 	movaps	XMMWORD[(-152)+rax],xmm7
   2217 	movaps	XMMWORD[(-136)+rax],xmm8
   2218 	movaps	XMMWORD[(-120)+rax],xmm9
   2219 	movaps	XMMWORD[(-104)+rax],xmm10
   2220 	movaps	XMMWORD[(-88)+rax],xmm11
   2221 	movaps	XMMWORD[(-72)+rax],xmm12
   2222 	movaps	XMMWORD[(-56)+rax],xmm13
   2223 	movaps	XMMWORD[(-40)+rax],xmm14
   2224 	movaps	XMMWORD[(-24)+rax],xmm15
   2225 $L$xts_dec_body:
   2226 	lea	rbp,[((-8))+rax]
   2227 	movups	xmm2,XMMWORD[r9]
   2228 	mov	eax,DWORD[240+r8]
   2229 	mov	r10d,DWORD[240+rcx]
   2230 	movups	xmm0,XMMWORD[r8]
   2231 	movups	xmm1,XMMWORD[16+r8]
   2232 	lea	r8,[32+r8]
   2233 	xorps	xmm2,xmm0
   2234 $L$oop_enc1_11:
   2235 DB	102,15,56,220,209
   2236 	dec	eax
   2237 	movups	xmm1,XMMWORD[r8]
   2238 	lea	r8,[16+r8]
   2239 	jnz	NEAR $L$oop_enc1_11
   2240 DB	102,15,56,221,209
   2241 	xor	eax,eax
   2242 	test	rdx,15
   2243 	setnz	al
   2244 	shl	rax,4
   2245 	sub	rdx,rax
   2246 
   2247 	movups	xmm0,XMMWORD[rcx]
   2248 	mov	r11,rcx
   2249 	mov	eax,r10d
   2250 	shl	r10d,4
   2251 	mov	r9,rdx
   2252 	and	rdx,-16
   2253 
   2254 	movups	xmm1,XMMWORD[16+r10*1+rcx]
   2255 
   2256 	movdqa	xmm8,XMMWORD[$L$xts_magic]
   2257 	movdqa	xmm15,xmm2
   2258 	pshufd	xmm9,xmm2,0x5f
   2259 	pxor	xmm1,xmm0
   2260 	movdqa	xmm14,xmm9
   2261 	paddd	xmm9,xmm9
   2262 	movdqa	xmm10,xmm15
   2263 	psrad	xmm14,31
   2264 	paddq	xmm15,xmm15
   2265 	pand	xmm14,xmm8
   2266 	pxor	xmm10,xmm0
   2267 	pxor	xmm15,xmm14
   2268 	movdqa	xmm14,xmm9
   2269 	paddd	xmm9,xmm9
   2270 	movdqa	xmm11,xmm15
   2271 	psrad	xmm14,31
   2272 	paddq	xmm15,xmm15
   2273 	pand	xmm14,xmm8
   2274 	pxor	xmm11,xmm0
   2275 	pxor	xmm15,xmm14
   2276 	movdqa	xmm14,xmm9
   2277 	paddd	xmm9,xmm9
   2278 	movdqa	xmm12,xmm15
   2279 	psrad	xmm14,31
   2280 	paddq	xmm15,xmm15
   2281 	pand	xmm14,xmm8
   2282 	pxor	xmm12,xmm0
   2283 	pxor	xmm15,xmm14
   2284 	movdqa	xmm14,xmm9
   2285 	paddd	xmm9,xmm9
   2286 	movdqa	xmm13,xmm15
   2287 	psrad	xmm14,31
   2288 	paddq	xmm15,xmm15
   2289 	pand	xmm14,xmm8
   2290 	pxor	xmm13,xmm0
   2291 	pxor	xmm15,xmm14
   2292 	movdqa	xmm14,xmm15
   2293 	psrad	xmm9,31
   2294 	paddq	xmm15,xmm15
   2295 	pand	xmm9,xmm8
   2296 	pxor	xmm14,xmm0
   2297 	pxor	xmm15,xmm9
   2298 	movaps	XMMWORD[96+rsp],xmm1
   2299 
   2300 	sub	rdx,16*6
   2301 	jc	NEAR $L$xts_dec_short
   2302 
   2303 	mov	eax,16+96
   2304 	lea	rcx,[32+r10*1+r11]
   2305 	sub	rax,r10
   2306 	movups	xmm1,XMMWORD[16+r11]
   2307 	mov	r10,rax
   2308 	lea	r8,[$L$xts_magic]
   2309 	jmp	NEAR $L$xts_dec_grandloop
   2310 
   2311 ALIGN	32
   2312 $L$xts_dec_grandloop:
   2313 	movdqu	xmm2,XMMWORD[rdi]
   2314 	movdqa	xmm8,xmm0
   2315 	movdqu	xmm3,XMMWORD[16+rdi]
   2316 	pxor	xmm2,xmm10
   2317 	movdqu	xmm4,XMMWORD[32+rdi]
   2318 	pxor	xmm3,xmm11
   2319 DB	102,15,56,222,209
   2320 	movdqu	xmm5,XMMWORD[48+rdi]
   2321 	pxor	xmm4,xmm12
   2322 DB	102,15,56,222,217
   2323 	movdqu	xmm6,XMMWORD[64+rdi]
   2324 	pxor	xmm5,xmm13
   2325 DB	102,15,56,222,225
   2326 	movdqu	xmm7,XMMWORD[80+rdi]
   2327 	pxor	xmm8,xmm15
   2328 	movdqa	xmm9,XMMWORD[96+rsp]
   2329 	pxor	xmm6,xmm14
   2330 DB	102,15,56,222,233
   2331 	movups	xmm0,XMMWORD[32+r11]
   2332 	lea	rdi,[96+rdi]
   2333 	pxor	xmm7,xmm8
   2334 
   2335 	pxor	xmm10,xmm9
   2336 DB	102,15,56,222,241
   2337 	pxor	xmm11,xmm9
   2338 	movdqa	XMMWORD[rsp],xmm10
   2339 DB	102,15,56,222,249
   2340 	movups	xmm1,XMMWORD[48+r11]
   2341 	pxor	xmm12,xmm9
   2342 
   2343 DB	102,15,56,222,208
   2344 	pxor	xmm13,xmm9
   2345 	movdqa	XMMWORD[16+rsp],xmm11
   2346 DB	102,15,56,222,216
   2347 	pxor	xmm14,xmm9
   2348 	movdqa	XMMWORD[32+rsp],xmm12
   2349 DB	102,15,56,222,224
   2350 DB	102,15,56,222,232
   2351 	pxor	xmm8,xmm9
   2352 	movdqa	XMMWORD[64+rsp],xmm14
   2353 DB	102,15,56,222,240
   2354 DB	102,15,56,222,248
   2355 	movups	xmm0,XMMWORD[64+r11]
   2356 	movdqa	XMMWORD[80+rsp],xmm8
   2357 	pshufd	xmm9,xmm15,0x5f
   2358 	jmp	NEAR $L$xts_dec_loop6
   2359 ALIGN	32
   2360 $L$xts_dec_loop6:
   2361 DB	102,15,56,222,209
   2362 DB	102,15,56,222,217
   2363 DB	102,15,56,222,225
   2364 DB	102,15,56,222,233
   2365 DB	102,15,56,222,241
   2366 DB	102,15,56,222,249
   2367 	movups	xmm1,XMMWORD[((-64))+rax*1+rcx]
   2368 	add	rax,32
   2369 
   2370 DB	102,15,56,222,208
   2371 DB	102,15,56,222,216
   2372 DB	102,15,56,222,224
   2373 DB	102,15,56,222,232
   2374 DB	102,15,56,222,240
   2375 DB	102,15,56,222,248
   2376 	movups	xmm0,XMMWORD[((-80))+rax*1+rcx]
   2377 	jnz	NEAR $L$xts_dec_loop6
   2378 
   2379 	movdqa	xmm8,XMMWORD[r8]
   2380 	movdqa	xmm14,xmm9
   2381 	paddd	xmm9,xmm9
   2382 DB	102,15,56,222,209
   2383 	paddq	xmm15,xmm15
   2384 	psrad	xmm14,31
   2385 DB	102,15,56,222,217
   2386 	pand	xmm14,xmm8
   2387 	movups	xmm10,XMMWORD[r11]
   2388 DB	102,15,56,222,225
   2389 DB	102,15,56,222,233
   2390 DB	102,15,56,222,241
   2391 	pxor	xmm15,xmm14
   2392 	movaps	xmm11,xmm10
   2393 DB	102,15,56,222,249
   2394 	movups	xmm1,XMMWORD[((-64))+rcx]
   2395 
   2396 	movdqa	xmm14,xmm9
   2397 DB	102,15,56,222,208
   2398 	paddd	xmm9,xmm9
   2399 	pxor	xmm10,xmm15
   2400 DB	102,15,56,222,216
   2401 	psrad	xmm14,31
   2402 	paddq	xmm15,xmm15
   2403 DB	102,15,56,222,224
   2404 DB	102,15,56,222,232
   2405 	pand	xmm14,xmm8
   2406 	movaps	xmm12,xmm11
   2407 DB	102,15,56,222,240
   2408 	pxor	xmm15,xmm14
   2409 	movdqa	xmm14,xmm9
   2410 DB	102,15,56,222,248
   2411 	movups	xmm0,XMMWORD[((-48))+rcx]
   2412 
   2413 	paddd	xmm9,xmm9
   2414 DB	102,15,56,222,209
   2415 	pxor	xmm11,xmm15
   2416 	psrad	xmm14,31
   2417 DB	102,15,56,222,217
   2418 	paddq	xmm15,xmm15
   2419 	pand	xmm14,xmm8
   2420 DB	102,15,56,222,225
   2421 DB	102,15,56,222,233
   2422 	movdqa	XMMWORD[48+rsp],xmm13
   2423 	pxor	xmm15,xmm14
   2424 DB	102,15,56,222,241
   2425 	movaps	xmm13,xmm12
   2426 	movdqa	xmm14,xmm9
   2427 DB	102,15,56,222,249
   2428 	movups	xmm1,XMMWORD[((-32))+rcx]
   2429 
   2430 	paddd	xmm9,xmm9
   2431 DB	102,15,56,222,208
   2432 	pxor	xmm12,xmm15
   2433 	psrad	xmm14,31
   2434 DB	102,15,56,222,216
   2435 	paddq	xmm15,xmm15
   2436 	pand	xmm14,xmm8
   2437 DB	102,15,56,222,224
   2438 DB	102,15,56,222,232
   2439 DB	102,15,56,222,240
   2440 	pxor	xmm15,xmm14
   2441 	movaps	xmm14,xmm13
   2442 DB	102,15,56,222,248
   2443 
   2444 	movdqa	xmm0,xmm9
   2445 	paddd	xmm9,xmm9
   2446 DB	102,15,56,222,209
   2447 	pxor	xmm13,xmm15
   2448 	psrad	xmm0,31
   2449 DB	102,15,56,222,217
   2450 	paddq	xmm15,xmm15
   2451 	pand	xmm0,xmm8
   2452 DB	102,15,56,222,225
   2453 DB	102,15,56,222,233
   2454 	pxor	xmm15,xmm0
   2455 	movups	xmm0,XMMWORD[r11]
   2456 DB	102,15,56,222,241
   2457 DB	102,15,56,222,249
   2458 	movups	xmm1,XMMWORD[16+r11]
   2459 
   2460 	pxor	xmm14,xmm15
   2461 DB	102,15,56,223,84,36,0
   2462 	psrad	xmm9,31
   2463 	paddq	xmm15,xmm15
   2464 DB	102,15,56,223,92,36,16
   2465 DB	102,15,56,223,100,36,32
   2466 	pand	xmm9,xmm8
   2467 	mov	rax,r10
   2468 DB	102,15,56,223,108,36,48
   2469 DB	102,15,56,223,116,36,64
   2470 DB	102,15,56,223,124,36,80
   2471 	pxor	xmm15,xmm9
   2472 
   2473 	lea	rsi,[96+rsi]
   2474 	movups	XMMWORD[(-96)+rsi],xmm2
   2475 	movups	XMMWORD[(-80)+rsi],xmm3
   2476 	movups	XMMWORD[(-64)+rsi],xmm4
   2477 	movups	XMMWORD[(-48)+rsi],xmm5
   2478 	movups	XMMWORD[(-32)+rsi],xmm6
   2479 	movups	XMMWORD[(-16)+rsi],xmm7
   2480 	sub	rdx,16*6
   2481 	jnc	NEAR $L$xts_dec_grandloop
   2482 
   2483 	mov	eax,16+96
   2484 	sub	eax,r10d
   2485 	mov	rcx,r11
   2486 	shr	eax,4
   2487 
   2488 $L$xts_dec_short:
   2489 
   2490 	mov	r10d,eax
   2491 	pxor	xmm10,xmm0
   2492 	pxor	xmm11,xmm0
   2493 	add	rdx,16*6
   2494 	jz	NEAR $L$xts_dec_done
   2495 
   2496 	pxor	xmm12,xmm0
   2497 	cmp	rdx,0x20
   2498 	jb	NEAR $L$xts_dec_one
   2499 	pxor	xmm13,xmm0
   2500 	je	NEAR $L$xts_dec_two
   2501 
   2502 	pxor	xmm14,xmm0
   2503 	cmp	rdx,0x40
   2504 	jb	NEAR $L$xts_dec_three
   2505 	je	NEAR $L$xts_dec_four
   2506 
   2507 	movdqu	xmm2,XMMWORD[rdi]
   2508 	movdqu	xmm3,XMMWORD[16+rdi]
   2509 	movdqu	xmm4,XMMWORD[32+rdi]
   2510 	pxor	xmm2,xmm10
   2511 	movdqu	xmm5,XMMWORD[48+rdi]
   2512 	pxor	xmm3,xmm11
   2513 	movdqu	xmm6,XMMWORD[64+rdi]
   2514 	lea	rdi,[80+rdi]
   2515 	pxor	xmm4,xmm12
   2516 	pxor	xmm5,xmm13
   2517 	pxor	xmm6,xmm14
   2518 
   2519 	call	_aesni_decrypt6
   2520 
   2521 	xorps	xmm2,xmm10
   2522 	xorps	xmm3,xmm11
   2523 	xorps	xmm4,xmm12
   2524 	movdqu	XMMWORD[rsi],xmm2
   2525 	xorps	xmm5,xmm13
   2526 	movdqu	XMMWORD[16+rsi],xmm3
   2527 	xorps	xmm6,xmm14
   2528 	movdqu	XMMWORD[32+rsi],xmm4
   2529 	pxor	xmm14,xmm14
   2530 	movdqu	XMMWORD[48+rsi],xmm5
   2531 	pcmpgtd	xmm14,xmm15
   2532 	movdqu	XMMWORD[64+rsi],xmm6
   2533 	lea	rsi,[80+rsi]
   2534 	pshufd	xmm11,xmm14,0x13
   2535 	and	r9,15
   2536 	jz	NEAR $L$xts_dec_ret
   2537 
   2538 	movdqa	xmm10,xmm15
   2539 	paddq	xmm15,xmm15
   2540 	pand	xmm11,xmm8
   2541 	pxor	xmm11,xmm15
   2542 	jmp	NEAR $L$xts_dec_done2
   2543 
   2544 ALIGN	16
   2545 $L$xts_dec_one:
   2546 	movups	xmm2,XMMWORD[rdi]
   2547 	lea	rdi,[16+rdi]
   2548 	xorps	xmm2,xmm10
   2549 	movups	xmm0,XMMWORD[rcx]
   2550 	movups	xmm1,XMMWORD[16+rcx]
   2551 	lea	rcx,[32+rcx]
   2552 	xorps	xmm2,xmm0
   2553 $L$oop_dec1_12:
   2554 DB	102,15,56,222,209
   2555 	dec	eax
   2556 	movups	xmm1,XMMWORD[rcx]
   2557 	lea	rcx,[16+rcx]
   2558 	jnz	NEAR $L$oop_dec1_12
   2559 DB	102,15,56,223,209
   2560 	xorps	xmm2,xmm10
   2561 	movdqa	xmm10,xmm11
   2562 	movups	XMMWORD[rsi],xmm2
   2563 	movdqa	xmm11,xmm12
   2564 	lea	rsi,[16+rsi]
   2565 	jmp	NEAR $L$xts_dec_done
   2566 
   2567 ALIGN	16
   2568 $L$xts_dec_two:
   2569 	movups	xmm2,XMMWORD[rdi]
   2570 	movups	xmm3,XMMWORD[16+rdi]
   2571 	lea	rdi,[32+rdi]
   2572 	xorps	xmm2,xmm10
   2573 	xorps	xmm3,xmm11
   2574 
   2575 	call	_aesni_decrypt2
   2576 
   2577 	xorps	xmm2,xmm10
   2578 	movdqa	xmm10,xmm12
   2579 	xorps	xmm3,xmm11
   2580 	movdqa	xmm11,xmm13
   2581 	movups	XMMWORD[rsi],xmm2
   2582 	movups	XMMWORD[16+rsi],xmm3
   2583 	lea	rsi,[32+rsi]
   2584 	jmp	NEAR $L$xts_dec_done
   2585 
   2586 ALIGN	16
   2587 $L$xts_dec_three:
   2588 	movups	xmm2,XMMWORD[rdi]
   2589 	movups	xmm3,XMMWORD[16+rdi]
   2590 	movups	xmm4,XMMWORD[32+rdi]
   2591 	lea	rdi,[48+rdi]
   2592 	xorps	xmm2,xmm10
   2593 	xorps	xmm3,xmm11
   2594 	xorps	xmm4,xmm12
   2595 
   2596 	call	_aesni_decrypt3
   2597 
   2598 	xorps	xmm2,xmm10
   2599 	movdqa	xmm10,xmm13
   2600 	xorps	xmm3,xmm11
   2601 	movdqa	xmm11,xmm14
   2602 	xorps	xmm4,xmm12
   2603 	movups	XMMWORD[rsi],xmm2
   2604 	movups	XMMWORD[16+rsi],xmm3
   2605 	movups	XMMWORD[32+rsi],xmm4
   2606 	lea	rsi,[48+rsi]
   2607 	jmp	NEAR $L$xts_dec_done
   2608 
   2609 ALIGN	16
   2610 $L$xts_dec_four:
   2611 	movups	xmm2,XMMWORD[rdi]
   2612 	movups	xmm3,XMMWORD[16+rdi]
   2613 	movups	xmm4,XMMWORD[32+rdi]
   2614 	xorps	xmm2,xmm10
   2615 	movups	xmm5,XMMWORD[48+rdi]
   2616 	lea	rdi,[64+rdi]
   2617 	xorps	xmm3,xmm11
   2618 	xorps	xmm4,xmm12
   2619 	xorps	xmm5,xmm13
   2620 
   2621 	call	_aesni_decrypt4
   2622 
   2623 	pxor	xmm2,xmm10
   2624 	movdqa	xmm10,xmm14
   2625 	pxor	xmm3,xmm11
   2626 	movdqa	xmm11,xmm15
   2627 	pxor	xmm4,xmm12
   2628 	movdqu	XMMWORD[rsi],xmm2
   2629 	pxor	xmm5,xmm13
   2630 	movdqu	XMMWORD[16+rsi],xmm3
   2631 	movdqu	XMMWORD[32+rsi],xmm4
   2632 	movdqu	XMMWORD[48+rsi],xmm5
   2633 	lea	rsi,[64+rsi]
   2634 	jmp	NEAR $L$xts_dec_done
   2635 
   2636 ALIGN	16
   2637 $L$xts_dec_done:
   2638 	and	r9,15
   2639 	jz	NEAR $L$xts_dec_ret
   2640 $L$xts_dec_done2:
   2641 	mov	rdx,r9
   2642 	mov	rcx,r11
   2643 	mov	eax,r10d
   2644 
   2645 	movups	xmm2,XMMWORD[rdi]
   2646 	xorps	xmm2,xmm11
   2647 	movups	xmm0,XMMWORD[rcx]
   2648 	movups	xmm1,XMMWORD[16+rcx]
   2649 	lea	rcx,[32+rcx]
   2650 	xorps	xmm2,xmm0
   2651 $L$oop_dec1_13:
   2652 DB	102,15,56,222,209
   2653 	dec	eax
   2654 	movups	xmm1,XMMWORD[rcx]
   2655 	lea	rcx,[16+rcx]
   2656 	jnz	NEAR $L$oop_dec1_13
   2657 DB	102,15,56,223,209
   2658 	xorps	xmm2,xmm11
   2659 	movups	XMMWORD[rsi],xmm2
   2660 
   2661 $L$xts_dec_steal:
   2662 	movzx	eax,BYTE[16+rdi]
   2663 	movzx	ecx,BYTE[rsi]
   2664 	lea	rdi,[1+rdi]
   2665 	mov	BYTE[rsi],al
   2666 	mov	BYTE[16+rsi],cl
   2667 	lea	rsi,[1+rsi]
   2668 	sub	rdx,1
   2669 	jnz	NEAR $L$xts_dec_steal
   2670 
   2671 	sub	rsi,r9
   2672 	mov	rcx,r11
   2673 	mov	eax,r10d
   2674 
   2675 	movups	xmm2,XMMWORD[rsi]
   2676 	xorps	xmm2,xmm10
   2677 	movups	xmm0,XMMWORD[rcx]
   2678 	movups	xmm1,XMMWORD[16+rcx]
   2679 	lea	rcx,[32+rcx]
   2680 	xorps	xmm2,xmm0
   2681 $L$oop_dec1_14:
   2682 DB	102,15,56,222,209
   2683 	dec	eax
   2684 	movups	xmm1,XMMWORD[rcx]
   2685 	lea	rcx,[16+rcx]
   2686 	jnz	NEAR $L$oop_dec1_14
   2687 DB	102,15,56,223,209
   2688 	xorps	xmm2,xmm10
   2689 	movups	XMMWORD[rsi],xmm2
   2690 
   2691 $L$xts_dec_ret:
   2692 	xorps	xmm0,xmm0
   2693 	pxor	xmm1,xmm1
   2694 	pxor	xmm2,xmm2
   2695 	pxor	xmm3,xmm3
   2696 	pxor	xmm4,xmm4
   2697 	pxor	xmm5,xmm5
   2698 	movaps	xmm6,XMMWORD[((-160))+rbp]
   2699 	movaps	XMMWORD[(-160)+rbp],xmm0
   2700 	movaps	xmm7,XMMWORD[((-144))+rbp]
   2701 	movaps	XMMWORD[(-144)+rbp],xmm0
   2702 	movaps	xmm8,XMMWORD[((-128))+rbp]
   2703 	movaps	XMMWORD[(-128)+rbp],xmm0
   2704 	movaps	xmm9,XMMWORD[((-112))+rbp]
   2705 	movaps	XMMWORD[(-112)+rbp],xmm0
   2706 	movaps	xmm10,XMMWORD[((-96))+rbp]
   2707 	movaps	XMMWORD[(-96)+rbp],xmm0
   2708 	movaps	xmm11,XMMWORD[((-80))+rbp]
   2709 	movaps	XMMWORD[(-80)+rbp],xmm0
   2710 	movaps	xmm12,XMMWORD[((-64))+rbp]
   2711 	movaps	XMMWORD[(-64)+rbp],xmm0
   2712 	movaps	xmm13,XMMWORD[((-48))+rbp]
   2713 	movaps	XMMWORD[(-48)+rbp],xmm0
   2714 	movaps	xmm14,XMMWORD[((-32))+rbp]
   2715 	movaps	XMMWORD[(-32)+rbp],xmm0
   2716 	movaps	xmm15,XMMWORD[((-16))+rbp]
   2717 	movaps	XMMWORD[(-16)+rbp],xmm0
   2718 	movaps	XMMWORD[rsp],xmm0
   2719 	movaps	XMMWORD[16+rsp],xmm0
   2720 	movaps	XMMWORD[32+rsp],xmm0
   2721 	movaps	XMMWORD[48+rsp],xmm0
   2722 	movaps	XMMWORD[64+rsp],xmm0
   2723 	movaps	XMMWORD[80+rsp],xmm0
   2724 	movaps	XMMWORD[96+rsp],xmm0
   2725 	lea	rsp,[rbp]
   2726 	pop	rbp
   2727 $L$xts_dec_epilogue:
   2728 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   2729 	mov	rsi,QWORD[16+rsp]
   2730 	DB	0F3h,0C3h		;repret
   2731 $L$SEH_end_aesni_xts_decrypt:
   2732 global	aesni_cbc_encrypt
        ;-----------------------------------------------------------------------
        ; aesni_cbc_encrypt -- AES CBC encryption/decryption using AES-NI,
        ; Win64 entry point.
        ;
        ; The prologue copies the Win64 arguments (rcx, rdx, r8, r9 and the
        ; stack slots at [40+rsp] / [48+rsp]) into rdi, rsi, rdx, rcx, r8, r9.
        ; After that remap the code uses:
        ;   rdi  source pointer (blocks are loaded from it)
        ;   rsi  destination pointer (blocks are stored to it)
        ;   rdx  byte count; zero returns immediately
        ;   rcx  key schedule; a 32-bit round counter is read from offset 240
        ;   r8   16-byte chaining value: read on entry, rewritten on exit
        ;   r9d  zero selects decryption, non-zero selects encryption
        ; (presumably in/out/length/key/ivec/enc of the C prototype -- confirm
        ;  against the header that declares this function)
        ;
        ; rdi and rsi are saved in the slots at [8+rsp] / [16+rsp] on entry and
        ; reloaded from there in the epilogue.  The bulk decrypt path builds a
        ; 16-byte aligned frame, saves xmm6-xmm15 in it and zeroes those slots
        ; again after restoring the registers.
        ;
        ; AES-NI instructions are emitted as raw bytes:
        ;   DB 102,15,56,OP,MODRM  =  66 0F 38 OP /r
        ;   OP 220 = aesenc, 221 = aesenclast, 222 = aesdec, 223 = aesdeclast
        ;   MODRM = 0xC0 | dst<<3 | src (register-direct), e.g. 209 = xmm2,xmm1
        ;   an extra byte 68 / 65 / 69 after the 102 is a REX prefix that
        ;   extends dst / src / both into xmm8-xmm15.
        ; DD 0x9066A4F3 is "rep movsb" and DD 0x9066AAF3 is "rep stosb", each
        ; followed by a two-byte nop (66 90).
        ;-----------------------------------------------------------------------
   2733 
   2734 ALIGN	16
   2735 aesni_cbc_encrypt:
   2736 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
   2737 	mov	QWORD[16+rsp],rsi
   2738 	mov	rax,rsp
   2739 $L$SEH_begin_aesni_cbc_encrypt:
   2740 	mov	rdi,rcx
   2741 	mov	rsi,rdx
   2742 	mov	rdx,r8
   2743 	mov	rcx,r9
   2744 	mov	r8,QWORD[40+rsp]
   2745 	mov	r9,QWORD[48+rsp]
   2746 
   2747 
   2748 	test	rdx,rdx	; zero byte count: nothing to do
   2749 	jz	NEAR $L$cbc_ret
   2750 
   2751 	mov	r10d,DWORD[240+rcx]	; r10d = round counter kept at key+240
   2752 	mov	r11,rcx	; r11 = copy of the key schedule pointer
   2753 	test	r9d,r9d	; r9d == 0 selects the decrypt path
   2754 	jz	NEAR $L$cbc_decrypt
   2755 
        ; ---- encryption: one block at a time, xmm2 carries the chaining value
   2756 	movups	xmm2,XMMWORD[r8]
   2757 	mov	eax,r10d
   2758 	cmp	rdx,16
   2759 	jb	NEAR $L$cbc_enc_tail	; fewer than 16 bytes in total
   2760 	sub	rdx,16
   2761 	jmp	NEAR $L$cbc_enc_loop
   2762 ALIGN	16
   2763 $L$cbc_enc_loop:
   2764 	movups	xmm3,XMMWORD[rdi]	; next input block
   2765 	lea	rdi,[16+rdi]
   2766 
   2767 	movups	xmm0,XMMWORD[rcx]	; round key 0
   2768 	movups	xmm1,XMMWORD[16+rcx]	; round key 1
   2769 	xorps	xmm3,xmm0
   2770 	lea	rcx,[32+rcx]
   2771 	xorps	xmm2,xmm3	; xmm2 = chaining value ^ input ^ round key 0
   2772 $L$oop_enc1_15:
   2773 DB	102,15,56,220,209	; aesenc xmm2,xmm1
   2774 	dec	eax	; sets ZF for the jnz below (movups/lea leave flags alone)
   2775 	movups	xmm1,XMMWORD[rcx]
   2776 	lea	rcx,[16+rcx]
   2777 	jnz	NEAR $L$oop_enc1_15
   2778 DB	102,15,56,221,209	; aesenclast xmm2,xmm1
   2779 	mov	eax,r10d	; reload round counter and key pointer for the next block
   2780 	mov	rcx,r11
   2781 	movups	XMMWORD[rsi],xmm2	; store the output block; xmm2 stays as chaining value
   2782 	lea	rsi,[16+rsi]
   2783 	sub	rdx,16
   2784 	jnc	NEAR $L$cbc_enc_loop	; no borrow: at least one more full block
   2785 	add	rdx,16	; undo the last subtraction; rdx = leftover bytes
   2786 	jnz	NEAR $L$cbc_enc_tail
   2787 	pxor	xmm0,xmm0	; clear key material from registers
   2788 	pxor	xmm1,xmm1
   2789 	movups	XMMWORD[r8],xmm2	; write the final chaining value back
   2790 	pxor	xmm2,xmm2
   2791 	pxor	xmm3,xmm3
   2792 	jmp	NEAR $L$cbc_ret
   2793 
        ; ---- encryption tail: rdx (1..15) leftover bytes are copied to the
        ; destination, padded with zero bytes up to 16, and that block is then
        ; encrypted in place by one more pass through $L$cbc_enc_loop.
   2794 $L$cbc_enc_tail:
   2795 	mov	rcx,rdx
   2796 	xchg	rsi,rdi	; rsi = source, rdi = destination for the string ops
   2797 	DD	0x9066A4F3	; rep movsb: copy rdx bytes
   2798 	mov	ecx,16
   2799 	sub	rcx,rdx
   2800 	xor	eax,eax
   2801 	DD	0x9066AAF3	; rep stosb: zero-fill the remaining 16-rdx bytes
   2802 	lea	rdi,[((-16))+rdi]	; back to the start of the padded block
   2803 	mov	eax,r10d
   2804 	mov	rsi,rdi	; source == destination: encrypt in place
   2805 	mov	rcx,r11
   2806 	xor	rdx,rdx	; the loop's "sub rdx,16" now borrows and exits after one block
   2807 	jmp	NEAR $L$cbc_enc_loop
   2808 
   2809 ALIGN	16
   2810 $L$cbc_decrypt:
   2811 	cmp	rdx,16
   2812 	jne	NEAR $L$cbc_decrypt_bulk
   2813 
   2814 
   2815 
        ; ---- decryption of exactly one block; no stack frame is needed
   2816 	movdqu	xmm2,XMMWORD[rdi]
   2817 	movdqu	xmm3,XMMWORD[r8]	; xmm3 = incoming chaining value
   2818 	movdqa	xmm4,xmm2	; xmm4 = input block, becomes the new chaining value
   2819 	movups	xmm0,XMMWORD[rcx]
   2820 	movups	xmm1,XMMWORD[16+rcx]
   2821 	lea	rcx,[32+rcx]
   2822 	xorps	xmm2,xmm0
   2823 $L$oop_dec1_16:
   2824 DB	102,15,56,222,209	; aesdec xmm2,xmm1
   2825 	dec	r10d
   2826 	movups	xmm1,XMMWORD[rcx]
   2827 	lea	rcx,[16+rcx]
   2828 	jnz	NEAR $L$oop_dec1_16
   2829 DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
   2830 	pxor	xmm0,xmm0
   2831 	pxor	xmm1,xmm1
   2832 	movdqu	XMMWORD[r8],xmm4
   2833 	xorps	xmm2,xmm3
   2834 	pxor	xmm3,xmm3
   2835 	movups	XMMWORD[rsi],xmm2
   2836 	pxor	xmm2,xmm2
   2837 	jmp	NEAR $L$cbc_ret
   2838 ALIGN	16
        ; ---- bulk decryption.  Frame: rsp is aligned down to 16 bytes,
        ; [rsp] is a 16-byte scratch slot (used by $L$cbc_dec_tail_partial) and
        ; [16+rsp]..[160+rsp] hold the saved xmm6-xmm15.  rbp points at the
        ; pushed rbp so the epilogue can undo the alignment.
   2839 $L$cbc_decrypt_bulk:
   2840 	lea	rax,[rsp]
   2841 	push	rbp
   2842 	sub	rsp,176
   2843 	and	rsp,-16
   2844 	movaps	XMMWORD[16+rsp],xmm6
   2845 	movaps	XMMWORD[32+rsp],xmm7
   2846 	movaps	XMMWORD[48+rsp],xmm8
   2847 	movaps	XMMWORD[64+rsp],xmm9
   2848 	movaps	XMMWORD[80+rsp],xmm10
   2849 	movaps	XMMWORD[96+rsp],xmm11
   2850 	movaps	XMMWORD[112+rsp],xmm12
   2851 	movaps	XMMWORD[128+rsp],xmm13
   2852 	movaps	XMMWORD[144+rsp],xmm14
   2853 	movaps	XMMWORD[160+rsp],xmm15
   2854 $L$cbc_decrypt_body:
   2855 	lea	rbp,[((-8))+rax]	; rbp = address of the saved rbp
   2856 	movups	xmm10,XMMWORD[r8]	; xmm10 = chaining value throughout the bulk path
   2857 	mov	eax,r10d
   2858 	cmp	rdx,0x50
   2859 	jbe	NEAR $L$cbc_dec_tail	; at most five blocks
   2860 
        ; more than 0x50 bytes: preload six input blocks into xmm2-xmm7 and
        ; keep copies of the first five in xmm11-xmm15 for the CBC xor.
   2861 	movups	xmm0,XMMWORD[rcx]
   2862 	movdqu	xmm2,XMMWORD[rdi]
   2863 	movdqu	xmm3,XMMWORD[16+rdi]
   2864 	movdqa	xmm11,xmm2
   2865 	movdqu	xmm4,XMMWORD[32+rdi]
   2866 	movdqa	xmm12,xmm3
   2867 	movdqu	xmm5,XMMWORD[48+rdi]
   2868 	movdqa	xmm13,xmm4
   2869 	movdqu	xmm6,XMMWORD[64+rdi]
   2870 	movdqa	xmm14,xmm5
   2871 	movdqu	xmm7,XMMWORD[80+rdi]
   2872 	movdqa	xmm15,xmm6
   2873 	mov	r9d,DWORD[((OPENSSL_ia32cap_P+4))]
   2874 	cmp	rdx,0x70
   2875 	jbe	NEAR $L$cbc_dec_six_or_seven
   2876 
        ; 71303168 = 0x04400000 (bits 22 and 26); 4194304 = 0x00400000 (bit 22
        ; only).  The six-block loop is chosen when bit 22 is set and bit 26 is
        ; clear in the capability word at OPENSSL_ia32cap_P+4.
        ; NOTE(review): presumably these are the CPUID.1:ECX MOVBE and XSAVE
        ; bits -- confirm against the OPENSSL_ia32cap documentation.
   2877 	and	r9d,71303168
   2878 	sub	rdx,0x50
   2879 	cmp	r9d,4194304
   2880 	je	NEAR $L$cbc_dec_loop6_enter
   2881 	sub	rdx,0x20
   2882 	lea	rcx,[112+rcx]	; bias the key pointer; loop8 addresses keys as (N-112)+rcx
   2883 	jmp	NEAR $L$cbc_dec_loop8_enter
   2884 ALIGN	16
        ; ---- eight blocks per iteration
   2885 $L$cbc_dec_loop8:
   2886 	movups	XMMWORD[rsi],xmm9	; eighth output block of the previous iteration
   2887 	lea	rsi,[16+rsi]
   2888 $L$cbc_dec_loop8_enter:
   2889 	movdqu	xmm8,XMMWORD[96+rdi]	; blocks 6 and 7 (0-5 are already in xmm2-xmm7)
   2890 	pxor	xmm2,xmm0
   2891 	movdqu	xmm9,XMMWORD[112+rdi]
   2892 	pxor	xmm3,xmm0
   2893 	movups	xmm1,XMMWORD[((16-112))+rcx]
   2894 	pxor	xmm4,xmm0
   2895 	xor	r11,r11
   2896 	cmp	rdx,0x70	; CF is consumed by the setnc further down
   2897 	pxor	xmm5,xmm0
   2898 	pxor	xmm6,xmm0
   2899 	pxor	xmm7,xmm0
   2900 	pxor	xmm8,xmm0
   2901 
   2902 DB	102,15,56,222,209	; aesdec xmm2,xmm1
   2903 	pxor	xmm9,xmm0
   2904 	movups	xmm0,XMMWORD[((32-112))+rcx]
   2905 DB	102,15,56,222,217	; aesdec xmm3,xmm1
   2906 DB	102,15,56,222,225	; aesdec xmm4,xmm1
   2907 DB	102,15,56,222,233	; aesdec xmm5,xmm1
   2908 DB	102,15,56,222,241	; aesdec xmm6,xmm1
   2909 DB	102,15,56,222,249	; aesdec xmm7,xmm1
   2910 DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
   2911 	setnc	r11b
   2912 	shl	r11,7
   2913 DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
   2914 	add	r11,rdi	; r11 = rdi+128 if rdx >= 0x70, else rdi (source of the preload at $L$cbc_dec_done)
   2915 	movups	xmm1,XMMWORD[((48-112))+rcx]
        ; from here on each group applies one round key to all eight blocks,
        ; alternating between xmm0 and xmm1 while the other one is reloaded.
   2916 DB	102,15,56,222,208
   2917 DB	102,15,56,222,216
   2918 DB	102,15,56,222,224
   2919 DB	102,15,56,222,232
   2920 DB	102,15,56,222,240
   2921 DB	102,15,56,222,248
   2922 DB	102,68,15,56,222,192
   2923 DB	102,68,15,56,222,200
   2924 	movups	xmm0,XMMWORD[((64-112))+rcx]
   2925 	nop
   2926 DB	102,15,56,222,209
   2927 DB	102,15,56,222,217
   2928 DB	102,15,56,222,225
   2929 DB	102,15,56,222,233
   2930 DB	102,15,56,222,241
   2931 DB	102,15,56,222,249
   2932 DB	102,68,15,56,222,193
   2933 DB	102,68,15,56,222,201
   2934 	movups	xmm1,XMMWORD[((80-112))+rcx]
   2935 	nop
   2936 DB	102,15,56,222,208
   2937 DB	102,15,56,222,216
   2938 DB	102,15,56,222,224
   2939 DB	102,15,56,222,232
   2940 DB	102,15,56,222,240
   2941 DB	102,15,56,222,248
   2942 DB	102,68,15,56,222,192
   2943 DB	102,68,15,56,222,200
   2944 	movups	xmm0,XMMWORD[((96-112))+rcx]
   2945 	nop
   2946 DB	102,15,56,222,209
   2947 DB	102,15,56,222,217
   2948 DB	102,15,56,222,225
   2949 DB	102,15,56,222,233
   2950 DB	102,15,56,222,241
   2951 DB	102,15,56,222,249
   2952 DB	102,68,15,56,222,193
   2953 DB	102,68,15,56,222,201
   2954 	movups	xmm1,XMMWORD[((112-112))+rcx]
   2955 	nop
   2956 DB	102,15,56,222,208
   2957 DB	102,15,56,222,216
   2958 DB	102,15,56,222,224
   2959 DB	102,15,56,222,232
   2960 DB	102,15,56,222,240
   2961 DB	102,15,56,222,248
   2962 DB	102,68,15,56,222,192
   2963 DB	102,68,15,56,222,200
   2964 	movups	xmm0,XMMWORD[((128-112))+rcx]
   2965 	nop
   2966 DB	102,15,56,222,209
   2967 DB	102,15,56,222,217
   2968 DB	102,15,56,222,225
   2969 DB	102,15,56,222,233
   2970 DB	102,15,56,222,241
   2971 DB	102,15,56,222,249
   2972 DB	102,68,15,56,222,193
   2973 DB	102,68,15,56,222,201
   2974 	movups	xmm1,XMMWORD[((144-112))+rcx]
   2975 	cmp	eax,11	; flags feed both the jb and the je below
   2976 DB	102,15,56,222,208
   2977 DB	102,15,56,222,216
   2978 DB	102,15,56,222,224
   2979 DB	102,15,56,222,232
   2980 DB	102,15,56,222,240
   2981 DB	102,15,56,222,248
   2982 DB	102,68,15,56,222,192
   2983 DB	102,68,15,56,222,200
   2984 	movups	xmm0,XMMWORD[((160-112))+rcx]
   2985 	jb	NEAR $L$cbc_dec_done	; round counter < 11: xmm1/xmm0 now hold the last two keys
   2986 DB	102,15,56,222,209
   2987 DB	102,15,56,222,217
   2988 DB	102,15,56,222,225
   2989 DB	102,15,56,222,233
   2990 DB	102,15,56,222,241
   2991 DB	102,15,56,222,249
   2992 DB	102,68,15,56,222,193
   2993 DB	102,68,15,56,222,201
   2994 	movups	xmm1,XMMWORD[((176-112))+rcx]
   2995 	nop
   2996 DB	102,15,56,222,208
   2997 DB	102,15,56,222,216
   2998 DB	102,15,56,222,224
   2999 DB	102,15,56,222,232
   3000 DB	102,15,56,222,240
   3001 DB	102,15,56,222,248
   3002 DB	102,68,15,56,222,192
   3003 DB	102,68,15,56,222,200
   3004 	movups	xmm0,XMMWORD[((192-112))+rcx]
   3005 	je	NEAR $L$cbc_dec_done	; round counter == 11
   3006 DB	102,15,56,222,209
   3007 DB	102,15,56,222,217
   3008 DB	102,15,56,222,225
   3009 DB	102,15,56,222,233
   3010 DB	102,15,56,222,241
   3011 DB	102,15,56,222,249
   3012 DB	102,68,15,56,222,193
   3013 DB	102,68,15,56,222,201
   3014 	movups	xmm1,XMMWORD[((208-112))+rcx]
   3015 	nop
   3016 DB	102,15,56,222,208
   3017 DB	102,15,56,222,216
   3018 DB	102,15,56,222,224
   3019 DB	102,15,56,222,232
   3020 DB	102,15,56,222,240
   3021 DB	102,15,56,222,248
   3022 DB	102,68,15,56,222,192
   3023 DB	102,68,15,56,222,200
   3024 	movups	xmm0,XMMWORD[((224-112))+rcx]
   3025 	jmp	NEAR $L$cbc_dec_done
   3026 ALIGN	16
        ; Final two rounds.  xmm1 holds the second-to-last round key and xmm0
        ; the last one.  The last key is xored into each chaining block
        ; (xmm10-xmm15, then input blocks 5 and 6 reloaded from memory), so the
        ; aesdeclast that takes that value as its key operand also performs the
        ; CBC xor.
   3027 $L$cbc_dec_done:
   3028 DB	102,15,56,222,209
   3029 DB	102,15,56,222,217
   3030 	pxor	xmm10,xmm0
   3031 	pxor	xmm11,xmm0
   3032 DB	102,15,56,222,225
   3033 DB	102,15,56,222,233
   3034 	pxor	xmm12,xmm0
   3035 	pxor	xmm13,xmm0
   3036 DB	102,15,56,222,241
   3037 DB	102,15,56,222,249
   3038 	pxor	xmm14,xmm0
   3039 	pxor	xmm15,xmm0
   3040 DB	102,68,15,56,222,193
   3041 DB	102,68,15,56,222,201
   3042 	movdqu	xmm1,XMMWORD[80+rdi]	; input block 5: chaining value for block 6
   3043 
   3044 DB	102,65,15,56,223,210	; aesdeclast xmm2,xmm10
   3045 	movdqu	xmm10,XMMWORD[96+rdi]	; input block 6: chaining value for block 7
   3046 	pxor	xmm1,xmm0
   3047 DB	102,65,15,56,223,219	; aesdeclast xmm3,xmm11
   3048 	pxor	xmm10,xmm0
   3049 	movdqu	xmm0,XMMWORD[112+rdi]	; input block 7: chaining value for the next iteration
   3050 DB	102,65,15,56,223,228	; aesdeclast xmm4,xmm12
   3051 	lea	rdi,[128+rdi]
   3052 	movdqu	xmm11,XMMWORD[r11]	; preload six blocks from r11 (see the setnc above)
   3053 DB	102,65,15,56,223,237	; aesdeclast xmm5,xmm13
   3054 DB	102,65,15,56,223,246	; aesdeclast xmm6,xmm14
   3055 	movdqu	xmm12,XMMWORD[16+r11]
   3056 	movdqu	xmm13,XMMWORD[32+r11]
   3057 DB	102,65,15,56,223,255	; aesdeclast xmm7,xmm15
   3058 DB	102,68,15,56,223,193	; aesdeclast xmm8,xmm1
   3059 	movdqu	xmm14,XMMWORD[48+r11]
   3060 	movdqu	xmm15,XMMWORD[64+r11]
   3061 DB	102,69,15,56,223,202	; aesdeclast xmm9,xmm10
   3062 	movdqa	xmm10,xmm0
   3063 	movdqu	xmm1,XMMWORD[80+r11]
   3064 	movups	xmm0,XMMWORD[((-112))+rcx]	; round key 0 again
   3065 
        ; store seven results while moving the preloaded blocks into xmm2-xmm7;
        ; the eighth result stays in xmm9 until the next iteration or the exit.
   3066 	movups	XMMWORD[rsi],xmm2
   3067 	movdqa	xmm2,xmm11
   3068 	movups	XMMWORD[16+rsi],xmm3
   3069 	movdqa	xmm3,xmm12
   3070 	movups	XMMWORD[32+rsi],xmm4
   3071 	movdqa	xmm4,xmm13
   3072 	movups	XMMWORD[48+rsi],xmm5
   3073 	movdqa	xmm5,xmm14
   3074 	movups	XMMWORD[64+rsi],xmm6
   3075 	movdqa	xmm6,xmm15
   3076 	movups	XMMWORD[80+rsi],xmm7
   3077 	movdqa	xmm7,xmm1
   3078 	movups	XMMWORD[96+rsi],xmm8
   3079 	lea	rsi,[112+rsi]
   3080 
   3081 	sub	rdx,0x80
   3082 	ja	NEAR $L$cbc_dec_loop8
   3083 
   3084 	movaps	xmm2,xmm9	; pending eighth result, in case nothing is left
   3085 	lea	rcx,[((-112))+rcx]	; remove the key pointer bias
   3086 	add	rdx,0x70
   3087 	jle	NEAR $L$cbc_dec_clear_tail_collected
   3088 	movups	XMMWORD[rsi],xmm9
   3089 	lea	rsi,[16+rsi]
   3090 	cmp	rdx,0x50
   3091 	jbe	NEAR $L$cbc_dec_tail
   3092 
   3093 	movaps	xmm2,xmm11	; xmm2 was overwritten above; xmm11 still has preloaded block 0
        ; ---- six or seven blocks; xmm2-xmm7 and xmm11-xmm15 are already loaded
   3094 $L$cbc_dec_six_or_seven:
   3095 	cmp	rdx,0x60
   3096 	ja	NEAR $L$cbc_dec_seven
   3097 
   3098 	movaps	xmm8,xmm7	; keep input block 5 as the next chaining value
        ; _aesni_decrypt6 is defined outside this view; presumably it decrypts
        ; xmm2-xmm7 in place using the key at rcx -- confirm.
   3099 	call	_aesni_decrypt6
   3100 	pxor	xmm2,xmm10
   3101 	movaps	xmm10,xmm8
   3102 	pxor	xmm3,xmm11
   3103 	movdqu	XMMWORD[rsi],xmm2
   3104 	pxor	xmm4,xmm12
   3105 	movdqu	XMMWORD[16+rsi],xmm3
   3106 	pxor	xmm3,xmm3
   3107 	pxor	xmm5,xmm13
   3108 	movdqu	XMMWORD[32+rsi],xmm4
   3109 	pxor	xmm4,xmm4
   3110 	pxor	xmm6,xmm14
   3111 	movdqu	XMMWORD[48+rsi],xmm5
   3112 	pxor	xmm5,xmm5
   3113 	pxor	xmm7,xmm15
   3114 	movdqu	XMMWORD[64+rsi],xmm6
   3115 	pxor	xmm6,xmm6
   3116 	lea	rsi,[80+rsi]
   3117 	movdqa	xmm2,xmm7	; the last result is handed to the tail code in xmm2
   3118 	pxor	xmm7,xmm7
   3119 	jmp	NEAR $L$cbc_dec_tail_collected
   3120 
   3121 ALIGN	16
   3122 $L$cbc_dec_seven:
   3123 	movups	xmm8,XMMWORD[96+rdi]	; seventh input block
   3124 	xorps	xmm9,xmm9	; eighth lane is unused; feed it zeroes
   3125 	call	_aesni_decrypt8
   3126 	movups	xmm9,XMMWORD[80+rdi]	; input block 5: chaining value for block 6
   3127 	pxor	xmm2,xmm10
   3128 	movups	xmm10,XMMWORD[96+rdi]	; input block 6 becomes the next chaining value
   3129 	pxor	xmm3,xmm11
   3130 	movdqu	XMMWORD[rsi],xmm2
   3131 	pxor	xmm4,xmm12
   3132 	movdqu	XMMWORD[16+rsi],xmm3
   3133 	pxor	xmm3,xmm3
   3134 	pxor	xmm5,xmm13
   3135 	movdqu	XMMWORD[32+rsi],xmm4
   3136 	pxor	xmm4,xmm4
   3137 	pxor	xmm6,xmm14
   3138 	movdqu	XMMWORD[48+rsi],xmm5
   3139 	pxor	xmm5,xmm5
   3140 	pxor	xmm7,xmm15
   3141 	movdqu	XMMWORD[64+rsi],xmm6
   3142 	pxor	xmm6,xmm6
   3143 	pxor	xmm8,xmm9
   3144 	movdqu	XMMWORD[80+rsi],xmm7
   3145 	pxor	xmm7,xmm7
   3146 	lea	rsi,[96+rsi]
   3147 	movdqa	xmm2,xmm8
   3148 	pxor	xmm8,xmm8
   3149 	pxor	xmm9,xmm9
   3150 	jmp	NEAR $L$cbc_dec_tail_collected
   3151 
   3152 ALIGN	16
        ; ---- six blocks per iteration
   3153 $L$cbc_dec_loop6:
   3154 	movups	XMMWORD[rsi],xmm7	; sixth output block of the previous iteration
   3155 	lea	rsi,[16+rsi]
   3156 	movdqu	xmm2,XMMWORD[rdi]
   3157 	movdqu	xmm3,XMMWORD[16+rdi]
   3158 	movdqa	xmm11,xmm2
   3159 	movdqu	xmm4,XMMWORD[32+rdi]
   3160 	movdqa	xmm12,xmm3
   3161 	movdqu	xmm5,XMMWORD[48+rdi]
   3162 	movdqa	xmm13,xmm4
   3163 	movdqu	xmm6,XMMWORD[64+rdi]
   3164 	movdqa	xmm14,xmm5
   3165 	movdqu	xmm7,XMMWORD[80+rdi]
   3166 	movdqa	xmm15,xmm6
   3167 $L$cbc_dec_loop6_enter:
   3168 	lea	rdi,[96+rdi]
   3169 	movdqa	xmm8,xmm7	; input block 5 becomes the next chaining value
   3170 
   3171 	call	_aesni_decrypt6
   3172 
   3173 	pxor	xmm2,xmm10
   3174 	movdqa	xmm10,xmm8
   3175 	pxor	xmm3,xmm11
   3176 	movdqu	XMMWORD[rsi],xmm2
   3177 	pxor	xmm4,xmm12
   3178 	movdqu	XMMWORD[16+rsi],xmm3
   3179 	pxor	xmm5,xmm13
   3180 	movdqu	XMMWORD[32+rsi],xmm4
   3181 	pxor	xmm6,xmm14
   3182 	mov	rcx,r11	; reload key pointer and round counter after the helper call
   3183 	movdqu	XMMWORD[48+rsi],xmm5
   3184 	pxor	xmm7,xmm15
   3185 	mov	eax,r10d
   3186 	movdqu	XMMWORD[64+rsi],xmm6
   3187 	lea	rsi,[80+rsi]
   3188 	sub	rdx,0x60
   3189 	ja	NEAR $L$cbc_dec_loop6
   3190 
   3191 	movdqa	xmm2,xmm7	; pending sixth result, in case nothing is left
   3192 	add	rdx,0x50
   3193 	jle	NEAR $L$cbc_dec_clear_tail_collected
   3194 	movups	XMMWORD[rsi],xmm7
   3195 	lea	rsi,[16+rsi]
   3196 
        ; ---- tail: rdx <= 0x50 here.  rdx is reduced by 16 per block loaded
        ; and the matching handler is entered as soon as it reaches zero or
        ; borrows.
   3197 $L$cbc_dec_tail:
   3198 	movups	xmm2,XMMWORD[rdi]
   3199 	sub	rdx,0x10
   3200 	jbe	NEAR $L$cbc_dec_one
   3201 
   3202 	movups	xmm3,XMMWORD[16+rdi]
   3203 	movaps	xmm11,xmm2
   3204 	sub	rdx,0x10
   3205 	jbe	NEAR $L$cbc_dec_two
   3206 
   3207 	movups	xmm4,XMMWORD[32+rdi]
   3208 	movaps	xmm12,xmm3
   3209 	sub	rdx,0x10
   3210 	jbe	NEAR $L$cbc_dec_three
   3211 
   3212 	movups	xmm5,XMMWORD[48+rdi]
   3213 	movaps	xmm13,xmm4
   3214 	sub	rdx,0x10
   3215 	jbe	NEAR $L$cbc_dec_four
   3216 
        ; five blocks: use the six-block helper with a zeroed sixth lane
   3217 	movups	xmm6,XMMWORD[64+rdi]
   3218 	movaps	xmm14,xmm5
   3219 	movaps	xmm15,xmm6
   3220 	xorps	xmm7,xmm7
   3221 	call	_aesni_decrypt6
   3222 	pxor	xmm2,xmm10
   3223 	movaps	xmm10,xmm15
   3224 	pxor	xmm3,xmm11
   3225 	movdqu	XMMWORD[rsi],xmm2
   3226 	pxor	xmm4,xmm12
   3227 	movdqu	XMMWORD[16+rsi],xmm3
   3228 	pxor	xmm3,xmm3
   3229 	pxor	xmm5,xmm13
   3230 	movdqu	XMMWORD[32+rsi],xmm4
   3231 	pxor	xmm4,xmm4
   3232 	pxor	xmm6,xmm14
   3233 	movdqu	XMMWORD[48+rsi],xmm5
   3234 	pxor	xmm5,xmm5
   3235 	lea	rsi,[64+rsi]
   3236 	movdqa	xmm2,xmm6
   3237 	pxor	xmm6,xmm6
   3238 	pxor	xmm7,xmm7
   3239 	sub	rdx,0x10
   3240 	jmp	NEAR $L$cbc_dec_tail_collected
   3241 
   3242 ALIGN	16
   3243 $L$cbc_dec_one:
   3244 	movaps	xmm11,xmm2	; keep the input block as the next chaining value
   3245 	movups	xmm0,XMMWORD[rcx]
   3246 	movups	xmm1,XMMWORD[16+rcx]
   3247 	lea	rcx,[32+rcx]
   3248 	xorps	xmm2,xmm0
   3249 $L$oop_dec1_17:
   3250 DB	102,15,56,222,209	; aesdec xmm2,xmm1
   3251 	dec	eax
   3252 	movups	xmm1,XMMWORD[rcx]
   3253 	lea	rcx,[16+rcx]
   3254 	jnz	NEAR $L$oop_dec1_17
   3255 DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
   3256 	xorps	xmm2,xmm10
   3257 	movaps	xmm10,xmm11
   3258 	jmp	NEAR $L$cbc_dec_tail_collected
   3259 ALIGN	16
   3260 $L$cbc_dec_two:
   3261 	movaps	xmm12,xmm3	; last input block becomes the next chaining value
   3262 	call	_aesni_decrypt2
   3263 	pxor	xmm2,xmm10
   3264 	movaps	xmm10,xmm12
   3265 	pxor	xmm3,xmm11
   3266 	movdqu	XMMWORD[rsi],xmm2
   3267 	movdqa	xmm2,xmm3
   3268 	pxor	xmm3,xmm3
   3269 	lea	rsi,[16+rsi]
   3270 	jmp	NEAR $L$cbc_dec_tail_collected
   3271 ALIGN	16
   3272 $L$cbc_dec_three:
   3273 	movaps	xmm13,xmm4	; last input block becomes the next chaining value
   3274 	call	_aesni_decrypt3
   3275 	pxor	xmm2,xmm10
   3276 	movaps	xmm10,xmm13
   3277 	pxor	xmm3,xmm11
   3278 	movdqu	XMMWORD[rsi],xmm2
   3279 	pxor	xmm4,xmm12
   3280 	movdqu	XMMWORD[16+rsi],xmm3
   3281 	pxor	xmm3,xmm3
   3282 	movdqa	xmm2,xmm4
   3283 	pxor	xmm4,xmm4
   3284 	lea	rsi,[32+rsi]
   3285 	jmp	NEAR $L$cbc_dec_tail_collected
   3286 ALIGN	16
   3287 $L$cbc_dec_four:
   3288 	movaps	xmm14,xmm5	; last input block becomes the next chaining value
   3289 	call	_aesni_decrypt4
   3290 	pxor	xmm2,xmm10
   3291 	movaps	xmm10,xmm14
   3292 	pxor	xmm3,xmm11
   3293 	movdqu	XMMWORD[rsi],xmm2
   3294 	pxor	xmm4,xmm12
   3295 	movdqu	XMMWORD[16+rsi],xmm3
   3296 	pxor	xmm3,xmm3
   3297 	pxor	xmm5,xmm13
   3298 	movdqu	XMMWORD[32+rsi],xmm4
   3299 	pxor	xmm4,xmm4
   3300 	movdqa	xmm2,xmm5
   3301 	pxor	xmm5,xmm5
   3302 	lea	rsi,[48+rsi]
   3303 	jmp	NEAR $L$cbc_dec_tail_collected
   3304 
   3305 ALIGN	16
        ; entered from the loop exits, which have not yet cleared xmm3-xmm5
   3306 $L$cbc_dec_clear_tail_collected:
   3307 	pxor	xmm3,xmm3
   3308 	pxor	xmm4,xmm4
   3309 	pxor	xmm5,xmm5
        ; common exit: xmm2 = last result block (not yet stored),
        ; xmm10 = chaining value to write back through r8
   3310 $L$cbc_dec_tail_collected:
   3311 	movups	XMMWORD[r8],xmm10
   3312 	and	rdx,15
   3313 	jnz	NEAR $L$cbc_dec_tail_partial
   3314 	movups	XMMWORD[rsi],xmm2
   3315 	pxor	xmm2,xmm2
   3316 	jmp	NEAR $L$cbc_dec_ret
   3317 ALIGN	16
        ; partial final block: spill xmm2 to the scratch slot at [rsp], copy
        ; part of it to the destination with rep movsb, then wipe the slot.
        ; NOTE(review): rcx = 16 - (rdx & 15), so the number of bytes copied is
        ; the complement of the residue left in rdx -- confirm this is the
        ; intended count for a byte count that is not a multiple of 16.
   3318 $L$cbc_dec_tail_partial:
   3319 	movaps	XMMWORD[rsp],xmm2
   3320 	pxor	xmm2,xmm2
   3321 	mov	rcx,16
   3322 	mov	rdi,rsi
   3323 	sub	rcx,rdx
   3324 	lea	rsi,[rsp]
   3325 	DD	0x9066A4F3	; rep movsb
   3326 	movdqa	XMMWORD[rsp],xmm2	; xmm2 is zero here: clears the spilled block
   3327 
        ; restore xmm6-xmm15 and overwrite each save slot with zeroes (xmm0)
   3328 $L$cbc_dec_ret:
   3329 	xorps	xmm0,xmm0
   3330 	pxor	xmm1,xmm1
   3331 	movaps	xmm6,XMMWORD[16+rsp]
   3332 	movaps	XMMWORD[16+rsp],xmm0
   3333 	movaps	xmm7,XMMWORD[32+rsp]
   3334 	movaps	XMMWORD[32+rsp],xmm0
   3335 	movaps	xmm8,XMMWORD[48+rsp]
   3336 	movaps	XMMWORD[48+rsp],xmm0
   3337 	movaps	xmm9,XMMWORD[64+rsp]
   3338 	movaps	XMMWORD[64+rsp],xmm0
   3339 	movaps	xmm10,XMMWORD[80+rsp]
   3340 	movaps	XMMWORD[80+rsp],xmm0
   3341 	movaps	xmm11,XMMWORD[96+rsp]
   3342 	movaps	XMMWORD[96+rsp],xmm0
   3343 	movaps	xmm12,XMMWORD[112+rsp]
   3344 	movaps	XMMWORD[112+rsp],xmm0
   3345 	movaps	xmm13,XMMWORD[128+rsp]
   3346 	movaps	XMMWORD[128+rsp],xmm0
   3347 	movaps	xmm14,XMMWORD[144+rsp]
   3348 	movaps	XMMWORD[144+rsp],xmm0
   3349 	movaps	xmm15,XMMWORD[160+rsp]
   3350 	movaps	XMMWORD[160+rsp],xmm0
   3351 	lea	rsp,[rbp]	; drop the aligned frame; rbp points at the saved rbp
   3352 	pop	rbp
   3353 $L$cbc_ret:
   3354 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
   3355 	mov	rsi,QWORD[16+rsp]
   3356 	DB	0F3h,0C3h		;repret
   3357 $L$SEH_end_aesni_cbc_encrypt:
   3358 global	aesni_set_decrypt_key
        ;-----------------------------------------------------------------------
        ; aesni_set_decrypt_key -- build a decryption key schedule.
        ;
        ; Calls __aesni_set_encrypt_key with the caller's argument registers
        ; untouched, then converts the resulting schedule in place: the order
        ; of the round keys is reversed and aesimc is applied to every key
        ; except the first and the last.
        ;
        ; Returns in eax whatever __aesni_set_encrypt_key returned; a non-zero
        ; value skips the conversion.
        ;
        ; Relies on the callee leaving r8 = start of the key schedule and
        ; edx = round counter (the visible 128-bit path sets edx to 9 and does
        ; not modify r8; the other paths lie outside this view -- confirm).
        ;
        ; DB 102,15,56,219,MODRM encodes aesimc (66 0F 38 DB /r).
        ;-----------------------------------------------------------------------
   3359 
   3360 ALIGN	16
   3361 aesni_set_decrypt_key:
   3362 DB	0x48,0x83,0xEC,0x08	; sub rsp,8
   3363 	call	__aesni_set_encrypt_key
   3364 	shl	edx,4	; round counter * 16 = byte offset between round keys
   3365 	test	eax,eax
   3366 	jnz	NEAR $L$dec_key_ret	; key setup failed: pass its return value through
   3367 	lea	rcx,[16+rdx*1+r8]	; rcx = last round key, r8 = first round key
   3368 
        ; swap the first and last round keys unchanged (no aesimc)
   3369 	movups	xmm0,XMMWORD[r8]
   3370 	movups	xmm1,XMMWORD[rcx]
   3371 	movups	XMMWORD[rcx],xmm0
   3372 	movups	XMMWORD[r8],xmm1
   3373 	lea	r8,[16+r8]
   3374 	lea	rcx,[((-16))+rcx]
   3375 
        ; walk inward from both ends: aesimc each pair of keys and swap them
   3376 $L$dec_key_inverse:
   3377 	movups	xmm0,XMMWORD[r8]
   3378 	movups	xmm1,XMMWORD[rcx]
   3379 DB	102,15,56,219,192	; aesimc xmm0,xmm0
   3380 DB	102,15,56,219,201	; aesimc xmm1,xmm1
   3381 	lea	r8,[16+r8]
   3382 	lea	rcx,[((-16))+rcx]
   3383 	movups	XMMWORD[16+rcx],xmm0	; slot rcx pointed at before the step
   3384 	movups	XMMWORD[(-16)+r8],xmm1	; slot r8 pointed at before the step
   3385 	cmp	rcx,r8
   3386 	ja	NEAR $L$dec_key_inverse	; continue until the two pointers meet
   3387 
        ; the remaining key at r8 is transformed in place; this store assumes
        ; rcx == r8 here, i.e. an odd number of round keys
   3388 	movups	xmm0,XMMWORD[r8]
   3389 DB	102,15,56,219,192	; aesimc xmm0,xmm0
   3390 	pxor	xmm1,xmm1	; clear key material from registers
   3391 	movups	XMMWORD[rcx],xmm0
   3392 	pxor	xmm0,xmm0
   3393 $L$dec_key_ret:
   3394 	add	rsp,8
   3395 	DB	0F3h,0C3h		;repret
   3396 $L$SEH_end_set_decrypt_key:
   3397 
   3398 global	aesni_set_encrypt_key
   3399 
   3400 ALIGN	16
        ;-----------------------------------------------------------------------
        ; aesni_set_encrypt_key / __aesni_set_encrypt_key
        ; (two labels for the same entry point)
        ;
        ; Expands a 128-, 192- or 256-bit AES key into a round-key schedule.
        ;
        ; In:  rcx = user key, edx = key length in bits, r8 = key schedule buffer.
        ; Out: rax = 0 on success,
        ;            -1 if rcx or r8 is NULL,
        ;            -2 if edx is not 128, 192 or 256.
        ;      On success the round keys are written starting at [r8] and the
        ;      dword at [240+r8] receives 9, 11 or 13 (for 128/192/256 bits).
        ;      edx still holds that value and r8 is unchanged on return.
        ; Each key size has two code paths; the "_alt" path is chosen from
        ;      OPENSSL_ia32cap_P (see the r10d test below).
        ; Clobbers: rax, edx, r10, xmm0-xmm5 (all six zeroed before return), flags.
        ;-----------------------------------------------------------------------
   3401 aesni_set_encrypt_key:
   3402 __aesni_set_encrypt_key:
   3403 DB	0x48,0x83,0xEC,0x08	; sub rsp,8 (hand-encoded)
   3404 	mov	rax,-1	; status returned for a NULL pointer argument
   3405 	test	rcx,rcx
   3406 	jz	NEAR $L$enc_key_ret
   3407 	test	r8,r8
   3408 	jz	NEAR $L$enc_key_ret
   3409 
   3410 	mov	r10d,268437504	; 0x10000800: capability bits 28 and 11
   3411 	movups	xmm0,XMMWORD[rcx]	; first 16 bytes of the user key
   3412 	xorps	xmm4,xmm4	; the $L$key_expansion_* helpers need xmm4's low dword to be zero
   3413 	and	r10d,DWORD[((OPENSSL_ia32cap_P+4))]	; NOTE(review): presumably the AVX / AMD XOP feature bits - confirm
   3414 	lea	rax,[16+r8]	; rax = store cursor, starts at the second round-key slot
   3415 	cmp	edx,256
   3416 	je	NEAR $L$14rounds
   3417 	cmp	edx,192
   3418 	je	NEAR $L$12rounds
   3419 	cmp	edx,128
   3420 	jne	NEAR $L$bad_keybits
   3421 
        ; ---- 128-bit key --------------------------------------------------
   3422 $L$10rounds:
   3423 	mov	edx,9	; round value stored at [240+r8]
   3424 	cmp	r10d,268435456	; 0x10000000: bit 28 set and bit 11 clear
   3425 	je	NEAR $L$10rounds_alt
   3426 
        ; AESKEYGENASSIST path: one helper call per round constant. The _cold
        ; entry skips the store because round key 0 was just written directly.
   3427 	movups	XMMWORD[r8],xmm0
   3428 DB	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,0x01
   3429 	call	$L$key_expansion_128_cold
   3430 DB	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,0x02
   3431 	call	$L$key_expansion_128
   3432 DB	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,0x04
   3433 	call	$L$key_expansion_128
   3434 DB	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,0x08
   3435 	call	$L$key_expansion_128
   3436 DB	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,0x10
   3437 	call	$L$key_expansion_128
   3438 DB	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,0x20
   3439 	call	$L$key_expansion_128
   3440 DB	102,15,58,223,200,64	; aeskeygenassist xmm1,xmm0,0x40
   3441 	call	$L$key_expansion_128
   3442 DB	102,15,58,223,200,128	; aeskeygenassist xmm1,xmm0,0x80
   3443 	call	$L$key_expansion_128
   3444 DB	102,15,58,223,200,27	; aeskeygenassist xmm1,xmm0,0x1b
   3445 	call	$L$key_expansion_128
   3446 DB	102,15,58,223,200,54	; aeskeygenassist xmm1,xmm0,0x36
   3447 	call	$L$key_expansion_128
   3448 	movups	XMMWORD[rax],xmm0	; last round key (rax = r8+160 here)
   3449 	mov	DWORD[80+rax],edx	; round value -> [240+r8]
   3450 	xor	eax,eax	; return 0
   3451 	jmp	NEAR $L$enc_key_ret
   3452 
   3453 ALIGN	16
        ; 128-bit alternative path: PSHUFB + AESENCLAST instead of AESKEYGENASSIST.
        ; xmm5 = byte-rotate mask, xmm4 = round constant, xmm2 = copy of the
        ; previous round key, r10d = loop counter.
   3454 $L$10rounds_alt:
   3455 	movdqa	xmm5,XMMWORD[$L$key_rotate]
   3456 	mov	r10d,8	; eight round keys are produced by the loop
   3457 	movdqa	xmm4,XMMWORD[$L$key_rcon1]
   3458 	movdqa	xmm2,xmm0
   3459 	movdqu	XMMWORD[r8],xmm0	; round key 0 = user key
   3460 	jmp	NEAR $L$oop_key128
   3461 
   3462 ALIGN	16
   3463 $L$oop_key128:
   3464 DB	102,15,56,0,197	; pshufb xmm0,xmm5: rotated top dword replicated into all lanes
   3465 DB	102,15,56,221,196	; aesenclast xmm0,xmm4: xmm4 (round constant) is XORed in as the round key
   3466 	pslld	xmm4,1	; double the round constant for the next iteration
   3467 	lea	rax,[16+rax]
   3468 
        ; xmm2 = running XOR of the previous key's dwords (w0, w0^w1, ...).
   3469 	movdqa	xmm3,xmm2
   3470 	pslldq	xmm2,4
   3471 	pxor	xmm3,xmm2
   3472 	pslldq	xmm2,4
   3473 	pxor	xmm3,xmm2
   3474 	pslldq	xmm2,4
   3475 	pxor	xmm2,xmm3
   3476 
   3477 	pxor	xmm0,xmm2	; xmm0 = new round key
   3478 	movdqu	XMMWORD[(-16)+rax],xmm0
   3479 	movdqa	xmm2,xmm0
   3480 
   3481 	dec	r10d
   3482 	jnz	NEAR $L$oop_key128
   3483 
        ; After eight doublings the constant no longer fits the 0x01..0x80
        ; sequence, so 0x1b is loaded for round 9 and doubled for round 10.
   3484 	movdqa	xmm4,XMMWORD[$L$key_rcon1b]
   3485 
   3486 DB	102,15,56,0,197	; pshufb xmm0,xmm5
   3487 DB	102,15,56,221,196	; aesenclast xmm0,xmm4
   3488 	pslld	xmm4,1
   3489 
   3490 	movdqa	xmm3,xmm2
   3491 	pslldq	xmm2,4
   3492 	pxor	xmm3,xmm2
   3493 	pslldq	xmm2,4
   3494 	pxor	xmm3,xmm2
   3495 	pslldq	xmm2,4
   3496 	pxor	xmm2,xmm3
   3497 
   3498 	pxor	xmm0,xmm2
   3499 	movdqu	XMMWORD[rax],xmm0	; round key 9 (rax = r8+144 here)
   3500 
   3501 	movdqa	xmm2,xmm0
   3502 DB	102,15,56,0,197	; pshufb xmm0,xmm5
   3503 DB	102,15,56,221,196	; aesenclast xmm0,xmm4
   3504 
   3505 	movdqa	xmm3,xmm2
   3506 	pslldq	xmm2,4
   3507 	pxor	xmm3,xmm2
   3508 	pslldq	xmm2,4
   3509 	pxor	xmm3,xmm2
   3510 	pslldq	xmm2,4
   3511 	pxor	xmm2,xmm3
   3512 
   3513 	pxor	xmm0,xmm2
   3514 	movdqu	XMMWORD[16+rax],xmm0	; round key 10
   3515 
   3516 	mov	DWORD[96+rax],edx	; round value -> [240+r8]
   3517 	xor	eax,eax	; return 0
   3518 	jmp	NEAR $L$enc_key_ret
   3519 
   3520 ALIGN	16
        ; ---- 192-bit key --------------------------------------------------
   3521 $L$12rounds:
   3522 	movq	xmm2,QWORD[16+rcx]	; remaining 8 bytes of the user key
   3523 	mov	edx,11	; round value stored at [240+r8]
   3524 	cmp	r10d,268435456	; 0x10000000: bit 28 set and bit 11 clear
   3525 	je	NEAR $L$12rounds_alt
   3526 
        ; AESKEYGENASSIST path: the 192a/192b helpers alternate because each
        ; expansion step yields 24 bytes while stores are made in 16-byte units.
   3527 	movups	XMMWORD[r8],xmm0
   3528 DB	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,0x01
   3529 	call	$L$key_expansion_192a_cold
   3530 DB	102,15,58,223,202,2	; aeskeygenassist xmm1,xmm2,0x02
   3531 	call	$L$key_expansion_192b
   3532 DB	102,15,58,223,202,4	; aeskeygenassist xmm1,xmm2,0x04
   3533 	call	$L$key_expansion_192a
   3534 DB	102,15,58,223,202,8	; aeskeygenassist xmm1,xmm2,0x08
   3535 	call	$L$key_expansion_192b
   3536 DB	102,15,58,223,202,16	; aeskeygenassist xmm1,xmm2,0x10
   3537 	call	$L$key_expansion_192a
   3538 DB	102,15,58,223,202,32	; aeskeygenassist xmm1,xmm2,0x20
   3539 	call	$L$key_expansion_192b
   3540 DB	102,15,58,223,202,64	; aeskeygenassist xmm1,xmm2,0x40
   3541 	call	$L$key_expansion_192a
   3542 DB	102,15,58,223,202,128	; aeskeygenassist xmm1,xmm2,0x80
   3543 	call	$L$key_expansion_192b
   3544 	movups	XMMWORD[rax],xmm0	; last round key (rax = r8+192 here)
   3545 	mov	DWORD[48+rax],edx	; round value -> [240+r8]
   3546 	xor	rax,rax	; return 0
   3547 	jmp	NEAR $L$enc_key_ret
   3548 
   3549 ALIGN	16
        ; 192-bit alternative path. xmm0 = low 16 key bytes, xmm2 = high 8 key
        ; bytes; every iteration writes 24 bytes of schedule.
   3550 $L$12rounds_alt:
   3551 	movdqa	xmm5,XMMWORD[$L$key_rotate192]
   3552 	movdqa	xmm4,XMMWORD[$L$key_rcon1]
   3553 	mov	r10d,8
   3554 	movdqu	XMMWORD[r8],xmm0
   3555 	jmp	NEAR $L$oop_key192
   3556 
   3557 ALIGN	16
   3558 $L$oop_key192:
   3559 	movq	QWORD[rax],xmm2	; store the 8-byte part of the current key material
   3560 	movdqa	xmm1,xmm2
   3561 DB	102,15,56,0,213	; pshufb xmm2,xmm5: rotated dword 1 replicated into all lanes
   3562 DB	102,15,56,221,212	; aesenclast xmm2,xmm4: xmm4 (round constant) is XORed in as the round key
   3563 	pslld	xmm4,1	; double the round constant
   3564 	lea	rax,[24+rax]
   3565 
        ; xmm0 = running XOR of its own four dwords.
   3566 	movdqa	xmm3,xmm0
   3567 	pslldq	xmm0,4
   3568 	pxor	xmm3,xmm0
   3569 	pslldq	xmm0,4
   3570 	pxor	xmm3,xmm0
   3571 	pslldq	xmm0,4
   3572 	pxor	xmm0,xmm3
   3573 
   3574 	pshufd	xmm3,xmm0,0xff	; broadcast dword 3 of the running XOR
   3575 	pxor	xmm3,xmm1
   3576 	pslldq	xmm1,4
   3577 	pxor	xmm3,xmm1
   3578 
   3579 	pxor	xmm0,xmm2	; next 16 bytes of key material
   3580 	pxor	xmm2,xmm3	; next 8 bytes of key material (stored at the top of the next pass)
   3581 	movdqu	XMMWORD[(-16)+rax],xmm0
   3582 
   3583 	dec	r10d
   3584 	jnz	NEAR $L$oop_key192
   3585 
   3586 	mov	DWORD[32+rax],edx	; round value -> [240+r8] (rax = r8+208 here)
   3587 	xor	eax,eax	; return 0
   3588 	jmp	NEAR $L$enc_key_ret
   3589 
   3590 ALIGN	16
        ; ---- 256-bit key --------------------------------------------------
   3591 $L$14rounds:
   3592 	movups	xmm2,XMMWORD[16+rcx]	; second 16 bytes of the user key
   3593 	mov	edx,13	; round value stored at [240+r8]
   3594 	lea	rax,[16+rax]	; cursor -> third slot; the first two hold the raw key
   3595 	cmp	r10d,268435456	; 0x10000000: bit 28 set and bit 11 clear
   3596 	je	NEAR $L$14rounds_alt
   3597 
        ; AESKEYGENASSIST path: 256a updates xmm0 (even keys), 256b updates
        ; xmm2 (odd keys); each helper first stores the key produced before it.
   3598 	movups	XMMWORD[r8],xmm0
   3599 	movups	XMMWORD[16+r8],xmm2
   3600 DB	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,0x01
   3601 	call	$L$key_expansion_256a_cold
   3602 DB	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,0x01
   3603 	call	$L$key_expansion_256b
   3604 DB	102,15,58,223,202,2	; aeskeygenassist xmm1,xmm2,0x02
   3605 	call	$L$key_expansion_256a
   3606 DB	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,0x02
   3607 	call	$L$key_expansion_256b
   3608 DB	102,15,58,223,202,4	; aeskeygenassist xmm1,xmm2,0x04
   3609 	call	$L$key_expansion_256a
   3610 DB	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,0x04
   3611 	call	$L$key_expansion_256b
   3612 DB	102,15,58,223,202,8	; aeskeygenassist xmm1,xmm2,0x08
   3613 	call	$L$key_expansion_256a
   3614 DB	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,0x08
   3615 	call	$L$key_expansion_256b
   3616 DB	102,15,58,223,202,16	; aeskeygenassist xmm1,xmm2,0x10
   3617 	call	$L$key_expansion_256a
   3618 DB	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,0x10
   3619 	call	$L$key_expansion_256b
   3620 DB	102,15,58,223,202,32	; aeskeygenassist xmm1,xmm2,0x20
   3621 	call	$L$key_expansion_256a
   3622 DB	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,0x20
   3623 	call	$L$key_expansion_256b
   3624 DB	102,15,58,223,202,64	; aeskeygenassist xmm1,xmm2,0x40
   3625 	call	$L$key_expansion_256a
   3626 	movups	XMMWORD[rax],xmm0	; last round key (rax = r8+224 here)
   3627 	mov	DWORD[16+rax],edx	; round value -> [240+r8]
   3628 	xor	rax,rax	; return 0
   3629 	jmp	NEAR $L$enc_key_ret
   3630 
   3631 ALIGN	16
        ; 256-bit alternative path. xmm0/xmm1 hold the two most recent round
        ; keys; r10d counts the seven even-numbered keys still to produce.
   3632 $L$14rounds_alt:
   3633 	movdqa	xmm5,XMMWORD[$L$key_rotate]
   3634 	movdqa	xmm4,XMMWORD[$L$key_rcon1]
   3635 	mov	r10d,7
   3636 	movdqu	XMMWORD[r8],xmm0
   3637 	movdqa	xmm1,xmm2
   3638 	movdqu	XMMWORD[16+r8],xmm2
   3639 	jmp	NEAR $L$oop_key256
   3640 
   3641 ALIGN	16
   3642 $L$oop_key256:
   3643 DB	102,15,56,0,213	; pshufb xmm2,xmm5: rotated top dword replicated into all lanes
   3644 DB	102,15,56,221,212	; aesenclast xmm2,xmm4: xmm4 (round constant) is XORed in as the round key
   3645 
        ; xmm0 = running XOR of its own four dwords.
   3646 	movdqa	xmm3,xmm0
   3647 	pslldq	xmm0,4
   3648 	pxor	xmm3,xmm0
   3649 	pslldq	xmm0,4
   3650 	pxor	xmm3,xmm0
   3651 	pslldq	xmm0,4
   3652 	pxor	xmm0,xmm3
   3653 	pslld	xmm4,1	; double the round constant
   3654 
   3655 	pxor	xmm0,xmm2	; new even-numbered round key
   3656 	movdqu	XMMWORD[rax],xmm0
   3657 
   3658 	dec	r10d
   3659 	jz	NEAR $L$done_key256	; the schedule ends on an even-numbered key
   3660 
        ; Odd-numbered key: top dword broadcast, no rotation, and an all-zero
        ; "round key" so AESENCLAST adds no round constant.
   3661 	pshufd	xmm2,xmm0,0xff
   3662 	pxor	xmm3,xmm3
   3663 DB	102,15,56,221,211	; aesenclast xmm2,xmm3
   3664 
        ; xmm1 = running XOR of its own four dwords.
   3665 	movdqa	xmm3,xmm1
   3666 	pslldq	xmm1,4
   3667 	pxor	xmm3,xmm1
   3668 	pslldq	xmm1,4
   3669 	pxor	xmm3,xmm1
   3670 	pslldq	xmm1,4
   3671 	pxor	xmm1,xmm3
   3672 
   3673 	pxor	xmm2,xmm1	; new odd-numbered round key
   3674 	movdqu	XMMWORD[16+rax],xmm2
   3675 	lea	rax,[32+rax]
   3676 	movdqa	xmm1,xmm2
   3677 
   3678 	jmp	NEAR $L$oop_key256
   3679 
   3680 $L$done_key256:
   3681 	mov	DWORD[16+rax],edx	; round value -> [240+r8] (rax = r8+224 here)
   3682 	xor	eax,eax	; return 0
   3683 	jmp	NEAR $L$enc_key_ret
   3684 
   3685 ALIGN	16
   3686 $L$bad_keybits:
   3687 	mov	rax,-2	; status returned for an unsupported key length
        ; Common exit: wipe every xmm register that may hold key material.
   3688 $L$enc_key_ret:
   3689 	pxor	xmm0,xmm0
   3690 	pxor	xmm1,xmm1
   3691 	pxor	xmm2,xmm2
   3692 	pxor	xmm3,xmm3
   3693 	pxor	xmm4,xmm4
   3694 	pxor	xmm5,xmm5
   3695 	add	rsp,8	; undo the hand-encoded sub rsp,8
   3696 	DB	0F3h,0C3h		;repret
   3697 $L$SEH_end_set_encrypt_key:
   3698 
   3699 ALIGN	16
   3700 $L$key_expansion_128:
        ; One 128-bit key-expansion step (called, not fallen into).
        ; In:  xmm0 = current round key, xmm1 = AESKEYGENASSIST result,
        ;      xmm4 = scratch whose low dword must be zero, rax = store cursor.
        ; Out: xmm0 = next round key; xmm4's low dword is still zero.
        ; This entry first stores the current key and advances rax by 16;
        ; the _cold entry below skips the store.
   3701 	movups	XMMWORD[rax],xmm0
   3702 	lea	rax,[16+rax]
   3703 $L$key_expansion_128_cold:
        ; The two shufps/xorps pairs turn xmm0 = (w0,w1,w2,w3) into the running
        ; XOR (w0, w0^w1, w0^w1^w2, w0^w1^w2^w3).
   3704 	shufps	xmm4,xmm0,16
   3705 	xorps	xmm0,xmm4
   3706 	shufps	xmm4,xmm0,140
   3707 	xorps	xmm0,xmm4
   3708 	shufps	xmm1,xmm1,255	; broadcast dword 3 of the AESKEYGENASSIST result
   3709 	xorps	xmm0,xmm1
   3710 	DB	0F3h,0C3h		;repret
   3711 
   3712 ALIGN	16
   3713 $L$key_expansion_192a:
        ; 192-bit key-expansion step, "a" variant (called, not fallen into).
        ; In:  xmm0 = low 16 bytes and xmm2 = high 8 bytes of the current key
        ;      material, xmm1 = AESKEYGENASSIST result, xmm4 = scratch whose
        ;      low dword must be zero, rax = store cursor.
        ; Out: xmm0/xmm2 updated, xmm5 = copy of xmm2 as it was on entry.
        ; Entries: 192a stores xmm0 at [rax] and advances rax by 16 first;
        ;          192a_cold skips that store;
        ;          192b_warm additionally skips the xmm5 save and is jumped to
        ;          from $L$key_expansion_192b.
   3714 	movups	XMMWORD[rax],xmm0
   3715 	lea	rax,[16+rax]
   3716 $L$key_expansion_192a_cold:
   3717 	movaps	xmm5,xmm2	; keep the pre-update xmm2 for the next 192b call
   3718 $L$key_expansion_192b_warm:
        ; The shufps/xorps pairs build the running XOR of xmm0's dwords; the
        ; xmm3 work on xmm2's low dwords is interleaved with it.
   3719 	shufps	xmm4,xmm0,16
   3720 	movdqa	xmm3,xmm2
   3721 	xorps	xmm0,xmm4
   3722 	shufps	xmm4,xmm0,140
   3723 	pslldq	xmm3,4
   3724 	xorps	xmm0,xmm4
   3725 	pshufd	xmm1,xmm1,85	; broadcast dword 1 of the AESKEYGENASSIST result
   3726 	pxor	xmm2,xmm3
   3727 	pxor	xmm0,xmm1
   3728 	pshufd	xmm3,xmm0,255	; broadcast dword 3 of the updated xmm0
   3729 	pxor	xmm2,xmm3
   3730 	DB	0F3h,0C3h		;repret
   3731 
   3732 ALIGN	16
   3733 $L$key_expansion_192b:
        ; 192-bit key-expansion step, "b" variant (called, not fallen into).
        ; Writes 32 bytes of schedule at [rax], assembled from xmm5 (saved by
        ; the preceding 192a step), xmm0 and xmm2, advances rax by 32, then
        ; jumps to $L$key_expansion_192b_warm, whose rep-ret returns to the caller.
        ; Clobbers xmm3 and xmm5.
   3734 	movaps	xmm3,xmm0
   3735 	shufps	xmm5,xmm0,68	; xmm5 = (low qword of xmm5, low qword of xmm0)
   3736 	movups	XMMWORD[rax],xmm5
   3737 	shufps	xmm3,xmm2,78	; xmm3 = (high qword of xmm0, low qword of xmm2)
   3738 	movups	XMMWORD[16+rax],xmm3
   3739 	lea	rax,[32+rax]
   3740 	jmp	NEAR $L$key_expansion_192b_warm
   3741 
   3742 ALIGN	16
   3743 $L$key_expansion_256a:
        ; 256-bit key-expansion step that updates xmm0 (called, not fallen into).
        ; In:  xmm0 = key to update, xmm2 = most recent round key,
        ;      xmm1 = AESKEYGENASSIST result, xmm4 = scratch whose low dword
        ;      must be zero, rax = store cursor.
        ; Out: xmm0 = next round key; xmm4's low dword is still zero.
        ; This entry first stores xmm2 and advances rax by 16; the _cold
        ; entry below skips the store.
   3744 	movups	XMMWORD[rax],xmm2
   3745 	lea	rax,[16+rax]
   3746 $L$key_expansion_256a_cold:
        ; Running XOR of xmm0's four dwords, using xmm4 as scratch.
   3747 	shufps	xmm4,xmm0,16
   3748 	xorps	xmm0,xmm4
   3749 	shufps	xmm4,xmm0,140
   3750 	xorps	xmm0,xmm4
   3751 	shufps	xmm1,xmm1,255	; broadcast dword 3 of the AESKEYGENASSIST result
   3752 	xorps	xmm0,xmm1
   3753 	DB	0F3h,0C3h		;repret
   3754 
   3755 ALIGN	16
   3756 $L$key_expansion_256b:
        ; 256-bit key-expansion step that updates xmm2 (called, not fallen into).
        ; In:  xmm2 = key to update, xmm0 = most recent round key,
        ;      xmm1 = AESKEYGENASSIST result, xmm4 = scratch whose low dword
        ;      must be zero, rax = store cursor.
        ; Out: xmm2 = next round key; xmm4's low dword is still zero.
        ; Always stores xmm0 at [rax] and advances rax by 16 first.
   3757 	movups	XMMWORD[rax],xmm0
   3758 	lea	rax,[16+rax]
   3759 
        ; Running XOR of xmm2's four dwords, using xmm4 as scratch.
   3760 	shufps	xmm4,xmm2,16
   3761 	xorps	xmm2,xmm4
   3762 	shufps	xmm4,xmm2,140
   3763 	xorps	xmm2,xmm4
   3764 	shufps	xmm1,xmm1,170	; broadcast dword 2 of the AESKEYGENASSIST result
   3765 	xorps	xmm2,xmm1
   3766 	DB	0F3h,0C3h		;repret
   3767 
   3768 
   3769 ALIGN	64
   3770 $L$bswap_mask:
        ; 16-byte constant tables. Only the four $L$key_* tables are referenced
        ; by the key-setup code in this section; the others are used elsewhere
        ; in the file.
        ;
        ; $L$bswap_mask: byte indices 15..0 in descending order.
        ; NOTE(review): presumably a PSHUFB control that reverses a 16-byte
        ; block - confirm at its use sites.
   3771 DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   3772 $L$increment32:
        ; dwords (6,6,6,0)
   3773 	DD	6,6,6,0
   3774 $L$increment64:
        ; dwords (1,0,0,0)
   3775 	DD	1,0,0,0
   3776 $L$xts_magic:
        ; dwords (0x87,0,1,0)
   3777 	DD	0x87,0,1,0
   3778 $L$increment1:
        ; fifteen zero bytes followed by a single 1 in the last byte
   3779 DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
   3780 $L$key_rotate:
        ; PSHUFB control for the 128/256-bit _alt key schedules: every dword
        ; selects source bytes 13,14,15,12 (top dword rotated by one byte).
   3781 	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
   3782 $L$key_rotate192:
        ; PSHUFB control for the 192-bit _alt key schedule: every dword
        ; selects source bytes 5,6,7,4 (dword 1 rotated by one byte).
   3783 	DD	0x04070605,0x04070605,0x04070605,0x04070605
   3784 $L$key_rcon1:
        ; initial round constant 0x01 in every dword (doubled with pslld per round)
   3785 	DD	1,1,1,1
   3786 $L$key_rcon1b:
        ; round constant 0x1b in every dword, loaded after eight doublings of 0x01
   3787 	DD	0x1b,0x1b,0x1b,0x1b
   3788 
        ; NUL-terminated ASCII ident string:
        ; "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
   3789 DB	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
   3790 DB	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
   3791 DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
   3792 DB	115,108,46,111,114,103,62,0
   3793 ALIGN	64
   3794 EXTERN	__imp_RtlVirtualUnwind
   3795 
   3796 ALIGN	16
   3797 ecb_ccm64_se_handler:
        ; Win64 exception handler referenced by the ecb / ccm64 entries in
        ; .xdata. On entry r8 -> context record and r9 -> dispatcher context.
        ; The handler data ([56+r9]) holds two image-relative addresses: the
        ; start of the protected body and its return label.
        ; NOTE(review): field names in the comments below follow the Win64
        ; CONTEXT / DISPATCHER_CONTEXT layouts - verify offsets against winnt.h.
        ;
        ; If the faulting Rip lies inside [body, ret), 8 qwords are copied from
        ; the stack at context Rsp into the context's xmm save area at offset
        ; 512 and 88 bytes of frame are discounted; in all cases control then
        ; continues in $L$common_seh_tail with rax = the stack pointer to report.
   3798 	push	rsi
   3799 	push	rdi
   3800 	push	rbx
   3801 	push	rbp
   3802 	push	r12
   3803 	push	r13
   3804 	push	r14
   3805 	push	r15
   3806 	pushfq
   3807 	sub	rsp,64	; room for the RtlVirtualUnwind call made in the common tail
   3808 
   3809 	mov	rax,QWORD[120+r8]	; context->Rax
   3810 	mov	rbx,QWORD[248+r8]	; context->Rip
   3811 
   3812 	mov	rsi,QWORD[8+r9]	; disp->ImageBase
   3813 	mov	r11,QWORD[56+r9]	; disp->HandlerData
   3814 
   3815 	mov	r10d,DWORD[r11]	; HandlerData[0]: image-relative start of body
   3816 	lea	r10,[r10*1+rsi]
   3817 	cmp	rbx,r10
   3818 	jb	NEAR $L$common_seh_tail	; Rip before the body: use context->Rax as the stack pointer
   3819 
   3820 	mov	rax,QWORD[152+r8]	; context->Rsp
   3821 
   3822 	mov	r10d,DWORD[4+r11]	; HandlerData[1]: image-relative return label
   3823 	lea	r10,[r10*1+rsi]
   3824 	cmp	rbx,r10
   3825 	jae	NEAR $L$common_seh_tail	; Rip at or past the return label: nothing to restore
   3826 
   3827 	lea	rsi,[rax]	; source = saved xmm area at the top of the stack frame
   3828 	lea	rdi,[512+r8]	; destination = xmm save area inside the context record
   3829 	mov	ecx,8	; 8 qwords = 4 xmm registers
   3830 	DD	0xa548f3fc	; cld; rep movsq
   3831 	lea	rax,[88+rax]	; skip the 88-byte frame
   3832 
   3833 	jmp	NEAR $L$common_seh_tail
   3834 
   3835 
   3836 
   3837 ALIGN	16
   3838 ctr_xts_se_handler:
        ; Win64 exception handler referenced by the ctr32 / xts entries in
        ; .xdata. On entry r8 -> context record and r9 -> dispatcher context.
        ; The handler data ([56+r9]) holds two image-relative addresses: the
        ; start of the protected body and its epilogue label.
        ; NOTE(review): field names in the comments below follow the Win64
        ; CONTEXT / DISPATCHER_CONTEXT layouts - verify offsets against winnt.h.
        ;
        ; If the faulting Rip lies inside [body, epilogue), 20 qwords located
        ; 160 bytes below context Rbp are copied into the context's xmm save
        ; area at offset 512 and control continues in $L$common_rbp_tail;
        ; otherwise it goes straight to $L$common_seh_tail.
   3839 	push	rsi
   3840 	push	rdi
   3841 	push	rbx
   3842 	push	rbp
   3843 	push	r12
   3844 	push	r13
   3845 	push	r14
   3846 	push	r15
   3847 	pushfq
   3848 	sub	rsp,64	; room for the RtlVirtualUnwind call made in the common tail
   3849 
   3850 	mov	rax,QWORD[120+r8]	; context->Rax
   3851 	mov	rbx,QWORD[248+r8]	; context->Rip
   3852 
   3853 	mov	rsi,QWORD[8+r9]	; disp->ImageBase
   3854 	mov	r11,QWORD[56+r9]	; disp->HandlerData
   3855 
   3856 	mov	r10d,DWORD[r11]	; HandlerData[0]: image-relative start of body
   3857 	lea	r10,[r10*1+rsi]
   3858 	cmp	rbx,r10
   3859 	jb	NEAR $L$common_seh_tail	; Rip before the body: use context->Rax as the stack pointer
   3860 
   3861 	mov	rax,QWORD[152+r8]	; context->Rsp
   3862 
   3863 	mov	r10d,DWORD[4+r11]	; HandlerData[1]: image-relative epilogue label
   3864 	lea	r10,[r10*1+rsi]
   3865 	cmp	rbx,r10
   3866 	jae	NEAR $L$common_seh_tail	; Rip at or past the epilogue: nothing to restore
   3867 
   3868 	mov	rax,QWORD[160+r8]	; context->Rbp
   3869 	lea	rsi,[((-160))+rax]	; source = 160 bytes below the frame pointer
   3870 	lea	rdi,[512+r8]	; destination = xmm save area inside the context record
   3871 	mov	ecx,20	; 20 qwords = 10 xmm registers
   3872 	DD	0xa548f3fc	; cld; rep movsq
   3873 
   3874 	jmp	NEAR $L$common_rbp_tail
   3875 
   3876 
   3877 ALIGN	16
   3878 cbc_se_handler:
        ; Win64 exception handler referenced by $L$SEH_info_cbc, followed by the
        ; tails shared with ecb_ccm64_se_handler and ctr_xts_se_handler.
        ; On entry r8 -> context record and r9 -> dispatcher context.
        ; NOTE(review): field names in the comments below follow the Win64
        ; CONTEXT / DISPATCHER_CONTEXT layouts - verify offsets against winnt.h.
        ;
        ; Unlike the other two handlers this one has no handler data; it
        ; compares the faulting Rip against code labels directly:
        ;   Rip <  $L$cbc_decrypt_bulk  -> common tail with rax = context Rsp
        ;   Rip <  $L$cbc_decrypt_body  -> common tail with rax = context Rax
        ;   Rip >= $L$cbc_ret           -> common tail with rax = context Rsp
        ;   otherwise                   -> restore xmm area, then rbp tail
   3879 	push	rsi
   3880 	push	rdi
   3881 	push	rbx
   3882 	push	rbp
   3883 	push	r12
   3884 	push	r13
   3885 	push	r14
   3886 	push	r15
   3887 	pushfq
   3888 	sub	rsp,64	; room for the RtlVirtualUnwind call below
   3889 
   3890 	mov	rax,QWORD[152+r8]	; context->Rsp
   3891 	mov	rbx,QWORD[248+r8]	; context->Rip
   3892 
   3893 	lea	r10,[$L$cbc_decrypt_bulk]
   3894 	cmp	rbx,r10
   3895 	jb	NEAR $L$common_seh_tail
   3896 
   3897 	lea	r10,[$L$cbc_decrypt_body]
   3898 	cmp	rbx,r10
   3899 	jb	NEAR $L$restore_cbc_rax
   3900 
   3901 	lea	r10,[$L$cbc_ret]
   3902 	cmp	rbx,r10
   3903 	jae	NEAR $L$common_seh_tail
   3904 
   3905 	lea	rsi,[16+rax]	; source = 16 bytes above context Rsp
   3906 	lea	rdi,[512+r8]	; destination = xmm save area inside the context record
   3907 	mov	ecx,20	; 20 qwords = 10 xmm registers
   3908 	DD	0xa548f3fc	; cld; rep movsq
   3909 
        ; Shared tail for frames addressed through rbp: the qword at context Rbp
        ; is the saved rbp, and the stack pointer to report sits just above it.
   3910 $L$common_rbp_tail:
   3911 	mov	rax,QWORD[160+r8]	; context->Rbp
   3912 	mov	rbp,QWORD[rax]	; saved rbp value
   3913 	lea	rax,[8+rax]	; stack pointer after popping it
   3914 	mov	QWORD[160+r8],rbp	; context->Rbp = restored rbp
   3915 	jmp	NEAR $L$common_seh_tail
   3916 
   3917 $L$restore_cbc_rax:
   3918 	mov	rax,QWORD[120+r8]	; context->Rax
   3919 
        ; Shared tail. rax = stack pointer at function entry; [8+rax] and
        ; [16+rax] are the slots from which the WIN64 epilogues reload rdi/rsi.
   3920 $L$common_seh_tail:
   3921 	mov	rdi,QWORD[8+rax]
   3922 	mov	rsi,QWORD[16+rax]
   3923 	mov	QWORD[152+r8],rax	; context->Rsp
   3924 	mov	QWORD[168+r8],rsi	; context->Rsi
   3925 	mov	QWORD[176+r8],rdi	; context->Rdi
   3926 
        ; Copy the patched context (154 qwords) over disp->ContextRecord.
   3927 	mov	rdi,QWORD[40+r9]
   3928 	mov	rsi,r8
   3929 	mov	ecx,154
   3930 	DD	0xa548f3fc	; cld; rep movsq
   3931 
        ; Call RtlVirtualUnwind with four register arguments and four stack
        ; arguments, all taken from the dispatcher context (now in rsi).
   3932 	mov	rsi,r9
   3933 	xor	rcx,rcx	; arg1 = 0
   3934 	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
   3935 	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
   3936 	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
   3937 	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
   3938 	lea	r11,[56+rsi]	; &disp->HandlerData
   3939 	lea	r12,[24+rsi]	; &disp->EstablisherFrame
   3940 	mov	QWORD[32+rsp],r10	; arg5
   3941 	mov	QWORD[40+rsp],r11	; arg6
   3942 	mov	QWORD[48+rsp],r12	; arg7
   3943 	mov	QWORD[56+rsp],rcx	; arg8 = 0
   3944 	call	QWORD[__imp_RtlVirtualUnwind]
   3945 
   3946 	mov	eax,1	; handler return value (NOTE(review): presumably ExceptionContinueSearch - confirm)
   3947 	add	rsp,64
   3948 	popfq
   3949 	pop	r15
   3950 	pop	r14
   3951 	pop	r13
   3952 	pop	r12
   3953 	pop	rbp
   3954 	pop	rbx
   3955 	pop	rdi
   3956 	pop	rsi
   3957 	DB	0F3h,0C3h		;repret
   3958 
   3959 
   3960 section	.pdata rdata align=4
        ; Function table: one entry of three image-relative dwords per function -
        ; start address, end address, and the address of its unwind-info record
        ; in .xdata.
   3961 ALIGN	4
   3962 	DD	$L$SEH_begin_aesni_ecb_encrypt wrt ..imagebase
   3963 	DD	$L$SEH_end_aesni_ecb_encrypt wrt ..imagebase
   3964 	DD	$L$SEH_info_ecb wrt ..imagebase
   3965 
   3966 	DD	$L$SEH_begin_aesni_ccm64_encrypt_blocks wrt ..imagebase
   3967 	DD	$L$SEH_end_aesni_ccm64_encrypt_blocks wrt ..imagebase
   3968 	DD	$L$SEH_info_ccm64_enc wrt ..imagebase
   3969 
   3970 	DD	$L$SEH_begin_aesni_ccm64_decrypt_blocks wrt ..imagebase
   3971 	DD	$L$SEH_end_aesni_ccm64_decrypt_blocks wrt ..imagebase
   3972 	DD	$L$SEH_info_ccm64_dec wrt ..imagebase
   3973 
   3974 	DD	$L$SEH_begin_aesni_ctr32_encrypt_blocks wrt ..imagebase
   3975 	DD	$L$SEH_end_aesni_ctr32_encrypt_blocks wrt ..imagebase
   3976 	DD	$L$SEH_info_ctr32 wrt ..imagebase
   3977 
   3978 	DD	$L$SEH_begin_aesni_xts_encrypt wrt ..imagebase
   3979 	DD	$L$SEH_end_aesni_xts_encrypt wrt ..imagebase
   3980 	DD	$L$SEH_info_xts_enc wrt ..imagebase
   3981 
   3982 	DD	$L$SEH_begin_aesni_xts_decrypt wrt ..imagebase
   3983 	DD	$L$SEH_end_aesni_xts_decrypt wrt ..imagebase
   3984 	DD	$L$SEH_info_xts_dec wrt ..imagebase
   3985 	DD	$L$SEH_begin_aesni_cbc_encrypt wrt ..imagebase
   3986 	DD	$L$SEH_end_aesni_cbc_encrypt wrt ..imagebase
   3987 	DD	$L$SEH_info_cbc wrt ..imagebase
   3988 
        ; Both key-setup routines share the same unwind-info record.
   3989 	DD	aesni_set_decrypt_key wrt ..imagebase
   3990 	DD	$L$SEH_end_set_decrypt_key wrt ..imagebase
   3991 	DD	$L$SEH_info_key wrt ..imagebase
   3992 
   3993 	DD	aesni_set_encrypt_key wrt ..imagebase
   3994 	DD	$L$SEH_end_set_encrypt_key wrt ..imagebase
   3995 	DD	$L$SEH_info_key wrt ..imagebase
   3996 section	.xdata rdata align=8
        ; Unwind-info records referenced from .pdata.
        ; NOTE(review): byte meanings below follow the Win64 UNWIND_INFO layout -
        ; verify against the Microsoft x64 exception-handling documentation.
        ; Records that start with DB 9,0,0,0 (version 1 with the exception-handler
        ; flag, empty prolog description) are followed by the image-relative
        ; handler address and, for all but the cbc record, two image-relative
        ; labels that the handler reads as its handler data.
   3997 ALIGN	8
   3998 $L$SEH_info_ecb:
   3999 DB	9,0,0,0
   4000 	DD	ecb_ccm64_se_handler wrt ..imagebase
   4001 	DD	$L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase
   4002 $L$SEH_info_ccm64_enc:
   4003 DB	9,0,0,0
   4004 	DD	ecb_ccm64_se_handler wrt ..imagebase
   4005 	DD	$L$ccm64_enc_body wrt ..imagebase,$L$ccm64_enc_ret wrt ..imagebase
   4006 $L$SEH_info_ccm64_dec:
   4007 DB	9,0,0,0
   4008 	DD	ecb_ccm64_se_handler wrt ..imagebase
   4009 	DD	$L$ccm64_dec_body wrt ..imagebase,$L$ccm64_dec_ret wrt ..imagebase
   4010 $L$SEH_info_ctr32:
   4011 DB	9,0,0,0
   4012 	DD	ctr_xts_se_handler wrt ..imagebase
   4013 	DD	$L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
   4014 $L$SEH_info_xts_enc:
   4015 DB	9,0,0,0
   4016 	DD	ctr_xts_se_handler wrt ..imagebase
   4017 	DD	$L$xts_enc_body wrt ..imagebase,$L$xts_enc_epilogue wrt ..imagebase
   4018 $L$SEH_info_xts_dec:
   4019 DB	9,0,0,0
   4020 	DD	ctr_xts_se_handler wrt ..imagebase
   4021 	DD	$L$xts_dec_body wrt ..imagebase,$L$xts_dec_epilogue wrt ..imagebase
   4022 $L$SEH_info_cbc:
   4023 DB	9,0,0,0
   4024 	DD	cbc_se_handler wrt ..imagebase
        ; Record shared by aesni_set_encrypt_key and aesni_set_decrypt_key: no
        ; handler, a 4-byte prolog and one unwind code describing an 8-byte stack
        ; allocation, matching the hand-encoded sub rsp,8 at both entry points.
   4025 $L$SEH_info_key:
   4026 DB	0x01,0x04,0x01,0x00
   4027 DB	0x04,0x02,0x00,0x00
   4028