Home | History | Annotate | Download | only in fipsmodule
      1 ; This file is generated from a similarly-named Perl script in the BoringSSL
      2 ; source tree. Do not edit by hand.
      3 
      4 default	rel	; memory operands without an explicit base are RIP-relative
        ; The generator emits XMMWORD/YMMWORD/ZMMWORD size keywords in front of
        ; memory operands; they are defined as empty so NASM sees plain [..].
      5 %define XMMWORD
      6 %define YMMWORD
      7 %define ZMMWORD
      8 
        ; Optional include, pulled in only when the build defines BORINGSSL_PREFIX.
      9 %ifdef BORINGSSL_PREFIX
     10 %include "boringssl_prefix_symbols_nasm.inc"
     11 %endif
     12 section	.text code align=64
     13 
     14 EXTERN	OPENSSL_ia32cap_P	; dword array; gcm_ghash_clmul reads the dword at offset 4
     15 
     16 global	gcm_gmult_4bit
        ; gcm_gmult_4bit -- table-driven multiply that consumes the operand one
        ; nibble (4 bits) at a time.
        ; Win64 entry: the first two arguments arrive in rcx/rdx and are copied
        ; to rdi/rsi, so the body can use the same registers as the non-Windows build.
        ;   rdi (from rcx): 16-byte block; read byte by byte from offset 15 down
        ;                   to 0 and finally overwritten with the 16-byte result
        ;   rsi (from rdx): table of 16-byte entries, indexed by nibble*16
        ; NOTE(review): the C prototype presumably calls these Xi and Htable -- confirm.
        ; $L$rem_4bit is a table of qwords defined outside this view.
        ; Registers written: rax, rbx, rcx, rdx, rsi, rdi, r8-r11.
        ; rbx, rdi and rsi are restored before returning.
     17 
     18 ALIGN	16
     19 gcm_gmult_4bit:
     20 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
     21 	mov	QWORD[16+rsp],rsi	; rdi/rsi parked in the caller's home slots above the return address
     22 	mov	rax,rsp
     23 $L$SEH_begin_gcm_gmult_4bit:
     24 	mov	rdi,rcx
     25 	mov	rsi,rdx
     26 
     27 
     28 
     29 	push	rbx
     30 
     31 	push	rbp
     32 
     33 	push	r12
     34 
     35 	push	r13
     36 
     37 	push	r14
     38 
     39 	push	r15
     40 
     41 	sub	rsp,280
     42 
     43 $L$gmult_prologue:
     44 
     45 	movzx	r8,BYTE[15+rdi]	; start with the last byte of the block
     46 	lea	r11,[$L$rem_4bit]
     47 	xor	rax,rax
     48 	xor	rbx,rbx
     49 	mov	al,r8b
     50 	mov	bl,r8b
     51 	shl	al,4	; al = low nibble * 16 = byte offset of its table entry
     52 	mov	rcx,14	; rcx = index of the next byte to fetch, counts down
     53 	mov	r8,QWORD[8+rax*1+rsi]
     54 	mov	r9,QWORD[rax*1+rsi]
     55 	and	bl,0xf0	; bl = high nibble * 16
     56 	mov	rdx,r8
     57 	jmp	NEAR $L$oop1
     58 
     59 ALIGN	16
        ; Each pass shifts the 128-bit value r9:r8 right by 4 twice. The four bits
        ; that fall off (kept in rdx) select a qword from $L$rem_4bit that is
        ; XORed into r9, and the next nibble's table entry is XORed into r9:r8.
     60 $L$oop1:
     61 	shr	r8,4
     62 	and	rdx,0xf
     63 	mov	r10,r9
     64 	mov	al,BYTE[rcx*1+rdi]	; fetch the next byte while the current one is processed
     65 	shr	r9,4
     66 	xor	r8,QWORD[8+rbx*1+rsi]
     67 	shl	r10,60	; the 4 bits leaving r9 become the top 4 bits of r8
     68 	xor	r9,QWORD[rbx*1+rsi]
     69 	mov	bl,al
     70 	xor	r9,QWORD[rdx*8+r11]
     71 	mov	rdx,r8
     72 	shl	al,4
     73 	xor	r8,r10
     74 	dec	rcx
     75 	js	NEAR $L$break1	; rcx dropped below 0: byte 0 has been fetched
     76 
     77 	shr	r8,4
     78 	and	rdx,0xf
     79 	mov	r10,r9
     80 	shr	r9,4
     81 	xor	r8,QWORD[8+rax*1+rsi]
     82 	shl	r10,60
     83 	xor	r9,QWORD[rax*1+rsi]
     84 	and	bl,0xf0
     85 	xor	r9,QWORD[rdx*8+r11]
     86 	mov	rdx,r8
     87 	xor	r8,r10
     88 	jmp	NEAR $L$oop1
     89 
     90 ALIGN	16
        ; Tail: the two nibbles of the final byte (offsets in al and bl).
     91 $L$break1:
     92 	shr	r8,4
     93 	and	rdx,0xf
     94 	mov	r10,r9
     95 	shr	r9,4
     96 	xor	r8,QWORD[8+rax*1+rsi]
     97 	shl	r10,60
     98 	xor	r9,QWORD[rax*1+rsi]
     99 	and	bl,0xf0
    100 	xor	r9,QWORD[rdx*8+r11]
    101 	mov	rdx,r8
    102 	xor	r8,r10
    103 
    104 	shr	r8,4
    105 	and	rdx,0xf
    106 	mov	r10,r9
    107 	shr	r9,4
    108 	xor	r8,QWORD[8+rbx*1+rsi]
    109 	shl	r10,60
    110 	xor	r9,QWORD[rbx*1+rsi]
    111 	xor	r8,r10
    112 	xor	r9,QWORD[rdx*8+r11]
    113 
    114 	bswap	r8	; byte-reverse each half before storing the result
    115 	bswap	r9
    116 	mov	QWORD[8+rdi],r8
    117 	mov	QWORD[rdi],r9
    118 
    119 	lea	rsi,[((280+48))+rsp]	; rsi = rsp as it was before the six pushes
    120 
    121 	mov	rbx,QWORD[((-8))+rsi]	; rbx is the only pushed register this routine modifies
    122 
    123 	lea	rsp,[rsi]
    124 
    125 $L$gmult_epilogue:
    126 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    127 	mov	rsi,QWORD[16+rsp]
    128 	DB	0F3h,0C3h		;repret
    129 
    130 $L$SEH_end_gcm_gmult_4bit:
    131 global	gcm_ghash_4bit
        ; gcm_ghash_4bit -- table-driven hash update over a buffer, 16 input
        ; bytes per pass of $L$outer_loop.
        ; Win64 entry: rcx, rdx, r8, r9 are copied to rdi, rsi, rdx, rcx.
        ;   rdi (from rcx): 16-byte accumulator; loaded before the loop, rewritten
        ;                   inside it (accumulator XOR input) and stored at the end
        ;   rsi (from rdx): table of sixteen 16-byte entries
        ;   r14 (from r8):  input pointer, advanced by 16 per pass
        ;   r15 (from r9):  byte count, turned into an end pointer (r14 + count)
        ; NOTE(review): the C prototype presumably calls these Xi, Htable, inp, len -- confirm.
        ; Stack frame (280 bytes below the six pushed registers):
        ;   [rsp+0 .. rsp+15]     one byte per table entry: the 4 bits shifted out
        ;                         of the entry, moved to the high nibble
        ;   [rbp-128 .. rbp-1]    second qword of every entry, shifted right by 4
        ;   [rbp .. rbp+127]      first qword of every entry, shifted right by 4
        ;                         (rbp = rsp+144)
        ; The loop bound is tested only at the bottom, so one 16-byte block is
        ; always processed. $L$rem_8bit is a table of words defined outside this view.
    132 
    133 ALIGN	16
    134 gcm_ghash_4bit:
    135 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    136 	mov	QWORD[16+rsp],rsi
    137 	mov	rax,rsp
    138 $L$SEH_begin_gcm_ghash_4bit:
    139 	mov	rdi,rcx
    140 	mov	rsi,rdx
    141 	mov	rdx,r8
    142 	mov	rcx,r9
    143 
    144 
    145 
    146 	push	rbx
    147 
    148 	push	rbp
    149 
    150 	push	r12
    151 
    152 	push	r13
    153 
    154 	push	r14
    155 
    156 	push	r15
    157 
    158 	sub	rsp,280
    159 
    160 $L$ghash_prologue:
    161 	mov	r14,rdx
    162 	mov	r15,rcx
    163 	sub	rsi,-128	; rsi += 128, so the table offsets below span -128..+120
    164 	lea	rbp,[((16+128))+rsp]
    165 	xor	edx,edx
        ; Sixteen interleaved steps follow, one per table entry i (registers
        ; alternate r8/rax and r9/rbx): load both qwords, shift the 128-bit pair
        ; right by 4, store the halves at [rbp+8*i] and [rbp-128+8*i], and store
        ; the shifted-out nibble (moved to bits 7..4) at [rsp+i].
    166 	mov	r8,QWORD[((0+0-128))+rsi]
    167 	mov	rax,QWORD[((0+8-128))+rsi]
    168 	mov	dl,al
    169 	shr	rax,4
    170 	mov	r10,r8
    171 	shr	r8,4
    172 	mov	r9,QWORD[((16+0-128))+rsi]
    173 	shl	dl,4
    174 	mov	rbx,QWORD[((16+8-128))+rsi]
    175 	shl	r10,60
    176 	mov	BYTE[rsp],dl
    177 	or	rax,r10
    178 	mov	dl,bl
    179 	shr	rbx,4
    180 	mov	r10,r9
    181 	shr	r9,4
    182 	mov	QWORD[rbp],r8
    183 	mov	r8,QWORD[((32+0-128))+rsi]
    184 	shl	dl,4
    185 	mov	QWORD[((0-128))+rbp],rax
    186 	mov	rax,QWORD[((32+8-128))+rsi]
    187 	shl	r10,60
    188 	mov	BYTE[1+rsp],dl
    189 	or	rbx,r10
    190 	mov	dl,al
    191 	shr	rax,4
    192 	mov	r10,r8
    193 	shr	r8,4
    194 	mov	QWORD[8+rbp],r9
    195 	mov	r9,QWORD[((48+0-128))+rsi]
    196 	shl	dl,4
    197 	mov	QWORD[((8-128))+rbp],rbx
    198 	mov	rbx,QWORD[((48+8-128))+rsi]
    199 	shl	r10,60
    200 	mov	BYTE[2+rsp],dl
    201 	or	rax,r10
    202 	mov	dl,bl
    203 	shr	rbx,4
    204 	mov	r10,r9
    205 	shr	r9,4
    206 	mov	QWORD[16+rbp],r8
    207 	mov	r8,QWORD[((64+0-128))+rsi]
    208 	shl	dl,4
    209 	mov	QWORD[((16-128))+rbp],rax
    210 	mov	rax,QWORD[((64+8-128))+rsi]
    211 	shl	r10,60
    212 	mov	BYTE[3+rsp],dl
    213 	or	rbx,r10
    214 	mov	dl,al
    215 	shr	rax,4
    216 	mov	r10,r8
    217 	shr	r8,4
    218 	mov	QWORD[24+rbp],r9
    219 	mov	r9,QWORD[((80+0-128))+rsi]
    220 	shl	dl,4
    221 	mov	QWORD[((24-128))+rbp],rbx
    222 	mov	rbx,QWORD[((80+8-128))+rsi]
    223 	shl	r10,60
    224 	mov	BYTE[4+rsp],dl
    225 	or	rax,r10
    226 	mov	dl,bl
    227 	shr	rbx,4
    228 	mov	r10,r9
    229 	shr	r9,4
    230 	mov	QWORD[32+rbp],r8
    231 	mov	r8,QWORD[((96+0-128))+rsi]
    232 	shl	dl,4
    233 	mov	QWORD[((32-128))+rbp],rax
    234 	mov	rax,QWORD[((96+8-128))+rsi]
    235 	shl	r10,60
    236 	mov	BYTE[5+rsp],dl
    237 	or	rbx,r10
    238 	mov	dl,al
    239 	shr	rax,4
    240 	mov	r10,r8
    241 	shr	r8,4
    242 	mov	QWORD[40+rbp],r9
    243 	mov	r9,QWORD[((112+0-128))+rsi]
    244 	shl	dl,4
    245 	mov	QWORD[((40-128))+rbp],rbx
    246 	mov	rbx,QWORD[((112+8-128))+rsi]
    247 	shl	r10,60
    248 	mov	BYTE[6+rsp],dl
    249 	or	rax,r10
    250 	mov	dl,bl
    251 	shr	rbx,4
    252 	mov	r10,r9
    253 	shr	r9,4
    254 	mov	QWORD[48+rbp],r8
    255 	mov	r8,QWORD[((128+0-128))+rsi]
    256 	shl	dl,4
    257 	mov	QWORD[((48-128))+rbp],rax
    258 	mov	rax,QWORD[((128+8-128))+rsi]
    259 	shl	r10,60
    260 	mov	BYTE[7+rsp],dl
    261 	or	rbx,r10
    262 	mov	dl,al
    263 	shr	rax,4
    264 	mov	r10,r8
    265 	shr	r8,4
    266 	mov	QWORD[56+rbp],r9
    267 	mov	r9,QWORD[((144+0-128))+rsi]
    268 	shl	dl,4
    269 	mov	QWORD[((56-128))+rbp],rbx
    270 	mov	rbx,QWORD[((144+8-128))+rsi]
    271 	shl	r10,60
    272 	mov	BYTE[8+rsp],dl
    273 	or	rax,r10
    274 	mov	dl,bl
    275 	shr	rbx,4
    276 	mov	r10,r9
    277 	shr	r9,4
    278 	mov	QWORD[64+rbp],r8
    279 	mov	r8,QWORD[((160+0-128))+rsi]
    280 	shl	dl,4
    281 	mov	QWORD[((64-128))+rbp],rax
    282 	mov	rax,QWORD[((160+8-128))+rsi]
    283 	shl	r10,60
    284 	mov	BYTE[9+rsp],dl
    285 	or	rbx,r10
    286 	mov	dl,al
    287 	shr	rax,4
    288 	mov	r10,r8
    289 	shr	r8,4
    290 	mov	QWORD[72+rbp],r9
    291 	mov	r9,QWORD[((176+0-128))+rsi]
    292 	shl	dl,4
    293 	mov	QWORD[((72-128))+rbp],rbx
    294 	mov	rbx,QWORD[((176+8-128))+rsi]
    295 	shl	r10,60
    296 	mov	BYTE[10+rsp],dl
    297 	or	rax,r10
    298 	mov	dl,bl
    299 	shr	rbx,4
    300 	mov	r10,r9
    301 	shr	r9,4
    302 	mov	QWORD[80+rbp],r8
    303 	mov	r8,QWORD[((192+0-128))+rsi]
    304 	shl	dl,4
    305 	mov	QWORD[((80-128))+rbp],rax
    306 	mov	rax,QWORD[((192+8-128))+rsi]
    307 	shl	r10,60
    308 	mov	BYTE[11+rsp],dl
    309 	or	rbx,r10
    310 	mov	dl,al
    311 	shr	rax,4
    312 	mov	r10,r8
    313 	shr	r8,4
    314 	mov	QWORD[88+rbp],r9
    315 	mov	r9,QWORD[((208+0-128))+rsi]
    316 	shl	dl,4
    317 	mov	QWORD[((88-128))+rbp],rbx
    318 	mov	rbx,QWORD[((208+8-128))+rsi]
    319 	shl	r10,60
    320 	mov	BYTE[12+rsp],dl
    321 	or	rax,r10
    322 	mov	dl,bl
    323 	shr	rbx,4
    324 	mov	r10,r9
    325 	shr	r9,4
    326 	mov	QWORD[96+rbp],r8
    327 	mov	r8,QWORD[((224+0-128))+rsi]
    328 	shl	dl,4
    329 	mov	QWORD[((96-128))+rbp],rax
    330 	mov	rax,QWORD[((224+8-128))+rsi]
    331 	shl	r10,60
    332 	mov	BYTE[13+rsp],dl
    333 	or	rbx,r10
    334 	mov	dl,al
    335 	shr	rax,4
    336 	mov	r10,r8
    337 	shr	r8,4
    338 	mov	QWORD[104+rbp],r9
    339 	mov	r9,QWORD[((240+0-128))+rsi]
    340 	shl	dl,4
    341 	mov	QWORD[((104-128))+rbp],rbx
    342 	mov	rbx,QWORD[((240+8-128))+rsi]
    343 	shl	r10,60
    344 	mov	BYTE[14+rsp],dl
    345 	or	rax,r10
    346 	mov	dl,bl
    347 	shr	rbx,4
    348 	mov	r10,r9
    349 	shr	r9,4
    350 	mov	QWORD[112+rbp],r8
    351 	shl	dl,4
    352 	mov	QWORD[((112-128))+rbp],rax
    353 	shl	r10,60
    354 	mov	BYTE[15+rsp],dl
    355 	or	rbx,r10
    356 	mov	QWORD[120+rbp],r9
    357 	mov	QWORD[((120-128))+rbp],rbx
    358 	add	rsi,-128	; undo the +128 bias: rsi points at the table start again
    359 	mov	r8,QWORD[8+rdi]	; r9:r8 = current accumulator
    360 	mov	r9,QWORD[rdi]
    361 	add	r15,r14	; r15 = end of input
    362 	lea	r11,[$L$rem_8bit]
    363 	jmp	NEAR $L$outer_loop
    364 ALIGN	16
        ; One pass per 16-byte input block. The block is XORed into the
        ; accumulator and parked at [rdi]; its bytes are then pulled out of edx
        ; with rol edx,8, four bytes per 32-bit reload. Per byte: al = low
        ; nibble*16 (offset into the table at rsi), rbx/rcx = high nibble (index
        ; into the stack tables), and r9:r8 is shifted right by 8 with the
        ; dropped byte selecting a word from $L$rem_8bit that is XORed into the
        ; top 16 bits of r9.
    365 $L$outer_loop:
    366 	xor	r9,QWORD[r14]
    367 	mov	rdx,QWORD[8+r14]
    368 	lea	r14,[16+r14]
    369 	xor	rdx,r8
    370 	mov	QWORD[rdi],r9
    371 	mov	QWORD[8+rdi],rdx
    372 	shr	rdx,32	; begin with bytes 12..15 of the XORed block
    373 	xor	rax,rax
    374 	rol	edx,8
    375 	mov	al,dl
    376 	movzx	ebx,dl
    377 	shl	al,4
    378 	shr	ebx,4
    379 	rol	edx,8
    380 	mov	r8,QWORD[8+rax*1+rsi]
    381 	mov	r9,QWORD[rax*1+rsi]
    382 	mov	al,dl
    383 	movzx	ecx,dl
    384 	shl	al,4
    385 	movzx	r12,BYTE[rbx*1+rsp]
    386 	shr	ecx,4
    387 	xor	r12,r8
    388 	mov	r10,r9
    389 	shr	r8,8
    390 	movzx	r12,r12b
    391 	shr	r9,8
    392 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    393 	shl	r10,56
    394 	xor	r9,QWORD[rbx*8+rbp]
    395 	rol	edx,8
    396 	xor	r8,QWORD[8+rax*1+rsi]
    397 	xor	r9,QWORD[rax*1+rsi]
    398 	mov	al,dl
    399 	xor	r8,r10
    400 	movzx	r12,WORD[r12*2+r11]
    401 	movzx	ebx,dl
    402 	shl	al,4
    403 	movzx	r13,BYTE[rcx*1+rsp]
    404 	shr	ebx,4
    405 	shl	r12,48
    406 	xor	r13,r8
    407 	mov	r10,r9
    408 	xor	r9,r12
    409 	shr	r8,8
    410 	movzx	r13,r13b
    411 	shr	r9,8
    412 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    413 	shl	r10,56
    414 	xor	r9,QWORD[rcx*8+rbp]
    415 	rol	edx,8
    416 	xor	r8,QWORD[8+rax*1+rsi]
    417 	xor	r9,QWORD[rax*1+rsi]
    418 	mov	al,dl
    419 	xor	r8,r10
    420 	movzx	r13,WORD[r13*2+r11]
    421 	movzx	ecx,dl
    422 	shl	al,4
    423 	movzx	r12,BYTE[rbx*1+rsp]
    424 	shr	ecx,4
    425 	shl	r13,48
    426 	xor	r12,r8
    427 	mov	r10,r9
    428 	xor	r9,r13
    429 	shr	r8,8
    430 	movzx	r12,r12b
    431 	mov	edx,DWORD[8+rdi]	; reload: bytes 8..11 of the XORed block
    432 	shr	r9,8
    433 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    434 	shl	r10,56
    435 	xor	r9,QWORD[rbx*8+rbp]
    436 	rol	edx,8
    437 	xor	r8,QWORD[8+rax*1+rsi]
    438 	xor	r9,QWORD[rax*1+rsi]
    439 	mov	al,dl
    440 	xor	r8,r10
    441 	movzx	r12,WORD[r12*2+r11]
    442 	movzx	ebx,dl
    443 	shl	al,4
    444 	movzx	r13,BYTE[rcx*1+rsp]
    445 	shr	ebx,4
    446 	shl	r12,48
    447 	xor	r13,r8
    448 	mov	r10,r9
    449 	xor	r9,r12
    450 	shr	r8,8
    451 	movzx	r13,r13b
    452 	shr	r9,8
    453 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    454 	shl	r10,56
    455 	xor	r9,QWORD[rcx*8+rbp]
    456 	rol	edx,8
    457 	xor	r8,QWORD[8+rax*1+rsi]
    458 	xor	r9,QWORD[rax*1+rsi]
    459 	mov	al,dl
    460 	xor	r8,r10
    461 	movzx	r13,WORD[r13*2+r11]
    462 	movzx	ecx,dl
    463 	shl	al,4
    464 	movzx	r12,BYTE[rbx*1+rsp]
    465 	shr	ecx,4
    466 	shl	r13,48
    467 	xor	r12,r8
    468 	mov	r10,r9
    469 	xor	r9,r13
    470 	shr	r8,8
    471 	movzx	r12,r12b
    472 	shr	r9,8
    473 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    474 	shl	r10,56
    475 	xor	r9,QWORD[rbx*8+rbp]
    476 	rol	edx,8
    477 	xor	r8,QWORD[8+rax*1+rsi]
    478 	xor	r9,QWORD[rax*1+rsi]
    479 	mov	al,dl
    480 	xor	r8,r10
    481 	movzx	r12,WORD[r12*2+r11]
    482 	movzx	ebx,dl
    483 	shl	al,4
    484 	movzx	r13,BYTE[rcx*1+rsp]
    485 	shr	ebx,4
    486 	shl	r12,48
    487 	xor	r13,r8
    488 	mov	r10,r9
    489 	xor	r9,r12
    490 	shr	r8,8
    491 	movzx	r13,r13b
    492 	shr	r9,8
    493 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    494 	shl	r10,56
    495 	xor	r9,QWORD[rcx*8+rbp]
    496 	rol	edx,8
    497 	xor	r8,QWORD[8+rax*1+rsi]
    498 	xor	r9,QWORD[rax*1+rsi]
    499 	mov	al,dl
    500 	xor	r8,r10
    501 	movzx	r13,WORD[r13*2+r11]
    502 	movzx	ecx,dl
    503 	shl	al,4
    504 	movzx	r12,BYTE[rbx*1+rsp]
    505 	shr	ecx,4
    506 	shl	r13,48
    507 	xor	r12,r8
    508 	mov	r10,r9
    509 	xor	r9,r13
    510 	shr	r8,8
    511 	movzx	r12,r12b
    512 	mov	edx,DWORD[4+rdi]	; reload: bytes 4..7 of the XORed block
    513 	shr	r9,8
    514 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    515 	shl	r10,56
    516 	xor	r9,QWORD[rbx*8+rbp]
    517 	rol	edx,8
    518 	xor	r8,QWORD[8+rax*1+rsi]
    519 	xor	r9,QWORD[rax*1+rsi]
    520 	mov	al,dl
    521 	xor	r8,r10
    522 	movzx	r12,WORD[r12*2+r11]
    523 	movzx	ebx,dl
    524 	shl	al,4
    525 	movzx	r13,BYTE[rcx*1+rsp]
    526 	shr	ebx,4
    527 	shl	r12,48
    528 	xor	r13,r8
    529 	mov	r10,r9
    530 	xor	r9,r12
    531 	shr	r8,8
    532 	movzx	r13,r13b
    533 	shr	r9,8
    534 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    535 	shl	r10,56
    536 	xor	r9,QWORD[rcx*8+rbp]
    537 	rol	edx,8
    538 	xor	r8,QWORD[8+rax*1+rsi]
    539 	xor	r9,QWORD[rax*1+rsi]
    540 	mov	al,dl
    541 	xor	r8,r10
    542 	movzx	r13,WORD[r13*2+r11]
    543 	movzx	ecx,dl
    544 	shl	al,4
    545 	movzx	r12,BYTE[rbx*1+rsp]
    546 	shr	ecx,4
    547 	shl	r13,48
    548 	xor	r12,r8
    549 	mov	r10,r9
    550 	xor	r9,r13
    551 	shr	r8,8
    552 	movzx	r12,r12b
    553 	shr	r9,8
    554 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    555 	shl	r10,56
    556 	xor	r9,QWORD[rbx*8+rbp]
    557 	rol	edx,8
    558 	xor	r8,QWORD[8+rax*1+rsi]
    559 	xor	r9,QWORD[rax*1+rsi]
    560 	mov	al,dl
    561 	xor	r8,r10
    562 	movzx	r12,WORD[r12*2+r11]
    563 	movzx	ebx,dl
    564 	shl	al,4
    565 	movzx	r13,BYTE[rcx*1+rsp]
    566 	shr	ebx,4
    567 	shl	r12,48
    568 	xor	r13,r8
    569 	mov	r10,r9
    570 	xor	r9,r12
    571 	shr	r8,8
    572 	movzx	r13,r13b
    573 	shr	r9,8
    574 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    575 	shl	r10,56
    576 	xor	r9,QWORD[rcx*8+rbp]
    577 	rol	edx,8
    578 	xor	r8,QWORD[8+rax*1+rsi]
    579 	xor	r9,QWORD[rax*1+rsi]
    580 	mov	al,dl
    581 	xor	r8,r10
    582 	movzx	r13,WORD[r13*2+r11]
    583 	movzx	ecx,dl
    584 	shl	al,4
    585 	movzx	r12,BYTE[rbx*1+rsp]
    586 	shr	ecx,4
    587 	shl	r13,48
    588 	xor	r12,r8
    589 	mov	r10,r9
    590 	xor	r9,r13
    591 	shr	r8,8
    592 	movzx	r12,r12b
    593 	mov	edx,DWORD[rdi]	; reload: bytes 0..3 of the XORed block
    594 	shr	r9,8
    595 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    596 	shl	r10,56
    597 	xor	r9,QWORD[rbx*8+rbp]
    598 	rol	edx,8
    599 	xor	r8,QWORD[8+rax*1+rsi]
    600 	xor	r9,QWORD[rax*1+rsi]
    601 	mov	al,dl
    602 	xor	r8,r10
    603 	movzx	r12,WORD[r12*2+r11]
    604 	movzx	ebx,dl
    605 	shl	al,4
    606 	movzx	r13,BYTE[rcx*1+rsp]
    607 	shr	ebx,4
    608 	shl	r12,48
    609 	xor	r13,r8
    610 	mov	r10,r9
    611 	xor	r9,r12
    612 	shr	r8,8
    613 	movzx	r13,r13b
    614 	shr	r9,8
    615 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    616 	shl	r10,56
    617 	xor	r9,QWORD[rcx*8+rbp]
    618 	rol	edx,8
    619 	xor	r8,QWORD[8+rax*1+rsi]
    620 	xor	r9,QWORD[rax*1+rsi]
    621 	mov	al,dl
    622 	xor	r8,r10
    623 	movzx	r13,WORD[r13*2+r11]
    624 	movzx	ecx,dl
    625 	shl	al,4
    626 	movzx	r12,BYTE[rbx*1+rsp]
    627 	shr	ecx,4
    628 	shl	r13,48
    629 	xor	r12,r8
    630 	mov	r10,r9
    631 	xor	r9,r13
    632 	shr	r8,8
    633 	movzx	r12,r12b
    634 	shr	r9,8
    635 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    636 	shl	r10,56
    637 	xor	r9,QWORD[rbx*8+rbp]
    638 	rol	edx,8
    639 	xor	r8,QWORD[8+rax*1+rsi]
    640 	xor	r9,QWORD[rax*1+rsi]
    641 	mov	al,dl
    642 	xor	r8,r10
    643 	movzx	r12,WORD[r12*2+r11]
    644 	movzx	ebx,dl
    645 	shl	al,4
    646 	movzx	r13,BYTE[rcx*1+rsp]
    647 	shr	ebx,4
    648 	shl	r12,48
    649 	xor	r13,r8
    650 	mov	r10,r9
    651 	xor	r9,r12
    652 	shr	r8,8
    653 	movzx	r13,r13b
    654 	shr	r9,8
    655 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    656 	shl	r10,56
    657 	xor	r9,QWORD[rcx*8+rbp]
    658 	rol	edx,8
    659 	xor	r8,QWORD[8+rax*1+rsi]
    660 	xor	r9,QWORD[rax*1+rsi]
    661 	mov	al,dl
    662 	xor	r8,r10
    663 	movzx	r13,WORD[r13*2+r11]
    664 	movzx	ecx,dl
    665 	shl	al,4
    666 	movzx	r12,BYTE[rbx*1+rsp]
    667 	and	ecx,240	; last byte: keep high nibble*16 as a direct offset into the table at rsi
    668 	shl	r13,48
    669 	xor	r12,r8
    670 	mov	r10,r9
    671 	xor	r9,r13
    672 	shr	r8,8
    673 	movzx	r12,r12b
    674 	mov	edx,DWORD[((-4))+rdi]	; NOTE(review): reads 4 bytes below rdi; edx is not read again before line 367 overwrites rdx
    675 	shr	r9,8
    676 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    677 	shl	r10,56
    678 	xor	r9,QWORD[rbx*8+rbp]
    679 	movzx	r12,WORD[r12*2+r11]
    680 	xor	r8,QWORD[8+rax*1+rsi]
    681 	xor	r9,QWORD[rax*1+rsi]
    682 	shl	r12,48
    683 	xor	r8,r10
    684 	xor	r9,r12
    685 	movzx	r13,r8b
    686 	shr	r8,4	; final step shifts by 4 only and uses the unshifted table at rsi
    687 	mov	r10,r9
    688 	shl	r13b,4
    689 	shr	r9,4
    690 	xor	r8,QWORD[8+rcx*1+rsi]
    691 	movzx	r13,WORD[r13*2+r11]
    692 	shl	r10,60
    693 	xor	r9,QWORD[rcx*1+rsi]
    694 	xor	r8,r10
    695 	shl	r13,48
    696 	bswap	r8
    697 	xor	r9,r13
    698 	bswap	r9
    699 	cmp	r14,r15
    700 	jb	NEAR $L$outer_loop	; unsigned compare: more input remains
    701 	mov	QWORD[8+rdi],r8
    702 	mov	QWORD[rdi],r9
    703 
    704 	lea	rsi,[((280+48))+rsp]	; rsi = rsp as it was before the six pushes
    705 
    706 	mov	r15,QWORD[((-48))+rsi]
    707 
    708 	mov	r14,QWORD[((-40))+rsi]
    709 
    710 	mov	r13,QWORD[((-32))+rsi]
    711 
    712 	mov	r12,QWORD[((-24))+rsi]
    713 
    714 	mov	rbp,QWORD[((-16))+rsi]
    715 
    716 	mov	rbx,QWORD[((-8))+rsi]
    717 
    718 	lea	rsp,[rsi]
    719 
    720 $L$ghash_epilogue:
    721 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    722 	mov	rsi,QWORD[16+rsp]
    723 	DB	0F3h,0C3h		;repret
    724 
    725 $L$SEH_end_gcm_ghash_4bit:
    726 global	gcm_init_clmul
        ; gcm_init_clmul -- fill a 96-byte table at [rcx] from the 16-byte value
        ; at [rdx], using pclmulqdq. The Win64 argument registers are used
        ; directly (no copy to rdi/rsi).
        ; Output layout, as stored below (V = the adjusted input value in xmm2):
        ;   [rcx+0]   V                      [rcx+16]  V*V
        ;   [rcx+32]  hi^lo of V (low qword) and hi^lo of V*V (high qword)
        ;   [rcx+48]  (V*V)*V                [rcx+64]  ((V*V)*V)*V
        ;   [rcx+80]  hi^lo of the two entries above, packed the same way
        ; where "*" is the multiply-and-fold sequence that appears three times.
        ; NOTE(review): presumably V is the hash key H and the table holds
        ; H..H^4 -- confirm against the C caller.
        ; xmm6 is spilled to a 24-byte stack slot and reloaded before returning.
        ; $L$0x1c2_polynomial is a 16-byte constant defined outside this view.
    727 
    728 ALIGN	16
    729 gcm_init_clmul:
    730 
    731 $L$_init_clmul:
    732 $L$SEH_begin_gcm_init_clmul:
    733 
    734 DB	0x48,0x83,0xec,0x18	; sub rsp,0x18
    735 DB	0x0f,0x29,0x34,0x24	; movaps [rsp],xmm6
    736 	movdqu	xmm2,XMMWORD[rdx]
    737 	pshufd	xmm2,xmm2,78	; swap the two 64-bit halves
    738 
    739 
        ; Shift the 128-bit value left by one bit; if the bit shifted out was
        ; set, XOR in $L$0x1c2_polynomial.
    740 	pshufd	xmm4,xmm2,255	; broadcast the top dword (carries the top bit)
    741 	movdqa	xmm3,xmm2
    742 	psllq	xmm2,1
    743 	pxor	xmm5,xmm5
    744 	psrlq	xmm3,63
    745 	pcmpgtd	xmm5,xmm4	; all-ones where 0 > top dword, i.e. top bit was set
    746 	pslldq	xmm3,8	; carry from the low qword moves into the high qword
    747 	por	xmm2,xmm3
    748 
    749 
    750 	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
    751 	pxor	xmm2,xmm5
    752 
    753 
    754 	pshufd	xmm6,xmm2,78
    755 	movdqa	xmm0,xmm2
    756 	pxor	xmm6,xmm2	; xmm6 = hi^lo of V, operand of the middle product
        ; --- multiply xmm0 by V (first time: V*V) ---
        ; Three carry-less products (lo*lo, hi*hi, (hi^lo)*(hi^lo)) are combined
        ; into the 256-bit value xmm1:xmm0.
    757 	movdqa	xmm1,xmm0
    758 	pshufd	xmm3,xmm0,78
    759 	pxor	xmm3,xmm0
    760 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
    761 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
    762 DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
    763 	pxor	xmm3,xmm0
    764 	pxor	xmm3,xmm1
    765 
    766 	movdqa	xmm4,xmm3
    767 	psrldq	xmm3,8
    768 	pslldq	xmm4,8
    769 	pxor	xmm1,xmm3
    770 	pxor	xmm0,xmm4
    771 
        ; Fold xmm1:xmm0 back to 128 bits in xmm0: a left-shift phase
        ; (5, 1, 57) followed by a right-shift phase (1, 5, 1).
    772 	movdqa	xmm4,xmm0
    773 	movdqa	xmm3,xmm0
    774 	psllq	xmm0,5
    775 	pxor	xmm3,xmm0
    776 	psllq	xmm0,1
    777 	pxor	xmm0,xmm3
    778 	psllq	xmm0,57
    779 	movdqa	xmm3,xmm0
    780 	pslldq	xmm0,8
    781 	psrldq	xmm3,8
    782 	pxor	xmm0,xmm4
    783 	pxor	xmm1,xmm3
    784 
    785 
    786 	movdqa	xmm4,xmm0
    787 	psrlq	xmm0,1
    788 	pxor	xmm1,xmm4
    789 	pxor	xmm4,xmm0
    790 	psrlq	xmm0,5
    791 	pxor	xmm0,xmm4
    792 	psrlq	xmm0,1
    793 	pxor	xmm0,xmm1
    794 	pshufd	xmm3,xmm2,78
    795 	pshufd	xmm4,xmm0,78
    796 	pxor	xmm3,xmm2
    797 	movdqu	XMMWORD[rcx],xmm2
    798 	pxor	xmm4,xmm0
    799 	movdqu	XMMWORD[16+rcx],xmm0
    800 DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8 -- pack the two hi^lo qwords
    801 	movdqu	XMMWORD[32+rcx],xmm4
        ; --- multiply xmm0 by V again ---
    802 	movdqa	xmm1,xmm0
    803 	pshufd	xmm3,xmm0,78
    804 	pxor	xmm3,xmm0
    805 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
    806 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
    807 DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
    808 	pxor	xmm3,xmm0
    809 	pxor	xmm3,xmm1
    810 
    811 	movdqa	xmm4,xmm3
    812 	psrldq	xmm3,8
    813 	pslldq	xmm4,8
    814 	pxor	xmm1,xmm3
    815 	pxor	xmm0,xmm4
    816 
    817 	movdqa	xmm4,xmm0
    818 	movdqa	xmm3,xmm0
    819 	psllq	xmm0,5
    820 	pxor	xmm3,xmm0
    821 	psllq	xmm0,1
    822 	pxor	xmm0,xmm3
    823 	psllq	xmm0,57
    824 	movdqa	xmm3,xmm0
    825 	pslldq	xmm0,8
    826 	psrldq	xmm3,8
    827 	pxor	xmm0,xmm4
    828 	pxor	xmm1,xmm3
    829 
    830 
    831 	movdqa	xmm4,xmm0
    832 	psrlq	xmm0,1
    833 	pxor	xmm1,xmm4
    834 	pxor	xmm4,xmm0
    835 	psrlq	xmm0,5
    836 	pxor	xmm0,xmm4
    837 	psrlq	xmm0,1
    838 	pxor	xmm0,xmm1
    839 	movdqa	xmm5,xmm0	; keep this product; it is stored at [rcx+48] below
        ; --- multiply xmm0 by V a third time ---
    840 	movdqa	xmm1,xmm0
    841 	pshufd	xmm3,xmm0,78
    842 	pxor	xmm3,xmm0
    843 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
    844 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
    845 DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
    846 	pxor	xmm3,xmm0
    847 	pxor	xmm3,xmm1
    848 
    849 	movdqa	xmm4,xmm3
    850 	psrldq	xmm3,8
    851 	pslldq	xmm4,8
    852 	pxor	xmm1,xmm3
    853 	pxor	xmm0,xmm4
    854 
    855 	movdqa	xmm4,xmm0
    856 	movdqa	xmm3,xmm0
    857 	psllq	xmm0,5
    858 	pxor	xmm3,xmm0
    859 	psllq	xmm0,1
    860 	pxor	xmm0,xmm3
    861 	psllq	xmm0,57
    862 	movdqa	xmm3,xmm0
    863 	pslldq	xmm0,8
    864 	psrldq	xmm3,8
    865 	pxor	xmm0,xmm4
    866 	pxor	xmm1,xmm3
    867 
    868 
    869 	movdqa	xmm4,xmm0
    870 	psrlq	xmm0,1
    871 	pxor	xmm1,xmm4
    872 	pxor	xmm4,xmm0
    873 	psrlq	xmm0,5
    874 	pxor	xmm0,xmm4
    875 	psrlq	xmm0,1
    876 	pxor	xmm0,xmm1
    877 	pshufd	xmm3,xmm5,78
    878 	pshufd	xmm4,xmm0,78
    879 	pxor	xmm3,xmm5
    880 	movdqu	XMMWORD[48+rcx],xmm5
    881 	pxor	xmm4,xmm0
    882 	movdqu	XMMWORD[64+rcx],xmm0
    883 DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
    884 	movdqu	XMMWORD[80+rcx],xmm4
    885 	movaps	xmm6,XMMWORD[rsp]	; reload the xmm6 spilled at entry
    886 	lea	rsp,[24+rsp]
    887 $L$SEH_end_gcm_init_clmul:
    888 	DB	0F3h,0C3h		;repret
    889 
    890 
    891 global	gcm_gmult_clmul
        ; gcm_gmult_clmul -- one pclmulqdq multiply of the 16-byte block at [rcx]
        ; by the table entry at [rdx]; the result replaces the block at [rcx].
        ;   rcx: 16-byte block, read and written
        ;   rdx: table; [rdx] is the multiplier and [rdx+32] supplies the
        ;        operand for the middle (hi^lo) product
        ; NOTE(review): the table is presumably the one written by gcm_init_clmul -- confirm.
        ; Only xmm0-xmm5 are used and the stack is not touched.
        ; $L$bswap_mask is a 16-byte constant defined outside this view.
    892 
    893 ALIGN	16
    894 gcm_gmult_clmul:
    895 
    896 $L$_gmult_clmul:
    897 	movdqu	xmm0,XMMWORD[rcx]
    898 	movdqa	xmm5,XMMWORD[$L$bswap_mask]
    899 	movdqu	xmm2,XMMWORD[rdx]
    900 	movdqu	xmm4,XMMWORD[32+rdx]
    901 DB	102,15,56,0,197	; pshufb xmm0,xmm5 -- reorder the input bytes with the mask
    902 	movdqa	xmm1,xmm0
    903 	pshufd	xmm3,xmm0,78
    904 	pxor	xmm3,xmm0	; xmm3 = hi^lo of the block
    905 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
    906 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
    907 DB	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00
    908 	pxor	xmm3,xmm0
    909 	pxor	xmm3,xmm1
    910 
    911 	movdqa	xmm4,xmm3
    912 	psrldq	xmm3,8
    913 	pslldq	xmm4,8
    914 	pxor	xmm1,xmm3
    915 	pxor	xmm0,xmm4	; xmm1:xmm0 now holds the 256-bit product
    916 
        ; Fold xmm1:xmm0 back to 128 bits in xmm0: a left-shift phase
        ; (5, 1, 57) followed by a right-shift phase (1, 5, 1).
    917 	movdqa	xmm4,xmm0
    918 	movdqa	xmm3,xmm0
    919 	psllq	xmm0,5
    920 	pxor	xmm3,xmm0
    921 	psllq	xmm0,1
    922 	pxor	xmm0,xmm3
    923 	psllq	xmm0,57
    924 	movdqa	xmm3,xmm0
    925 	pslldq	xmm0,8
    926 	psrldq	xmm3,8
    927 	pxor	xmm0,xmm4
    928 	pxor	xmm1,xmm3
    929 
    930 
    931 	movdqa	xmm4,xmm0
    932 	psrlq	xmm0,1
    933 	pxor	xmm1,xmm4
    934 	pxor	xmm4,xmm0
    935 	psrlq	xmm0,5
    936 	pxor	xmm0,xmm4
    937 	psrlq	xmm0,1
    938 	pxor	xmm0,xmm1
    939 DB	102,15,56,0,197	; pshufb xmm0,xmm5 -- same byte reordering on the way out
    940 	movdqu	XMMWORD[rcx],xmm0
    941 	DB	0F3h,0C3h		;repret
    942 
    943 
    944 global	gcm_ghash_clmul
        ; gcm_ghash_clmul -- pclmulqdq hash update over a buffer.
        ; Win64 argument registers are used directly:
        ;   rcx: 16-byte accumulator (loaded at entry, stored at $L$done)
        ;   rdx: table; offsets 0, 16, 32, 48, 64 and 80 are read
        ;   r8:  input pointer
        ;   r9:  byte count
        ; NOTE(review): the count is presumably a non-zero multiple of 16 and the
        ; table the one written by gcm_init_clmul -- confirm with callers.
        ; xmm6-xmm15 are saved in a 168-byte frame and restored at $L$done.
        ; Paths: four blocks per pass ($L$mod4_loop) when at least 64 bytes are
        ; present and the capability test passes; otherwise two blocks per pass
        ; ($L$mod_loop); $L$odd_tail handles a final single block.
        ; $L$bswap_mask and $L$7_mask are constants defined outside this view.
    945 
    946 ALIGN	32
    947 gcm_ghash_clmul:
    948 
    949 $L$_ghash_clmul:
    950 	lea	rax,[((-136))+rsp]
    951 $L$SEH_begin_gcm_ghash_clmul:
    952 
    953 DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]  (rsp -= 168)
    954 DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
    955 DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
    956 DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
    957 DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
    958 DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
    959 DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
    960 DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
    961 DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
    962 DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
    963 DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
    964 	movdqa	xmm10,XMMWORD[$L$bswap_mask]	; xmm10 = byte-shuffle mask for the whole routine
    965 
    966 	movdqu	xmm0,XMMWORD[rcx]	; xmm0 = accumulator
    967 	movdqu	xmm2,XMMWORD[rdx]
    968 	movdqu	xmm7,XMMWORD[32+rdx]
    969 DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
    970 
    971 	sub	r9,0x10
    972 	jz	NEAR $L$odd_tail	; exactly one block
    973 
    974 	movdqu	xmm6,XMMWORD[16+rdx]
    975 	lea	rax,[OPENSSL_ia32cap_P]
    976 	mov	eax,DWORD[4+rax]
    977 	cmp	r9,0x30
    978 	jb	NEAR $L$skip4x	; fewer than 64 bytes in total
    979 
        ; Mask 71303168 = bits 22 and 26; 4194304 = bit 22 alone. The 4x path is
        ; skipped when bit 22 is set and bit 26 is clear.
        ; NOTE(review): presumably a check for a specific CPU family -- confirm
        ; the bit meanings against the OPENSSL_ia32cap_P documentation.
    980 	and	eax,71303168
    981 	cmp	eax,4194304
    982 	je	NEAR $L$skip4x
    983 
    984 	sub	r9,0x30
    985 	mov	rax,0xA040608020C0E000	; byte table, moved into xmm9 inside $L$mod4_loop
    986 	movdqu	xmm14,XMMWORD[48+rdx]
    987 	movdqu	xmm15,XMMWORD[64+rdx]
    988 
    989 
    990 
    991 
        ; Prime the 4x pipeline with the first four input blocks (r8+0..63).
    992 	movdqu	xmm3,XMMWORD[48+r8]
    993 	movdqu	xmm11,XMMWORD[32+r8]
    994 DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
    995 DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
    996 	movdqa	xmm5,xmm3
    997 	pshufd	xmm4,xmm3,78
    998 	pxor	xmm4,xmm3
    999 DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
   1000 DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
   1001 DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
   1002 
   1003 	movdqa	xmm13,xmm11
   1004 	pshufd	xmm12,xmm11,78
   1005 	pxor	xmm12,xmm11
   1006 DB	102,68,15,58,68,222,0	; pclmulqdq xmm11,xmm6,0x00
   1007 DB	102,68,15,58,68,238,17	; pclmulqdq xmm13,xmm6,0x11
   1008 DB	102,68,15,58,68,231,16	; pclmulqdq xmm12,xmm7,0x10
   1009 	xorps	xmm3,xmm11
   1010 	xorps	xmm5,xmm13
   1011 	movups	xmm7,XMMWORD[80+rdx]
   1012 	xorps	xmm4,xmm12
   1013 
   1014 	movdqu	xmm11,XMMWORD[16+r8]
   1015 	movdqu	xmm8,XMMWORD[r8]
   1016 DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
   1017 DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
   1018 	movdqa	xmm13,xmm11
   1019 	pshufd	xmm12,xmm11,78
   1020 	pxor	xmm0,xmm8	; the accumulator is XORed into the first block only
   1021 	pxor	xmm12,xmm11
   1022 DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
   1023 	movdqa	xmm1,xmm0
   1024 	pshufd	xmm8,xmm0,78
   1025 	pxor	xmm8,xmm0
   1026 DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
   1027 DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
   1028 	xorps	xmm3,xmm11
   1029 	xorps	xmm5,xmm13
   1030 
   1031 	lea	r8,[64+r8]
   1032 	sub	r9,0x40
   1033 	jc	NEAR $L$tail4x	; fewer than four further blocks: finish this group
   1034 
   1035 	jmp	NEAR $L$mod4_loop
   1036 ALIGN	32
        ; Each pass finishes the multiply and fold of the previous four blocks
        ; while starting the products for the next four; instructions of the
        ; two groups are interleaved.
   1037 $L$mod4_loop:
   1038 DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
   1039 	xorps	xmm4,xmm12
   1040 	movdqu	xmm11,XMMWORD[48+r8]
   1041 DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
   1042 DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
   1043 	xorps	xmm0,xmm3
   1044 	movdqu	xmm3,XMMWORD[32+r8]
   1045 	movdqa	xmm13,xmm11
   1046 DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
   1047 	pshufd	xmm12,xmm11,78
   1048 	xorps	xmm1,xmm5
   1049 	pxor	xmm12,xmm11
   1050 DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
   1051 	movups	xmm7,XMMWORD[32+rdx]
   1052 	xorps	xmm8,xmm4
   1053 DB	102,68,15,58,68,218,0	; pclmulqdq xmm11,xmm2,0x00
   1054 	pshufd	xmm4,xmm3,78
   1055 
   1056 	pxor	xmm8,xmm0
   1057 	movdqa	xmm5,xmm3
   1058 	pxor	xmm8,xmm1
   1059 	pxor	xmm4,xmm3
   1060 	movdqa	xmm9,xmm8
   1061 DB	102,68,15,58,68,234,17	; pclmulqdq xmm13,xmm2,0x11
   1062 	pslldq	xmm8,8
   1063 	psrldq	xmm9,8
   1064 	pxor	xmm0,xmm8
   1065 	movdqa	xmm8,XMMWORD[$L$7_mask]
   1066 	pxor	xmm1,xmm9
   1067 DB	102,76,15,110,200	; movq xmm9,rax  (the byte table loaded at line 985)
   1068 
   1069 	pand	xmm8,xmm0
   1070 DB	102,69,15,56,0,200	; pshufb xmm9,xmm8 -- byte-table lookup indexed by the masked bits of xmm0
   1071 	pxor	xmm9,xmm0
   1072 DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
   1073 	psllq	xmm9,57
   1074 	movdqa	xmm8,xmm9
   1075 	pslldq	xmm9,8
   1076 DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
   1077 	psrldq	xmm8,8
   1078 	pxor	xmm0,xmm9
   1079 	pxor	xmm1,xmm8
   1080 	movdqu	xmm8,XMMWORD[r8]
   1081 
   1082 	movdqa	xmm9,xmm0
   1083 	psrlq	xmm0,1
   1084 DB	102,15,58,68,238,17	; pclmulqdq xmm5,xmm6,0x11
   1085 	xorps	xmm3,xmm11
   1086 	movdqu	xmm11,XMMWORD[16+r8]
   1087 DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
   1088 DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
   1089 	xorps	xmm5,xmm13
   1090 	movups	xmm7,XMMWORD[80+rdx]
   1091 DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
   1092 	pxor	xmm1,xmm9
   1093 	pxor	xmm9,xmm0
   1094 	psrlq	xmm0,5
   1095 
   1096 	movdqa	xmm13,xmm11
   1097 	pxor	xmm4,xmm12
   1098 	pshufd	xmm12,xmm11,78
   1099 	pxor	xmm0,xmm9
   1100 	pxor	xmm1,xmm8
   1101 	pxor	xmm12,xmm11
   1102 DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
   1103 	psrlq	xmm0,1
   1104 	pxor	xmm0,xmm1
   1105 	movdqa	xmm1,xmm0
   1106 DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
   1107 	xorps	xmm3,xmm11
   1108 	pshufd	xmm8,xmm0,78
   1109 	pxor	xmm8,xmm0
   1110 
   1111 DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
   1112 	xorps	xmm5,xmm13
   1113 
   1114 	lea	r8,[64+r8]
   1115 	sub	r9,0x40
   1116 	jnc	NEAR $L$mod4_loop	; no borrow: four more blocks were available
   1117 
        ; Finish the last group of four: remaining products, then the fold.
   1118 $L$tail4x:
   1119 DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
   1120 DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
   1121 DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
   1122 	xorps	xmm4,xmm12
   1123 	xorps	xmm0,xmm3
   1124 	xorps	xmm1,xmm5
   1125 	pxor	xmm1,xmm0
   1126 	pxor	xmm8,xmm4
   1127 
   1128 	pxor	xmm8,xmm1
   1129 	pxor	xmm1,xmm0
   1130 
   1131 	movdqa	xmm9,xmm8
   1132 	psrldq	xmm8,8
   1133 	pslldq	xmm9,8
   1134 	pxor	xmm1,xmm8
   1135 	pxor	xmm0,xmm9
   1136 
   1137 	movdqa	xmm4,xmm0
   1138 	movdqa	xmm3,xmm0
   1139 	psllq	xmm0,5
   1140 	pxor	xmm3,xmm0
   1141 	psllq	xmm0,1
   1142 	pxor	xmm0,xmm3
   1143 	psllq	xmm0,57
   1144 	movdqa	xmm3,xmm0
   1145 	pslldq	xmm0,8
   1146 	psrldq	xmm3,8
   1147 	pxor	xmm0,xmm4
   1148 	pxor	xmm1,xmm3
   1149 
   1150 
   1151 	movdqa	xmm4,xmm0
   1152 	psrlq	xmm0,1
   1153 	pxor	xmm1,xmm4
   1154 	pxor	xmm4,xmm0
   1155 	psrlq	xmm0,5
   1156 	pxor	xmm0,xmm4
   1157 	psrlq	xmm0,1
   1158 	pxor	xmm0,xmm1
   1159 	add	r9,0x40	; undo the last subtraction: r9 = bytes still unprocessed
   1160 	jz	NEAR $L$done
   1161 	movdqu	xmm7,XMMWORD[32+rdx]	; xmm7 was switched to [rdx+80] in the 4x path; reload [rdx+32]
   1162 	sub	r9,0x10
   1163 	jz	NEAR $L$odd_tail
   1164 $L$skip4x:
   1165 
   1166 
   1167 
   1168 
   1169 
        ; Two-blocks-per-pass path: load a pair and start the products of the
        ; second block.
   1170 	movdqu	xmm8,XMMWORD[r8]
   1171 	movdqu	xmm3,XMMWORD[16+r8]
   1172 DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
   1173 DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
   1174 	pxor	xmm0,xmm8
   1175 
   1176 	movdqa	xmm5,xmm3
   1177 	pshufd	xmm4,xmm3,78
   1178 	pxor	xmm4,xmm3
   1179 DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
   1180 DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
   1181 DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
   1182 
   1183 	lea	r8,[32+r8]
   1184 	nop
   1185 	sub	r9,0x20
   1186 	jbe	NEAR $L$even_tail	; no further full pair follows
   1187 	nop
   1188 	jmp	NEAR $L$mod_loop
   1189 
   1190 ALIGN	32
   1191 $L$mod_loop:
   1192 	movdqa	xmm1,xmm0
   1193 	movdqa	xmm8,xmm4
   1194 	pshufd	xmm4,xmm0,78
   1195 	pxor	xmm4,xmm0
   1196 
   1197 DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
   1198 DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
   1199 DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
   1200 
   1201 	pxor	xmm0,xmm3
   1202 	pxor	xmm1,xmm5
   1203 	movdqu	xmm9,XMMWORD[r8]
   1204 	pxor	xmm8,xmm0
   1205 DB	102,69,15,56,0,202	; pshufb xmm9,xmm10
   1206 	movdqu	xmm3,XMMWORD[16+r8]
   1207 
   1208 	pxor	xmm8,xmm1
   1209 	pxor	xmm1,xmm9
   1210 	pxor	xmm4,xmm8
   1211 DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
   1212 	movdqa	xmm8,xmm4
   1213 	psrldq	xmm8,8
   1214 	pslldq	xmm4,8
   1215 	pxor	xmm1,xmm8
   1216 	pxor	xmm0,xmm4
   1217 
   1218 	movdqa	xmm5,xmm3
   1219 
   1220 	movdqa	xmm9,xmm0
   1221 	movdqa	xmm8,xmm0
   1222 	psllq	xmm0,5
   1223 	pxor	xmm8,xmm0
   1224 DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
   1225 	psllq	xmm0,1
   1226 	pxor	xmm0,xmm8
   1227 	psllq	xmm0,57
   1228 	movdqa	xmm8,xmm0
   1229 	pslldq	xmm0,8
   1230 	psrldq	xmm8,8
   1231 	pxor	xmm0,xmm9
   1232 	pshufd	xmm4,xmm5,78
   1233 	pxor	xmm1,xmm8
   1234 	pxor	xmm4,xmm5
   1235 
   1236 	movdqa	xmm9,xmm0
   1237 	psrlq	xmm0,1
   1238 DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
   1239 	pxor	xmm1,xmm9
   1240 	pxor	xmm9,xmm0
   1241 	psrlq	xmm0,5
   1242 	pxor	xmm0,xmm9
   1243 	lea	r8,[32+r8]
   1244 	psrlq	xmm0,1
   1245 DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
   1246 	pxor	xmm0,xmm1
   1247 
   1248 	sub	r9,0x20
   1249 	ja	NEAR $L$mod_loop
   1250 
        ; Finish the pending pair: remaining products, combine, fold.
   1251 $L$even_tail:
   1252 	movdqa	xmm1,xmm0
   1253 	movdqa	xmm8,xmm4
   1254 	pshufd	xmm4,xmm0,78
   1255 	pxor	xmm4,xmm0
   1256 
   1257 DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
   1258 DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
   1259 DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
   1260 
   1261 	pxor	xmm0,xmm3
   1262 	pxor	xmm1,xmm5
   1263 	pxor	xmm8,xmm0
   1264 	pxor	xmm8,xmm1
   1265 	pxor	xmm4,xmm8
   1266 	movdqa	xmm8,xmm4
   1267 	psrldq	xmm8,8
   1268 	pslldq	xmm4,8
   1269 	pxor	xmm1,xmm8
   1270 	pxor	xmm0,xmm4
   1271 
   1272 	movdqa	xmm4,xmm0
   1273 	movdqa	xmm3,xmm0
   1274 	psllq	xmm0,5
   1275 	pxor	xmm3,xmm0
   1276 	psllq	xmm0,1
   1277 	pxor	xmm0,xmm3
   1278 	psllq	xmm0,57
   1279 	movdqa	xmm3,xmm0
   1280 	pslldq	xmm0,8
   1281 	psrldq	xmm3,8
   1282 	pxor	xmm0,xmm4
   1283 	pxor	xmm1,xmm3
   1284 
   1285 
   1286 	movdqa	xmm4,xmm0
   1287 	psrlq	xmm0,1
   1288 	pxor	xmm1,xmm4
   1289 	pxor	xmm4,xmm0
   1290 	psrlq	xmm0,5
   1291 	pxor	xmm0,xmm4
   1292 	psrlq	xmm0,1
   1293 	pxor	xmm0,xmm1
   1294 	test	r9,r9
   1295 	jnz	NEAR $L$done	; r9 == 0 here means one more block is pending; non-zero means none
   1296 
        ; Single remaining block: XOR into the accumulator, multiply by [rdx], fold.
   1297 $L$odd_tail:
   1298 	movdqu	xmm8,XMMWORD[r8]
   1299 DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
   1300 	pxor	xmm0,xmm8
   1301 	movdqa	xmm1,xmm0
   1302 	pshufd	xmm3,xmm0,78
   1303 	pxor	xmm3,xmm0
   1304 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
   1305 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
   1306 DB	102,15,58,68,223,0	; pclmulqdq xmm3,xmm7,0x00
   1307 	pxor	xmm3,xmm0
   1308 	pxor	xmm3,xmm1
   1309 
   1310 	movdqa	xmm4,xmm3
   1311 	psrldq	xmm3,8
   1312 	pslldq	xmm4,8
   1313 	pxor	xmm1,xmm3
   1314 	pxor	xmm0,xmm4
   1315 
   1316 	movdqa	xmm4,xmm0
   1317 	movdqa	xmm3,xmm0
   1318 	psllq	xmm0,5
   1319 	pxor	xmm3,xmm0
   1320 	psllq	xmm0,1
   1321 	pxor	xmm0,xmm3
   1322 	psllq	xmm0,57
   1323 	movdqa	xmm3,xmm0
   1324 	pslldq	xmm0,8
   1325 	psrldq	xmm3,8
   1326 	pxor	xmm0,xmm4
   1327 	pxor	xmm1,xmm3
   1328 
   1329 
   1330 	movdqa	xmm4,xmm0
   1331 	psrlq	xmm0,1
   1332 	pxor	xmm1,xmm4
   1333 	pxor	xmm4,xmm0
   1334 	psrlq	xmm0,5
   1335 	pxor	xmm0,xmm4
   1336 	psrlq	xmm0,1
   1337 	pxor	xmm0,xmm1
   1338 $L$done:
   1339 DB	102,65,15,56,0,194	; pshufb xmm0,xmm10 -- same byte reordering on the way out
   1340 	movdqu	XMMWORD[rcx],xmm0
   1341 	movaps	xmm6,XMMWORD[rsp]	; reload xmm6-xmm15 from the slots written at entry
   1342 	movaps	xmm7,XMMWORD[16+rsp]
   1343 	movaps	xmm8,XMMWORD[32+rsp]
   1344 	movaps	xmm9,XMMWORD[48+rsp]
   1345 	movaps	xmm10,XMMWORD[64+rsp]
   1346 	movaps	xmm11,XMMWORD[80+rsp]
   1347 	movaps	xmm12,XMMWORD[96+rsp]
   1348 	movaps	xmm13,XMMWORD[112+rsp]
   1349 	movaps	xmm14,XMMWORD[128+rsp]
   1350 	movaps	xmm15,XMMWORD[144+rsp]
   1351 	lea	rsp,[168+rsp]
   1352 $L$SEH_end_gcm_ghash_clmul:
   1353 	DB	0F3h,0C3h		;repret
   1354 
   1355 
   1356 global	gcm_init_avx
        ; gcm_init_avx -- fills a 192-byte table of precomputed values (AVX code path).
        ;
        ; Register use as visible in this routine (Win64 argument registers):
        ;   rcx  = destination table; 4 iterations x 48 bytes are written
        ;   rdx  = pointer to the 16-byte input value (read once, unaligned load)
        ;   r10  = loop counter (4)
        ;   xmm2 = preprocessed input value, the constant multiplier of every step
        ;   xmm6 = (high qword XOR low qword) of xmm2 in its low qword; callee-saved
        ;          on Win64, hence spilled in the prologue and restored at the end
        ;
        ; Each 48-byte table group holds: [0] the value at the start of the step,
        ; [16] that value multiplied once more by xmm2, [32] the two "hi XOR lo"
        ; halves of those two values packed together (operand for the middle
        ; carry-less product). NOTE(review): presumably these are H^1..H^8 for
        ; GHASH -- confirm against the C caller.
   1357 
   1358 ALIGN	32
   1359 gcm_init_avx:
   1360 
   1361 $L$SEH_begin_gcm_init_avx:
   1362 
   1363 DB	0x48,0x83,0xec,0x18	; encodes: sub rsp,0x18
   1364 DB	0x0f,0x29,0x34,0x24	; encodes: movaps [rsp],xmm6
   1365 	vzeroupper
   1366 
   1367 	vmovdqu	xmm2,XMMWORD[rdx]
   1368 	vpshufd	xmm2,xmm2,78	; 78 = 01001110b: swap the two 64-bit halves
   1369 
   1370 
        ; 128-bit left shift by one bit, then conditional XOR with the polynomial
        ; constant when the bit shifted out of the top (bit 127) was set.
   1371 	vpshufd	xmm4,xmm2,255	; broadcast the top dword (taken before the shift)
   1372 	vpsrlq	xmm3,xmm2,63	; carry bit of each qword
   1373 	vpsllq	xmm2,xmm2,1
   1374 	vpxor	xmm5,xmm5,xmm5
   1375 	vpcmpgtd	xmm5,xmm5,xmm4	; all-ones mask if the top dword is negative
   1376 	vpslldq	xmm3,xmm3,8	; move the low-qword carry into the high qword
   1377 	vpor	xmm2,xmm2,xmm3
   1378 
   1379 
   1380 	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
   1381 	vpxor	xmm2,xmm2,xmm5
   1382 
   1383 	vpunpckhqdq	xmm6,xmm2,xmm2
   1384 	vmovdqa	xmm0,xmm2	; xmm0 = running value, starts equal to xmm2
   1385 	vpxor	xmm6,xmm6,xmm2	; low qword of xmm6 = hi(xmm2) XOR lo(xmm2)
   1386 	mov	r10,4
   1387 	jmp	NEAR $L$init_start_avx
   1388 ALIGN	32
   1389 $L$init_loop_avx:
        ; Finish the previous group: store its packed "hi XOR lo" halves into the
        ; third 16-byte slot (rcx was already advanced by 48, hence the -16).
   1390 	vpalignr	xmm5,xmm4,xmm3,8
   1391 	vmovdqu	XMMWORD[(-16)+rcx],xmm5
        ; xmm0 = xmm0 * xmm2: three carry-less products (lo*lo, hi*hi and
        ; (hi^lo)*(hi^lo)) combined into a 256-bit result in xmm1:xmm0.
   1392 	vpunpckhqdq	xmm3,xmm0,xmm0
   1393 	vpxor	xmm3,xmm3,xmm0
   1394 	vpclmulqdq	xmm1,xmm0,xmm2,0x11
   1395 	vpclmulqdq	xmm0,xmm0,xmm2,0x00
   1396 	vpclmulqdq	xmm3,xmm3,xmm6,0x00
   1397 	vpxor	xmm4,xmm1,xmm0
   1398 	vpxor	xmm3,xmm3,xmm4
   1399 
   1400 	vpslldq	xmm4,xmm3,8
   1401 	vpsrldq	xmm3,xmm3,8
   1402 	vpxor	xmm0,xmm0,xmm4
   1403 	vpxor	xmm1,xmm1,xmm3
        ; Fold xmm1:xmm0 back to 128 bits in xmm0: first phase uses left shifts by
        ; 57/62/63, second phase right shifts by 1, 6 (1+5) and 7 (1+5+1).
        ; NOTE(review): presumably reduction modulo the GHASH polynomial.
   1404 	vpsllq	xmm3,xmm0,57
   1405 	vpsllq	xmm4,xmm0,62
   1406 	vpxor	xmm4,xmm4,xmm3
   1407 	vpsllq	xmm3,xmm0,63
   1408 	vpxor	xmm4,xmm4,xmm3
   1409 	vpslldq	xmm3,xmm4,8
   1410 	vpsrldq	xmm4,xmm4,8
   1411 	vpxor	xmm0,xmm0,xmm3
   1412 	vpxor	xmm1,xmm1,xmm4
   1413 
   1414 	vpsrlq	xmm4,xmm0,1
   1415 	vpxor	xmm1,xmm1,xmm0
   1416 	vpxor	xmm0,xmm0,xmm4
   1417 	vpsrlq	xmm4,xmm4,5
   1418 	vpxor	xmm0,xmm0,xmm4
   1419 	vpsrlq	xmm0,xmm0,1
   1420 	vpxor	xmm0,xmm0,xmm1
   1421 $L$init_start_avx:
        ; xmm5 keeps the value at the start of this step; xmm0 is multiplied by
        ; xmm2 once more using the same multiply-and-fold sequence as above.
   1422 	vmovdqa	xmm5,xmm0
   1423 	vpunpckhqdq	xmm3,xmm0,xmm0
   1424 	vpxor	xmm3,xmm3,xmm0
   1425 	vpclmulqdq	xmm1,xmm0,xmm2,0x11
   1426 	vpclmulqdq	xmm0,xmm0,xmm2,0x00
   1427 	vpclmulqdq	xmm3,xmm3,xmm6,0x00
   1428 	vpxor	xmm4,xmm1,xmm0
   1429 	vpxor	xmm3,xmm3,xmm4
   1430 
   1431 	vpslldq	xmm4,xmm3,8
   1432 	vpsrldq	xmm3,xmm3,8
   1433 	vpxor	xmm0,xmm0,xmm4
   1434 	vpxor	xmm1,xmm1,xmm3
   1435 	vpsllq	xmm3,xmm0,57
   1436 	vpsllq	xmm4,xmm0,62
   1437 	vpxor	xmm4,xmm4,xmm3
   1438 	vpsllq	xmm3,xmm0,63
   1439 	vpxor	xmm4,xmm4,xmm3
   1440 	vpslldq	xmm3,xmm4,8
   1441 	vpsrldq	xmm4,xmm4,8
   1442 	vpxor	xmm0,xmm0,xmm3
   1443 	vpxor	xmm1,xmm1,xmm4
   1444 
   1445 	vpsrlq	xmm4,xmm0,1
   1446 	vpxor	xmm1,xmm1,xmm0
   1447 	vpxor	xmm0,xmm0,xmm4
   1448 	vpsrlq	xmm4,xmm4,5
   1449 	vpxor	xmm0,xmm0,xmm4
   1450 	vpsrlq	xmm0,xmm0,1
   1451 	vpxor	xmm0,xmm0,xmm1
        ; Store the pair (xmm5, xmm0) and prepare their half-swapped XORs in
        ; xmm3/xmm4 for the packed third slot.
   1452 	vpshufd	xmm3,xmm5,78
   1453 	vpshufd	xmm4,xmm0,78
   1454 	vpxor	xmm3,xmm3,xmm5
   1455 	vmovdqu	XMMWORD[rcx],xmm5
   1456 	vpxor	xmm4,xmm4,xmm0
   1457 	vmovdqu	XMMWORD[16+rcx],xmm0
   1458 	lea	rcx,[48+rcx]	; next 48-byte group
   1459 	sub	r10,1
   1460 	jnz	NEAR $L$init_loop_avx
   1461 
        ; Third slot of the last group. The xmm3/xmm4 operands are swapped compared
        ; with the in-loop store at $L$init_loop_avx, so the two halves land in the
        ; opposite order for this final entry.
   1462 	vpalignr	xmm5,xmm3,xmm4,8
   1463 	vmovdqu	XMMWORD[(-16)+rcx],xmm5
   1464 
   1465 	vzeroupper
   1466 	movaps	xmm6,XMMWORD[rsp]	; restore callee-saved xmm6
   1467 	lea	rsp,[24+rsp]	; undo the 0x18-byte allocation from the prologue
   1468 $L$SEH_end_gcm_init_avx:
   1469 	DB	0F3h,0C3h		;repret
   1470 
   1471 
   1471 global	gcm_gmult_avx
        ; gcm_gmult_avx -- has no body of its own: it jumps straight to the
        ; $L$_gmult_clmul label (defined outside this part of the file), so all
        ; argument registers reach that code unchanged.
   1472 
   1473 ALIGN	32
   1474 gcm_gmult_avx:
   1475 
   1476 	jmp	NEAR $L$_gmult_clmul
   1478 
   1479 
   1479 global	gcm_ghash_avx
        ; gcm_ghash_avx -- folds a byte buffer into a 16-byte accumulator using
        ; carry-less multiplies against a precomputed table (AVX code path).
        ;
        ; Register use as visible in this routine (Win64 argument registers):
        ;   rcx   = pointer to the 16-byte accumulator (loaded at entry, stored at exit)
        ;   rdx   = table pointer; biased by +64 right after entry, so the operands
        ;           below are written as ((offset-64))+rdx. The 48-byte group layout
        ;           matches what gcm_init_avx writes.
        ;   r8    = input pointer
        ;   r9    = remaining byte count
        ;           NOTE(review): the code only tests for zero in steps of 16, so it
        ;           appears to require a non-zero multiple of 16 -- confirm with caller.
        ;   r10   = address of $L$0x1c2_polynomial
        ;   xmm13 = byte-reversal shuffle mask ($L$bswap_mask)
        ;   xmm10 = running accumulator (byte-reversed form)
        ;   xmm0/xmm3 = low products, xmm1/xmm4 = high products, xmm2/xmm5 = middle
        ;           products; each pair alternates as destination and running sum.
        ;   xmm6  = current table entry, xmm7 = packed "hi XOR lo" constants
        ;
        ; xmm6-xmm15 are callee-saved on Win64; the prologue spills them into a
        ; 168-byte stack area and the epilogue restores them.
   1480 
   1481 ALIGN	32
   1482 gcm_ghash_avx:
   1483 
   1484 	lea	rax,[((-136))+rsp]
   1485 $L$SEH_begin_gcm_ghash_avx:
   1486 
        ; Hand-encoded prologue (fixed instruction lengths for the unwind table):
   1487 DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]   (rsp = entry rsp - 168)
   1488 DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
   1489 DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
   1490 DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
   1491 DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
   1492 DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
   1493 DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
   1494 DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
   1495 DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
   1496 DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
   1497 DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
   1498 	vzeroupper
   1499 
   1500 	vmovdqu	xmm10,XMMWORD[rcx]
   1501 	lea	r10,[$L$0x1c2_polynomial]
   1502 	lea	rdx,[64+rdx]	; bias the table pointer by +64
   1503 	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
   1504 	vpshufb	xmm10,xmm10,xmm13	; byte-reverse the accumulator
   1505 	cmp	r9,0x80
   1506 	jb	NEAR $L$short_avx	; fewer than 128 bytes: short path only
   1507 	sub	r9,0x80
   1508 
        ; First 128-byte group: blocks are loaded from offset 112 down to 0. Blocks
        ; at 112..16 are multiplied by the table entries at unbiased offsets
        ; 0,16,48,64,96,112,144 here; the block at offset 0 (xmm15) is only loaded
        ; and byte-reversed, its multiply by the entry at 160 happens in the loop or
        ; tail once the accumulator has been XORed in. Middle products alternate
        ; between selector 0x00 (low qword of xmm7) and 0x10 (high qword of xmm7).
   1509 	vmovdqu	xmm14,XMMWORD[112+r8]
   1510 	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
   1511 	vpshufb	xmm14,xmm14,xmm13
   1512 	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
   1513 
   1514 	vpunpckhqdq	xmm9,xmm14,xmm14
   1515 	vmovdqu	xmm15,XMMWORD[96+r8]
   1516 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1517 	vpxor	xmm9,xmm9,xmm14
   1518 	vpshufb	xmm15,xmm15,xmm13
   1519 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1520 	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
   1521 	vpunpckhqdq	xmm8,xmm15,xmm15
   1522 	vmovdqu	xmm14,XMMWORD[80+r8]
   1523 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1524 	vpxor	xmm8,xmm8,xmm15
   1525 
   1526 	vpshufb	xmm14,xmm14,xmm13
   1527 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1528 	vpunpckhqdq	xmm9,xmm14,xmm14
   1529 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1530 	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
   1531 	vpxor	xmm9,xmm9,xmm14
   1532 	vmovdqu	xmm15,XMMWORD[64+r8]
   1533 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1534 	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
   1535 
   1536 	vpshufb	xmm15,xmm15,xmm13
   1537 	vpxor	xmm3,xmm3,xmm0
   1538 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1539 	vpxor	xmm4,xmm4,xmm1
   1540 	vpunpckhqdq	xmm8,xmm15,xmm15
   1541 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1542 	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
   1543 	vpxor	xmm5,xmm5,xmm2
   1544 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1545 	vpxor	xmm8,xmm8,xmm15
   1546 
   1547 	vmovdqu	xmm14,XMMWORD[48+r8]
   1548 	vpxor	xmm0,xmm0,xmm3
   1549 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1550 	vpxor	xmm1,xmm1,xmm4
   1551 	vpshufb	xmm14,xmm14,xmm13
   1552 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1553 	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
   1554 	vpxor	xmm2,xmm2,xmm5
   1555 	vpunpckhqdq	xmm9,xmm14,xmm14
   1556 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1557 	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
   1558 	vpxor	xmm9,xmm9,xmm14
   1559 
   1560 	vmovdqu	xmm15,XMMWORD[32+r8]
   1561 	vpxor	xmm3,xmm3,xmm0
   1562 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1563 	vpxor	xmm4,xmm4,xmm1
   1564 	vpshufb	xmm15,xmm15,xmm13
   1565 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1566 	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
   1567 	vpxor	xmm5,xmm5,xmm2
   1568 	vpunpckhqdq	xmm8,xmm15,xmm15
   1569 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1570 	vpxor	xmm8,xmm8,xmm15
   1571 
   1572 	vmovdqu	xmm14,XMMWORD[16+r8]
   1573 	vpxor	xmm0,xmm0,xmm3
   1574 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1575 	vpxor	xmm1,xmm1,xmm4
   1576 	vpshufb	xmm14,xmm14,xmm13
   1577 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1578 	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
   1579 	vpxor	xmm2,xmm2,xmm5
   1580 	vpunpckhqdq	xmm9,xmm14,xmm14
   1581 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1582 	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
   1583 	vpxor	xmm9,xmm9,xmm14
   1584 
   1585 	vmovdqu	xmm15,XMMWORD[r8]
   1586 	vpxor	xmm3,xmm3,xmm0
   1587 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1588 	vpxor	xmm4,xmm4,xmm1
   1589 	vpshufb	xmm15,xmm15,xmm13
   1590 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1591 	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
   1592 	vpxor	xmm5,xmm5,xmm2
   1593 	vpclmulqdq	xmm2,xmm9,xmm7,0x10
   1594 
   1595 	lea	r8,[128+r8]
   1596 	cmp	r9,0x80
   1597 	jb	NEAR $L$tail_avx	; no further full 128-byte group: finish this one
   1598 
   1599 	vpxor	xmm15,xmm15,xmm10	; fold the accumulator into the first block
   1600 	sub	r9,0x80
   1601 	jmp	NEAR $L$oop8x_avx
   1602 
   1603 ALIGN	32
   1604 $L$oop8x_avx:
        ; Steady state, 128 bytes per iteration. Finishes the previous group
        ; (xmm10/xmm11/xmm12 = low/high/middle sums), folds it to 128 bits with two
        ; vpclmulqdq-by-[r10] plus vpalignr steps, and interleaves that work with
        ; the multiplies of the next group's blocks at offsets 112..16.
   1605 	vpunpckhqdq	xmm8,xmm15,xmm15
   1606 	vmovdqu	xmm14,XMMWORD[112+r8]
   1607 	vpxor	xmm3,xmm3,xmm0
   1608 	vpxor	xmm8,xmm8,xmm15
   1609 	vpclmulqdq	xmm10,xmm15,xmm6,0x00
   1610 	vpshufb	xmm14,xmm14,xmm13
   1611 	vpxor	xmm4,xmm4,xmm1
   1612 	vpclmulqdq	xmm11,xmm15,xmm6,0x11
   1613 	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
   1614 	vpunpckhqdq	xmm9,xmm14,xmm14
   1615 	vpxor	xmm5,xmm5,xmm2
   1616 	vpclmulqdq	xmm12,xmm8,xmm7,0x00
   1617 	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
   1618 	vpxor	xmm9,xmm9,xmm14
   1619 
   1620 	vmovdqu	xmm15,XMMWORD[96+r8]
   1621 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1622 	vpxor	xmm10,xmm10,xmm3
   1623 	vpshufb	xmm15,xmm15,xmm13
   1624 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1625 	vxorps	xmm11,xmm11,xmm4
   1626 	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
   1627 	vpunpckhqdq	xmm8,xmm15,xmm15
   1628 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1629 	vpxor	xmm12,xmm12,xmm5
   1630 	vxorps	xmm8,xmm8,xmm15
   1631 
   1632 	vmovdqu	xmm14,XMMWORD[80+r8]
   1633 	vpxor	xmm12,xmm12,xmm10
   1634 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1635 	vpxor	xmm12,xmm12,xmm11
   1636 	vpslldq	xmm9,xmm12,8	; split the middle sum across low and high
   1637 	vpxor	xmm3,xmm3,xmm0
   1638 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1639 	vpsrldq	xmm12,xmm12,8
   1640 	vpxor	xmm10,xmm10,xmm9
   1641 	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
   1642 	vpshufb	xmm14,xmm14,xmm13
   1643 	vxorps	xmm11,xmm11,xmm12
   1644 	vpxor	xmm4,xmm4,xmm1
   1645 	vpunpckhqdq	xmm9,xmm14,xmm14
   1646 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1647 	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
   1648 	vpxor	xmm9,xmm9,xmm14
   1649 	vpxor	xmm5,xmm5,xmm2
   1650 
   1651 	vmovdqu	xmm15,XMMWORD[64+r8]
   1652 	vpalignr	xmm12,xmm10,xmm10,8	; first fold step: swapped halves ...
   1653 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1654 	vpshufb	xmm15,xmm15,xmm13
   1655 	vpxor	xmm0,xmm0,xmm3
   1656 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1657 	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
   1658 	vpunpckhqdq	xmm8,xmm15,xmm15
   1659 	vpxor	xmm1,xmm1,xmm4
   1660 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1661 	vxorps	xmm8,xmm8,xmm15
   1662 	vpxor	xmm2,xmm2,xmm5
   1663 
   1664 	vmovdqu	xmm14,XMMWORD[48+r8]
   1665 	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10	; ... low qword times the constant's high qword
   1666 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1667 	vpshufb	xmm14,xmm14,xmm13
   1668 	vpxor	xmm3,xmm3,xmm0
   1669 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1670 	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
   1671 	vpunpckhqdq	xmm9,xmm14,xmm14
   1672 	vpxor	xmm4,xmm4,xmm1
   1673 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1674 	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
   1675 	vpxor	xmm9,xmm9,xmm14
   1676 	vpxor	xmm5,xmm5,xmm2
   1677 
   1678 	vmovdqu	xmm15,XMMWORD[32+r8]
   1679 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1680 	vpshufb	xmm15,xmm15,xmm13
   1681 	vpxor	xmm0,xmm0,xmm3
   1682 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1683 	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
   1684 	vpunpckhqdq	xmm8,xmm15,xmm15
   1685 	vpxor	xmm1,xmm1,xmm4
   1686 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1687 	vpxor	xmm8,xmm8,xmm15
   1688 	vpxor	xmm2,xmm2,xmm5
   1689 	vxorps	xmm10,xmm10,xmm12
   1690 
   1691 	vmovdqu	xmm14,XMMWORD[16+r8]
   1692 	vpalignr	xmm12,xmm10,xmm10,8	; second fold step, same pattern
   1693 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1694 	vpshufb	xmm14,xmm14,xmm13
   1695 	vpxor	xmm3,xmm3,xmm0
   1696 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1697 	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
   1698 	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
   1699 	vxorps	xmm12,xmm12,xmm11
   1700 	vpunpckhqdq	xmm9,xmm14,xmm14
   1701 	vpxor	xmm4,xmm4,xmm1
   1702 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1703 	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
   1704 	vpxor	xmm9,xmm9,xmm14
   1705 	vpxor	xmm5,xmm5,xmm2
   1706 
   1707 	vmovdqu	xmm15,XMMWORD[r8]
   1708 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1709 	vpshufb	xmm15,xmm15,xmm13
   1710 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1711 	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
   1712 	vpxor	xmm15,xmm15,xmm12	; folded accumulator goes straight into the
   1713 	vpclmulqdq	xmm2,xmm9,xmm7,0x10
   1714 	vpxor	xmm15,xmm15,xmm10	; first block of this group
   1715 
   1716 	lea	r8,[128+r8]
   1717 	sub	r9,0x80
   1718 	jnc	NEAR $L$oop8x_avx	; loop while the subtraction did not borrow
   1719 
   1720 	add	r9,0x80	; undo the borrow: r9 = leftover byte count
   1721 	jmp	NEAR $L$tail_no_xor_avx	; accumulator is already XORed into xmm15
   1722 
   1723 ALIGN	32
   1724 $L$short_avx:
        ; Short path for fewer than 128 remaining bytes. Blocks are consumed from
        ; the END of the buffer backwards (offsets -16, -32, ... from r8 = end
        ; pointer) while table entries are walked forwards. After every block r9
        ; drops by 16; reaching zero branches to $L$tail_avx, which multiplies the
        ; block still pending in xmm15 after XORing in the accumulator.
   1725 	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]
   1726 	lea	r8,[r9*1+r8]
   1727 	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
   1728 	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
   1729 	vpshufb	xmm15,xmm14,xmm13
   1730 
        ; Copying xmm0..2 into xmm3..5 makes the next "vpxor xmm3,xmm3,xmm0" (and
        ; the xmm4/xmm5 counterparts) produce zero, i.e. the sums start out empty.
   1731 	vmovdqa	xmm3,xmm0
   1732 	vmovdqa	xmm4,xmm1
   1733 	vmovdqa	xmm5,xmm2
   1734 	sub	r9,0x10
   1735 	jz	NEAR $L$tail_avx
   1736 
   1737 	vpunpckhqdq	xmm8,xmm15,xmm15
   1738 	vpxor	xmm3,xmm3,xmm0
   1739 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1740 	vpxor	xmm8,xmm8,xmm15
   1741 	vmovdqu	xmm14,XMMWORD[((-32))+r8]
   1742 	vpxor	xmm4,xmm4,xmm1
   1743 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1744 	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
   1745 	vpshufb	xmm15,xmm14,xmm13
   1746 	vpxor	xmm5,xmm5,xmm2
   1747 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1748 	vpsrldq	xmm7,xmm7,8	; expose the upper half of the packed constant
   1749 	sub	r9,0x10
   1750 	jz	NEAR $L$tail_avx
   1751 
   1752 	vpunpckhqdq	xmm8,xmm15,xmm15
   1753 	vpxor	xmm3,xmm3,xmm0
   1754 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1755 	vpxor	xmm8,xmm8,xmm15
   1756 	vmovdqu	xmm14,XMMWORD[((-48))+r8]
   1757 	vpxor	xmm4,xmm4,xmm1
   1758 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1759 	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
   1760 	vpshufb	xmm15,xmm14,xmm13
   1761 	vpxor	xmm5,xmm5,xmm2
   1762 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1763 	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
   1764 	sub	r9,0x10
   1765 	jz	NEAR $L$tail_avx
   1766 
   1767 	vpunpckhqdq	xmm8,xmm15,xmm15
   1768 	vpxor	xmm3,xmm3,xmm0
   1769 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1770 	vpxor	xmm8,xmm8,xmm15
   1771 	vmovdqu	xmm14,XMMWORD[((-64))+r8]
   1772 	vpxor	xmm4,xmm4,xmm1
   1773 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1774 	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
   1775 	vpshufb	xmm15,xmm14,xmm13
   1776 	vpxor	xmm5,xmm5,xmm2
   1777 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1778 	vpsrldq	xmm7,xmm7,8
   1779 	sub	r9,0x10
   1780 	jz	NEAR $L$tail_avx
   1781 
   1782 	vpunpckhqdq	xmm8,xmm15,xmm15
   1783 	vpxor	xmm3,xmm3,xmm0
   1784 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1785 	vpxor	xmm8,xmm8,xmm15
   1786 	vmovdqu	xmm14,XMMWORD[((-80))+r8]
   1787 	vpxor	xmm4,xmm4,xmm1
   1788 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1789 	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
   1790 	vpshufb	xmm15,xmm14,xmm13
   1791 	vpxor	xmm5,xmm5,xmm2
   1792 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1793 	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
   1794 	sub	r9,0x10
   1795 	jz	NEAR $L$tail_avx
   1796 
   1797 	vpunpckhqdq	xmm8,xmm15,xmm15
   1798 	vpxor	xmm3,xmm3,xmm0
   1799 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1800 	vpxor	xmm8,xmm8,xmm15
   1801 	vmovdqu	xmm14,XMMWORD[((-96))+r8]
   1802 	vpxor	xmm4,xmm4,xmm1
   1803 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1804 	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
   1805 	vpshufb	xmm15,xmm14,xmm13
   1806 	vpxor	xmm5,xmm5,xmm2
   1807 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1808 	vpsrldq	xmm7,xmm7,8
   1809 	sub	r9,0x10
   1810 	jz	NEAR $L$tail_avx
   1811 
   1812 	vpunpckhqdq	xmm8,xmm15,xmm15
   1813 	vpxor	xmm3,xmm3,xmm0
   1814 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1815 	vpxor	xmm8,xmm8,xmm15
   1816 	vmovdqu	xmm14,XMMWORD[((-112))+r8]
   1817 	vpxor	xmm4,xmm4,xmm1
   1818 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1819 	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
   1820 	vpshufb	xmm15,xmm14,xmm13
   1821 	vpxor	xmm5,xmm5,xmm2
   1822 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1823 	vmovq	xmm7,QWORD[((184-64))+rdx]	; upper 8 bytes of the entry at offset 176
   1824 	sub	r9,0x10
   1825 	jmp	NEAR $L$tail_avx
   1826 
   1827 ALIGN	32
   1828 $L$tail_avx:
        ; Final block of the current run: XOR in the accumulator (skipped when
        ; entering at $L$tail_no_xor_avx), multiply, add up all partial products
        ; and fold the 256-bit result back to 128 bits in xmm10.
   1829 	vpxor	xmm15,xmm15,xmm10
   1830 $L$tail_no_xor_avx:
   1831 	vpunpckhqdq	xmm8,xmm15,xmm15
   1832 	vpxor	xmm3,xmm3,xmm0
   1833 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1834 	vpxor	xmm8,xmm8,xmm15
   1835 	vpxor	xmm4,xmm4,xmm1
   1836 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1837 	vpxor	xmm5,xmm5,xmm2
   1838 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1839 
   1840 	vmovdqu	xmm12,XMMWORD[r10]	; polynomial constant
   1841 
   1842 	vpxor	xmm10,xmm3,xmm0	; total low product
   1843 	vpxor	xmm11,xmm4,xmm1	; total high product
   1844 	vpxor	xmm5,xmm5,xmm2	; total middle product
   1845 
   1846 	vpxor	xmm5,xmm5,xmm10
   1847 	vpxor	xmm5,xmm5,xmm11
   1848 	vpslldq	xmm9,xmm5,8
   1849 	vpsrldq	xmm5,xmm5,8
   1850 	vpxor	xmm10,xmm10,xmm9
   1851 	vpxor	xmm11,xmm11,xmm5
   1852 
   1853 	vpclmulqdq	xmm9,xmm10,xmm12,0x10	; fold step 1
   1854 	vpalignr	xmm10,xmm10,xmm10,8
   1855 	vpxor	xmm10,xmm10,xmm9
   1856 
   1857 	vpclmulqdq	xmm9,xmm10,xmm12,0x10	; fold step 2
   1858 	vpalignr	xmm10,xmm10,xmm10,8
   1859 	vpxor	xmm10,xmm10,xmm11
   1860 	vpxor	xmm10,xmm10,xmm9
   1861 
   1862 	cmp	r9,0
   1863 	jne	NEAR $L$short_avx	; leftover bytes after the 128-byte groups
   1864 
   1865 	vpshufb	xmm10,xmm10,xmm13	; back to the in-memory byte order
   1866 	vmovdqu	XMMWORD[rcx],xmm10
   1867 	vzeroupper
        ; Restore the callee-saved xmm registers spilled by the prologue.
   1868 	movaps	xmm6,XMMWORD[rsp]
   1869 	movaps	xmm7,XMMWORD[16+rsp]
   1870 	movaps	xmm8,XMMWORD[32+rsp]
   1871 	movaps	xmm9,XMMWORD[48+rsp]
   1872 	movaps	xmm10,XMMWORD[64+rsp]
   1873 	movaps	xmm11,XMMWORD[80+rsp]
   1874 	movaps	xmm12,XMMWORD[96+rsp]
   1875 	movaps	xmm13,XMMWORD[112+rsp]
   1876 	movaps	xmm14,XMMWORD[128+rsp]
   1877 	movaps	xmm15,XMMWORD[144+rsp]
   1878 	lea	rsp,[168+rsp]
   1879 $L$SEH_end_gcm_ghash_avx:
   1880 	DB	0F3h,0C3h		;repret
   1882 
   1883 
   1884 ALIGN	64
        ; Shared 16-byte constants, kept in the code section and addressed
        ; RIP-relative (the file sets "default rel").
        ; $L$bswap_mask: shuffle-control bytes 15..0; used as a pshufb/vpshufb mask
        ; it reverses the byte order of a 16-byte value.
   1885 $L$bswap_mask:
   1886 DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
        ; $L$0x1c2_polynomial: low byte 0x01, top byte 0xc2, everything else zero.
        ; gcm_init_avx ANDs it with a mask; gcm_ghash_avx uses its high qword as a
        ; vpclmulqdq operand when folding products back to 128 bits.
   1887 $L$0x1c2_polynomial:
   1888 DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
        ; $L$7_mask and $L$7_mask_poly are not referenced in the visible part of the
        ; file. Note that 450 = 0x1c2.
   1889 $L$7_mask:
   1890 	DD	7,0,7,0
   1891 $L$7_mask_poly:
   1892 	DD	7,0,450,0
   1893 ALIGN	64
   1894 
   1895 $L$rem_4bit:
        ; 16 qword entries written as (low dword, high dword) pairs; every low dword
        ; is zero. gcm_gmult_4bit indexes this table with a 4-bit value as
        ; QWORD[rdx*8+r11] and XORs the entry into the upper half of its running
        ; result. Entry 1 has high dword 471859200 = 0x1C200000.
   1896 	DD	0,0,0,471859200,0,943718400,0,610271232
   1897 	DD	0,1887436800,0,1822425088,0,1220542464,0,1423966208
   1898 	DD	0,3774873600,0,4246732800,0,3644850176,0,3311403008
   1899 	DD	0,2441084928,0,2376073216,0,2847932416,0,3051356160
   1900 
   1901 $L$rem_8bit:
        ; 256 16-bit entries (32 rows of 8), presumably indexed by an 8-bit value;
        ; no user of this table appears in the visible part of the file. The entries
        ; combine under XOR, e.g. entry 3 (0x0246) = entry 1 (0x01C2) XOR entry 2
        ; (0x0384), and entry 2 is entry 1 shifted left by one bit.
   1902 	DW	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
   1903 	DW	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
   1904 	DW	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
   1905 	DW	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
   1906 	DW	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
   1907 	DW	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
   1908 	DW	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
   1909 	DW	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
   1910 	DW	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
   1911 	DW	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
   1912 	DW	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
   1913 	DW	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
   1914 	DW	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
   1915 	DW	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
   1916 	DW	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
   1917 	DW	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
   1918 	DW	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
   1919 	DW	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
   1920 	DW	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
   1921 	DW	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
   1922 	DW	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
   1923 	DW	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
   1924 	DW	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
   1925 	DW	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
   1926 	DW	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
   1927 	DW	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
   1928 	DW	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
   1929 	DW	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
   1930 	DW	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
   1931 	DW	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
   1932 	DW	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
   1933 	DW	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
   1934 
        ; NUL-terminated ASCII identification string embedded in the object file.
        ; Decoded: "GHASH for x86_64, CRYPTOGAMS by " followed by the author's
        ; e-mail address (appro at openssl.org) in angle brackets.
   1935 DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
   1936 DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
   1937 DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
   1938 DB	114,103,62,0
   1939 ALIGN	64
   1940 EXTERN	__imp_RtlVirtualUnwind
        ; se_handler -- Win64 language-specific exception handler registered in
        ; .xdata for the two 4-bit routines (their frame: 6 pushes + sub rsp,280).
        ;
        ; Arguments per the Win64 handler signature: rcx = exception record,
        ; rdx = establisher frame, r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*.
        ; Offsets into CONTEXT / DISPATCHER_CONTEXT below follow the x64 layouts in
        ; winnt.h (named in the end-of-line comments).
        ;
        ; Job: if the fault lies between the function's prologue and epilogue
        ; labels (passed as HandlerData[0] and HandlerData[1]), recover the
        ; non-volatile registers the function pushed and the caller's rsp, patch
        ; them into the CONTEXT, then let RtlVirtualUnwind continue. Always
        ; returns 1 in eax (ExceptionContinueSearch).
   1941 
   1942 ALIGN	16
   1943 se_handler:
   1944 	push	rsi
   1945 	push	rdi
   1946 	push	rbx
   1947 	push	rbp
   1948 	push	r12
   1949 	push	r13
   1950 	push	r14
   1951 	push	r15
   1952 	pushfq
   1953 	sub	rsp,64	; 32-byte shadow space + 4 stack arguments for the call below
   1954 
   1955 	mov	rax,QWORD[120+r8]	; context->Rax (the routines copy entry rsp into rax)
   1956 	mov	rbx,QWORD[248+r8]	; context->Rip
   1957 
   1958 	mov	rsi,QWORD[8+r9]	; disp->ImageBase
   1959 	mov	r11,QWORD[56+r9]	; disp->HandlerData
   1960 
   1961 	mov	r10d,DWORD[r11]	; HandlerData[0]: RVA of the end-of-prologue label
   1962 	lea	r10,[r10*1+rsi]
   1963 	cmp	rbx,r10
   1964 	jb	NEAR $L$in_prologue	; fault before the frame was fully set up
   1965 
   1966 	mov	rax,QWORD[152+r8]	; context->Rsp
   1967 
   1968 	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the epilogue label
   1969 	lea	r10,[r10*1+rsi]
   1970 	cmp	rbx,r10
   1971 	jae	NEAR $L$in_prologue	; fault at or after the epilogue label
   1972 
        ; In the function body: step over the 280-byte local area and the six
        ; pushed registers (48 bytes) to reach the rsp value at function entry.
   1973 	lea	rax,[((48+280))+rax]
   1974 
   1975 	mov	rbx,QWORD[((-8))+rax]
   1976 	mov	rbp,QWORD[((-16))+rax]
   1977 	mov	r12,QWORD[((-24))+rax]
   1978 	mov	r13,QWORD[((-32))+rax]
   1979 	mov	r14,QWORD[((-40))+rax]
   1980 	mov	r15,QWORD[((-48))+rax]
   1981 	mov	QWORD[144+r8],rbx	; context->Rbx
   1982 	mov	QWORD[160+r8],rbp	; context->Rbp
   1983 	mov	QWORD[216+r8],r12	; context->R12
   1984 	mov	QWORD[224+r8],r13	; context->R13
   1985 	mov	QWORD[232+r8],r14	; context->R14
   1986 	mov	QWORD[240+r8],r15	; context->R15
   1987 
   1988 $L$in_prologue:
        ; rax is taken as the rsp at function entry; the WIN64 prologue stored
        ; rdi at [8+rsp] and rsi at [16+rsp] (the caller's home slots).
   1989 	mov	rdi,QWORD[8+rax]
   1990 	mov	rsi,QWORD[16+rax]
   1991 	mov	QWORD[152+r8],rax	; context->Rsp
   1992 	mov	QWORD[168+r8],rsi	; context->Rsi
   1993 	mov	QWORD[176+r8],rdi	; context->Rdi
   1994 
        ; Copy the patched CONTEXT (154 qwords) into disp->ContextRecord.
   1995 	mov	rdi,QWORD[40+r9]	; disp->ContextRecord
   1996 	mov	rsi,r8
   1997 	mov	ecx,154
   1998 	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld ; rep movsq
   1999 
        ; RtlVirtualUnwind(0 (no handler), ImageBase, ControlPc, FunctionEntry,
        ;                  ContextRecord, address of HandlerData,
        ;                  address of EstablisherFrame, NULL)
   2000 	mov	rsi,r9
   2001 	xor	rcx,rcx
   2002 	mov	rdx,QWORD[8+rsi]
   2003 	mov	r8,QWORD[rsi]
   2004 	mov	r9,QWORD[16+rsi]
   2005 	mov	r10,QWORD[40+rsi]
   2006 	lea	r11,[56+rsi]
   2007 	lea	r12,[24+rsi]
   2008 	mov	QWORD[32+rsp],r10
   2009 	mov	QWORD[40+rsp],r11
   2010 	mov	QWORD[48+rsp],r12
   2011 	mov	QWORD[56+rsp],rcx
   2012 	call	QWORD[__imp_RtlVirtualUnwind]
   2013 
   2014 	mov	eax,1	; ExceptionContinueSearch
   2015 	add	rsp,64
   2016 	popfq
   2017 	pop	r15
   2018 	pop	r14
   2019 	pop	r13
   2020 	pop	r12
   2021 	pop	rbp
   2022 	pop	rbx
   2023 	pop	rdi
   2024 	pop	rsi
   2025 	DB	0F3h,0C3h		;repret
   2026 
   2027 
   2027 section	.pdata rdata align=4
        ; Win64 function table: one (begin RVA, end RVA, unwind-info RVA) triple per
        ; function. The two AVX routines point at the unwind info of their CLMUL
        ; counterparts: their hand-encoded prologues use the same stack sizes and
        ; save slots as those records describe (24 bytes + xmm6, and
        ; 168 bytes + xmm6..xmm15 respectively).
   2028 ALIGN	4
   2029 	DD	$L$SEH_begin_gcm_gmult_4bit wrt ..imagebase
   2030 	DD	$L$SEH_end_gcm_gmult_4bit wrt ..imagebase
   2031 	DD	$L$SEH_info_gcm_gmult_4bit wrt ..imagebase
   2032 
   2033 	DD	$L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
   2034 	DD	$L$SEH_end_gcm_ghash_4bit wrt ..imagebase
   2035 	DD	$L$SEH_info_gcm_ghash_4bit wrt ..imagebase
   2036 
   2037 	DD	$L$SEH_begin_gcm_init_clmul wrt ..imagebase
   2038 	DD	$L$SEH_end_gcm_init_clmul wrt ..imagebase
   2039 	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase
   2040 
   2041 	DD	$L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
   2042 	DD	$L$SEH_end_gcm_ghash_clmul wrt ..imagebase
   2043 	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
   2044 	DD	$L$SEH_begin_gcm_init_avx wrt ..imagebase
   2045 	DD	$L$SEH_end_gcm_init_avx wrt ..imagebase
   2046 	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase	; shared with gcm_init_clmul
   2047 
   2048 	DD	$L$SEH_begin_gcm_ghash_avx wrt ..imagebase
   2049 	DD	$L$SEH_end_gcm_ghash_avx wrt ..imagebase
   2050 	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase	; shared with gcm_ghash_clmul
   2051 section	.xdata rdata align=8
        ; Win64 UNWIND_INFO records referenced from .pdata.
   2052 ALIGN	8
        ; 4-bit routines: header byte 9 = version 1 with the exception-handler flag,
        ; no unwind codes. Unwinding is delegated to se_handler, which receives the
        ; two label RVAs that follow as HandlerData[0] and HandlerData[1].
   2053 $L$SEH_info_gcm_gmult_4bit:
   2054 DB	9,0,0,0
   2055 	DD	se_handler wrt ..imagebase
   2056 	DD	$L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase
   2057 $L$SEH_info_gcm_ghash_4bit:
   2058 DB	9,0,0,0
   2059 	DD	se_handler wrt ..imagebase
   2060 	DD	$L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase
        ; Version 1, no handler, 8-byte prologue, 3 unwind-code slots.
   2061 $L$SEH_info_gcm_init_clmul:
   2062 DB	0x01,0x08,0x03,0x00
   2063 DB	0x08,0x68,0x00,0x00	; at offset 8: xmm6 saved at [rsp+0]
   2064 DB	0x04,0x22,0x00,0x00	; at offset 4: small stack allocation of 24 bytes; last 2 bytes pad
        ; Version 1, no handler, 0x33-byte prologue, 0x16 (22) unwind-code slots.
        ; Each save record is (prologue offset, op/register byte, 16-byte slot index).
   2065 $L$SEH_info_gcm_ghash_clmul:
   2066 DB	0x01,0x33,0x16,0x00
   2067 DB	0x33,0xf8,0x09,0x00	; xmm15 at [rsp+0x90]
   2068 DB	0x2e,0xe8,0x08,0x00	; xmm14 at [rsp+0x80]
   2069 DB	0x29,0xd8,0x07,0x00	; xmm13 at [rsp+0x70]
   2070 DB	0x24,0xc8,0x06,0x00	; xmm12 at [rsp+0x60]
   2071 DB	0x1f,0xb8,0x05,0x00	; xmm11 at [rsp+0x50]
   2072 DB	0x1a,0xa8,0x04,0x00	; xmm10 at [rsp+0x40]
   2073 DB	0x15,0x98,0x03,0x00	; xmm9 at [rsp+0x30]
   2074 DB	0x10,0x88,0x02,0x00	; xmm8 at [rsp+0x20]
   2075 DB	0x0c,0x78,0x01,0x00	; xmm7 at [rsp+0x10]
   2076 DB	0x08,0x68,0x00,0x00	; xmm6 at [rsp+0]
   2077 DB	0x04,0x01,0x15,0x00	; at offset 4: large stack allocation, 0x15*8 = 168 bytes
   2078 
   2079