Home | History | Annotate | Download | only in fipsmodule
      1 default	rel
      2 %define XMMWORD
      3 %define YMMWORD
      4 %define ZMMWORD
      5 section	.text code align=64
      6 
      7 EXTERN	OPENSSL_ia32cap_P
      8 
      9 global	gcm_gmult_4bit
        ; ------------------------------------------------------------------
        ; gcm_gmult_4bit -- Win64 ABI entry point.
        ;   In:   rcx -> 16-byte value, updated in place (copied to rdi)
        ;         rdx -> table of 16-byte entries, addressed by nibble*16
        ;                (copied to rsi)
        ;   Also reads $L$rem_4bit (defined outside this view) as a table
        ;   of qwords indexed by the 4 bits about to be shifted out.
        ;   The value is consumed from byte 15 down to byte 0, one nibble
        ;   per step; the two 64-bit accumulator halves (r9:r8) are
        ;   byte-swapped and written back to [rdi] and [rdi+8].
        ;   Presumably this is the GHASH multiply by H using the 4-bit
        ;   table method -- inferred from the name; confirm against caller.
        ;   Modifies rax, rcx, rdx, r8-r11; rbx, rdi and rsi are restored.
        ; ------------------------------------------------------------------
     10 
     11 ALIGN	16
     12 gcm_gmult_4bit:
     13 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
     14 	mov	QWORD[16+rsp],rsi
     15 	mov	rax,rsp
     16 $L$SEH_begin_gcm_gmult_4bit:
     17 	mov	rdi,rcx	; rdi = 1st argument
     18 	mov	rsi,rdx	; rsi = 2nd argument
     19 
     20 
     21 	push	rbx
     22 	push	rbp
     23 	push	r12
     24 	push	r13
     25 	push	r14
     26 	push	r15
     27 	sub	rsp,280	; frame is not referenced again until the epilogue
     28 $L$gmult_prologue:
     29 
     30 	movzx	r8,BYTE[15+rdi]	; start with the last byte of the value
     31 	lea	r11,[$L$rem_4bit]	; r11 = remainder table base
     32 	xor	rax,rax
     33 	xor	rbx,rbx
     34 	mov	al,r8b
     35 	mov	bl,r8b
     36 	shl	al,4	; al = low nibble * 16 (byte offset into table)
     37 	mov	rcx,14	; rcx = index of the next byte to fetch
     38 	mov	r8,QWORD[8+rax*1+rsi]	; r8 = second qword of the selected entry
     39 	mov	r9,QWORD[rax*1+rsi]	; r9 = first qword of the selected entry
     40 	and	bl,0xf0	; bl = high nibble * 16
     41 	mov	rdx,r8
     42 	jmp	NEAR $L$oop1
     43 
     44 ALIGN	16
        ; Each pass handles two nibbles: first the pending high nibble (bl),
        ; then the low nibble (al) of the byte fetched during this pass.
     45 $L$oop1:
     46 	shr	r8,4	; shift the 128-bit accumulator r9:r8 right by 4
     47 	and	rdx,0xf	; rdx = the 4 bits being shifted out
     48 	mov	r10,r9
     49 	mov	al,BYTE[rcx*1+rdi]	; fetch the next byte of the value
     50 	shr	r9,4
     51 	xor	r8,QWORD[8+rbx*1+rsi]
     52 	shl	r10,60	; low 4 bits of r9 carry into the top of r8
     53 	xor	r9,QWORD[rbx*1+rsi]
     54 	mov	bl,al
     55 	xor	r9,QWORD[rdx*8+r11]	; fold in the remainder-table entry
     56 	mov	rdx,r8
     57 	shl	al,4
     58 	xor	r8,r10
     59 	dec	rcx
     60 	js	NEAR $L$break1	; leave once byte 0 has been fetched
     61 
     62 	shr	r8,4
     63 	and	rdx,0xf
     64 	mov	r10,r9
     65 	shr	r9,4
     66 	xor	r8,QWORD[8+rax*1+rsi]
     67 	shl	r10,60
     68 	xor	r9,QWORD[rax*1+rsi]
     69 	and	bl,0xf0
     70 	xor	r9,QWORD[rdx*8+r11]
     71 	mov	rdx,r8
     72 	xor	r8,r10
     73 	jmp	NEAR $L$oop1
     74 
     75 ALIGN	16
        ; Final two nibbles (low then high) of byte 0.
     76 $L$break1:
     77 	shr	r8,4
     78 	and	rdx,0xf
     79 	mov	r10,r9
     80 	shr	r9,4
     81 	xor	r8,QWORD[8+rax*1+rsi]
     82 	shl	r10,60
     83 	xor	r9,QWORD[rax*1+rsi]
     84 	and	bl,0xf0
     85 	xor	r9,QWORD[rdx*8+r11]
     86 	mov	rdx,r8
     87 	xor	r8,r10
     88 
     89 	shr	r8,4
     90 	and	rdx,0xf
     91 	mov	r10,r9
     92 	shr	r9,4
     93 	xor	r8,QWORD[8+rbx*1+rsi]
     94 	shl	r10,60
     95 	xor	r9,QWORD[rbx*1+rsi]
     96 	xor	r8,r10
     97 	xor	r9,QWORD[rdx*8+r11]
     98 
     99 	bswap	r8	; byte-reverse both halves before storing
    100 	bswap	r9
    101 	mov	QWORD[8+rdi],r8
    102 	mov	QWORD[rdi],r9
    103 
    104 	lea	rsi,[((280+48))+rsp]	; rsi = rsp as it was before the six pushes
    105 	mov	rbx,QWORD[((-8))+rsi]	; only rbx was modified, so only rbx is reloaded
    106 	lea	rsp,[rsi]	; drop the frame and the six saved slots
    107 $L$gmult_epilogue:
    108 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    109 	mov	rsi,QWORD[16+rsp]
    110 	DB	0F3h,0C3h		;repret
    111 $L$SEH_end_gcm_gmult_4bit:
    112 global	gcm_ghash_4bit
        ; ------------------------------------------------------------------
        ; gcm_ghash_4bit -- Win64 ABI entry point.
        ;   In:   rcx -> 16-byte running value, updated in place (-> rdi)
        ;         rdx -> table of 16 entries of 16 bytes each      (-> rsi)
        ;         r8  -> input data                                (-> r14)
        ;         r9  =  input length in bytes                     (-> r15)
        ;   The loop is bottom-tested and advances 16 bytes per pass, so at
        ;   least one 16-byte block is always consumed; presumably callers
        ;   pass a non-zero multiple of 16 -- confirm.
        ;   Stack frame (280 bytes reserved, 272 used):
        ;     [rsp+0   .. rsp+15 ]  one byte per table entry:
        ;                           (low byte of the entry's 2nd qword) << 4
        ;     [rsp+16  .. rsp+143]  per entry: (2nd qword >> 4) | (1st qword << 60)
        ;     [rsp+144 .. rsp+271]  per entry: 1st qword >> 4
        ;   i.e. a copy of the table shifted right by 4 bits, with the bits
        ;   shifted out kept in the byte array.  rbp = rsp+144.
        ;   Also reads $L$rem_8bit (defined outside this view) as a table of
        ;   16-bit words indexed by a byte value.
        ;   Presumably this is GHASH over the input using 4-bit tables --
        ;   inferred from the name; confirm against caller.
        ;   rbx, rbp, r12-r15, rdi and rsi are restored on exit.
        ; ------------------------------------------------------------------
    113 
    114 ALIGN	16
    115 gcm_ghash_4bit:
    116 	mov	QWORD[8+rsp],rdi	;WIN64 prologue
    117 	mov	QWORD[16+rsp],rsi
    118 	mov	rax,rsp
    119 $L$SEH_begin_gcm_ghash_4bit:
    120 	mov	rdi,rcx	; rdi = 1st argument
    121 	mov	rsi,rdx	; rsi = 2nd argument
    122 	mov	rdx,r8	; rdx = 3rd argument
    123 	mov	rcx,r9	; rcx = 4th argument
    124 
    125 
    126 	push	rbx
    127 	push	rbp
    128 	push	r12
    129 	push	r13
    130 	push	r14
    131 	push	r15
    132 	sub	rsp,280
    133 $L$ghash_prologue:
    134 	mov	r14,rdx	; r14 = input pointer
    135 	mov	r15,rcx	; r15 = length (turned into an end pointer below)
    136 	sub	rsi,-128	; bias rsi by +128; undone by "add rsi,-128" below
    137 	lea	rbp,[((16+128))+rsp]	; rbp = boundary between the two qword tables
    138 	xor	edx,edx
        ; Build the shifted copy of the table.  The loads, shifts and stores
        ; for consecutive entries are interleaved, alternating rax/rbx and
        ; r8/r9 between even and odd entries.
    139 	mov	r8,QWORD[((0+0-128))+rsi]	; entry 0, 1st qword
    140 	mov	rax,QWORD[((0+8-128))+rsi]	; entry 0, 2nd qword
    141 	mov	dl,al
    142 	shr	rax,4
    143 	mov	r10,r8
    144 	shr	r8,4
    145 	mov	r9,QWORD[((16+0-128))+rsi]
    146 	shl	dl,4	; keep the 4 bits shifted out of the 2nd qword
    147 	mov	rbx,QWORD[((16+8-128))+rsi]
    148 	shl	r10,60	; low 4 bits of the 1st qword move into the 2nd
    149 	mov	BYTE[rsp],dl
    150 	or	rax,r10
    151 	mov	dl,bl
    152 	shr	rbx,4
    153 	mov	r10,r9
    154 	shr	r9,4
    155 	mov	QWORD[rbp],r8
    156 	mov	r8,QWORD[((32+0-128))+rsi]
    157 	shl	dl,4
    158 	mov	QWORD[((0-128))+rbp],rax
    159 	mov	rax,QWORD[((32+8-128))+rsi]
    160 	shl	r10,60
    161 	mov	BYTE[1+rsp],dl
    162 	or	rbx,r10
    163 	mov	dl,al
    164 	shr	rax,4
    165 	mov	r10,r8
    166 	shr	r8,4
    167 	mov	QWORD[8+rbp],r9
    168 	mov	r9,QWORD[((48+0-128))+rsi]
    169 	shl	dl,4
    170 	mov	QWORD[((8-128))+rbp],rbx
    171 	mov	rbx,QWORD[((48+8-128))+rsi]
    172 	shl	r10,60
    173 	mov	BYTE[2+rsp],dl
    174 	or	rax,r10
    175 	mov	dl,bl
    176 	shr	rbx,4
    177 	mov	r10,r9
    178 	shr	r9,4
    179 	mov	QWORD[16+rbp],r8
    180 	mov	r8,QWORD[((64+0-128))+rsi]
    181 	shl	dl,4
    182 	mov	QWORD[((16-128))+rbp],rax
    183 	mov	rax,QWORD[((64+8-128))+rsi]
    184 	shl	r10,60
    185 	mov	BYTE[3+rsp],dl
    186 	or	rbx,r10
    187 	mov	dl,al
    188 	shr	rax,4
    189 	mov	r10,r8
    190 	shr	r8,4
    191 	mov	QWORD[24+rbp],r9
    192 	mov	r9,QWORD[((80+0-128))+rsi]
    193 	shl	dl,4
    194 	mov	QWORD[((24-128))+rbp],rbx
    195 	mov	rbx,QWORD[((80+8-128))+rsi]
    196 	shl	r10,60
    197 	mov	BYTE[4+rsp],dl
    198 	or	rax,r10
    199 	mov	dl,bl
    200 	shr	rbx,4
    201 	mov	r10,r9
    202 	shr	r9,4
    203 	mov	QWORD[32+rbp],r8
    204 	mov	r8,QWORD[((96+0-128))+rsi]
    205 	shl	dl,4
    206 	mov	QWORD[((32-128))+rbp],rax
    207 	mov	rax,QWORD[((96+8-128))+rsi]
    208 	shl	r10,60
    209 	mov	BYTE[5+rsp],dl
    210 	or	rbx,r10
    211 	mov	dl,al
    212 	shr	rax,4
    213 	mov	r10,r8
    214 	shr	r8,4
    215 	mov	QWORD[40+rbp],r9
    216 	mov	r9,QWORD[((112+0-128))+rsi]
    217 	shl	dl,4
    218 	mov	QWORD[((40-128))+rbp],rbx
    219 	mov	rbx,QWORD[((112+8-128))+rsi]
    220 	shl	r10,60
    221 	mov	BYTE[6+rsp],dl
    222 	or	rax,r10
    223 	mov	dl,bl
    224 	shr	rbx,4
    225 	mov	r10,r9
    226 	shr	r9,4
    227 	mov	QWORD[48+rbp],r8
    228 	mov	r8,QWORD[((128+0-128))+rsi]
    229 	shl	dl,4
    230 	mov	QWORD[((48-128))+rbp],rax
    231 	mov	rax,QWORD[((128+8-128))+rsi]
    232 	shl	r10,60
    233 	mov	BYTE[7+rsp],dl
    234 	or	rbx,r10
    235 	mov	dl,al
    236 	shr	rax,4
    237 	mov	r10,r8
    238 	shr	r8,4
    239 	mov	QWORD[56+rbp],r9
    240 	mov	r9,QWORD[((144+0-128))+rsi]
    241 	shl	dl,4
    242 	mov	QWORD[((56-128))+rbp],rbx
    243 	mov	rbx,QWORD[((144+8-128))+rsi]
    244 	shl	r10,60
    245 	mov	BYTE[8+rsp],dl
    246 	or	rax,r10
    247 	mov	dl,bl
    248 	shr	rbx,4
    249 	mov	r10,r9
    250 	shr	r9,4
    251 	mov	QWORD[64+rbp],r8
    252 	mov	r8,QWORD[((160+0-128))+rsi]
    253 	shl	dl,4
    254 	mov	QWORD[((64-128))+rbp],rax
    255 	mov	rax,QWORD[((160+8-128))+rsi]
    256 	shl	r10,60
    257 	mov	BYTE[9+rsp],dl
    258 	or	rbx,r10
    259 	mov	dl,al
    260 	shr	rax,4
    261 	mov	r10,r8
    262 	shr	r8,4
    263 	mov	QWORD[72+rbp],r9
    264 	mov	r9,QWORD[((176+0-128))+rsi]
    265 	shl	dl,4
    266 	mov	QWORD[((72-128))+rbp],rbx
    267 	mov	rbx,QWORD[((176+8-128))+rsi]
    268 	shl	r10,60
    269 	mov	BYTE[10+rsp],dl
    270 	or	rax,r10
    271 	mov	dl,bl
    272 	shr	rbx,4
    273 	mov	r10,r9
    274 	shr	r9,4
    275 	mov	QWORD[80+rbp],r8
    276 	mov	r8,QWORD[((192+0-128))+rsi]
    277 	shl	dl,4
    278 	mov	QWORD[((80-128))+rbp],rax
    279 	mov	rax,QWORD[((192+8-128))+rsi]
    280 	shl	r10,60
    281 	mov	BYTE[11+rsp],dl
    282 	or	rbx,r10
    283 	mov	dl,al
    284 	shr	rax,4
    285 	mov	r10,r8
    286 	shr	r8,4
    287 	mov	QWORD[88+rbp],r9
    288 	mov	r9,QWORD[((208+0-128))+rsi]
    289 	shl	dl,4
    290 	mov	QWORD[((88-128))+rbp],rbx
    291 	mov	rbx,QWORD[((208+8-128))+rsi]
    292 	shl	r10,60
    293 	mov	BYTE[12+rsp],dl
    294 	or	rax,r10
    295 	mov	dl,bl
    296 	shr	rbx,4
    297 	mov	r10,r9
    298 	shr	r9,4
    299 	mov	QWORD[96+rbp],r8
    300 	mov	r8,QWORD[((224+0-128))+rsi]
    301 	shl	dl,4
    302 	mov	QWORD[((96-128))+rbp],rax
    303 	mov	rax,QWORD[((224+8-128))+rsi]
    304 	shl	r10,60
    305 	mov	BYTE[13+rsp],dl
    306 	or	rbx,r10
    307 	mov	dl,al
    308 	shr	rax,4
    309 	mov	r10,r8
    310 	shr	r8,4
    311 	mov	QWORD[104+rbp],r9
    312 	mov	r9,QWORD[((240+0-128))+rsi]
    313 	shl	dl,4
    314 	mov	QWORD[((104-128))+rbp],rbx
    315 	mov	rbx,QWORD[((240+8-128))+rsi]
    316 	shl	r10,60
    317 	mov	BYTE[14+rsp],dl
    318 	or	rax,r10
    319 	mov	dl,bl
    320 	shr	rbx,4
    321 	mov	r10,r9
    322 	shr	r9,4
    323 	mov	QWORD[112+rbp],r8
    324 	shl	dl,4
    325 	mov	QWORD[((112-128))+rbp],rax
    326 	shl	r10,60
    327 	mov	BYTE[15+rsp],dl
    328 	or	rbx,r10
    329 	mov	QWORD[120+rbp],r9
    330 	mov	QWORD[((120-128))+rbp],rbx
    331 	add	rsi,-128	; remove the +128 bias; rsi = table base again
    332 	mov	r8,QWORD[8+rdi]	; r9:r8 = current running value
    333 	mov	r9,QWORD[rdi]
    334 	add	r15,r14	; r15 = end-of-input pointer
    335 	lea	r11,[$L$rem_8bit]	; r11 = 16-bit remainder table base
    336 	jmp	NEAR $L$outer_loop
    337 ALIGN	16
        ; One pass per 16-byte input block.  The block is XORed into the
        ; running value, the result is parked at [rdi], and then consumed
        ; one byte at a time starting from byte 15.  For each byte:
        ;   al  = low nibble * 16  -> offset into the table at rsi
        ;   ebx/ecx = high nibble  -> index into the shifted stack tables
        ;   r12/r13 = byte shifted out of r8, looked up in $L$rem_8bit and
        ;             XORed into the top 16 bits of r9 one step later.
    338 $L$outer_loop:
    339 	xor	r9,QWORD[r14]	; mix input bytes 0..7 into the running value
    340 	mov	rdx,QWORD[8+r14]
    341 	lea	r14,[16+r14]	; advance input by one block
    342 	xor	rdx,r8	; rdx = input bytes 8..15 XOR running value
    343 	mov	QWORD[rdi],r9	; park the XORed block so it can be re-read below
    344 	mov	QWORD[8+rdi],rdx
    345 	shr	rdx,32	; edx = bytes 12..15 of the XORed block
    346 	xor	rax,rax
    347 	rol	edx,8	; dl = byte 15
    348 	mov	al,dl
    349 	movzx	ebx,dl
    350 	shl	al,4
    351 	shr	ebx,4
    352 	rol	edx,8	; dl = byte 14
    353 	mov	r8,QWORD[8+rax*1+rsi]
    354 	mov	r9,QWORD[rax*1+rsi]
    355 	mov	al,dl
    356 	movzx	ecx,dl
    357 	shl	al,4
    358 	movzx	r12,BYTE[rbx*1+rsp]
    359 	shr	ecx,4
    360 	xor	r12,r8
    361 	mov	r10,r9
    362 	shr	r8,8	; shift the accumulator r9:r8 right by one byte
    363 	movzx	r12,r12b	; r12 = byte being shifted out
    364 	shr	r9,8
    365 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    366 	shl	r10,56	; low byte of r9 carries into the top of r8
    367 	xor	r9,QWORD[rbx*8+rbp]
    368 	rol	edx,8
    369 	xor	r8,QWORD[8+rax*1+rsi]
    370 	xor	r9,QWORD[rax*1+rsi]
    371 	mov	al,dl
    372 	xor	r8,r10
    373 	movzx	r12,WORD[r12*2+r11]	; remainder word for the shifted-out byte
    374 	movzx	ebx,dl
    375 	shl	al,4
    376 	movzx	r13,BYTE[rcx*1+rsp]
    377 	shr	ebx,4
    378 	shl	r12,48	; position the remainder in the top 16 bits
    379 	xor	r13,r8
    380 	mov	r10,r9
    381 	xor	r9,r12
    382 	shr	r8,8
    383 	movzx	r13,r13b
    384 	shr	r9,8
    385 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    386 	shl	r10,56
    387 	xor	r9,QWORD[rcx*8+rbp]
    388 	rol	edx,8
    389 	xor	r8,QWORD[8+rax*1+rsi]
    390 	xor	r9,QWORD[rax*1+rsi]
    391 	mov	al,dl
    392 	xor	r8,r10
    393 	movzx	r13,WORD[r13*2+r11]
    394 	movzx	ecx,dl
    395 	shl	al,4
    396 	movzx	r12,BYTE[rbx*1+rsp]
    397 	shr	ecx,4
    398 	shl	r13,48
    399 	xor	r12,r8
    400 	mov	r10,r9
    401 	xor	r9,r13
    402 	shr	r8,8
    403 	movzx	r12,r12b
    404 	mov	edx,DWORD[8+rdi]	; reload bytes 8..11 of the parked block
    405 	shr	r9,8
    406 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    407 	shl	r10,56
    408 	xor	r9,QWORD[rbx*8+rbp]
    409 	rol	edx,8
    410 	xor	r8,QWORD[8+rax*1+rsi]
    411 	xor	r9,QWORD[rax*1+rsi]
    412 	mov	al,dl
    413 	xor	r8,r10
    414 	movzx	r12,WORD[r12*2+r11]
    415 	movzx	ebx,dl
    416 	shl	al,4
    417 	movzx	r13,BYTE[rcx*1+rsp]
    418 	shr	ebx,4
    419 	shl	r12,48
    420 	xor	r13,r8
    421 	mov	r10,r9
    422 	xor	r9,r12
    423 	shr	r8,8
    424 	movzx	r13,r13b
    425 	shr	r9,8
    426 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    427 	shl	r10,56
    428 	xor	r9,QWORD[rcx*8+rbp]
    429 	rol	edx,8
    430 	xor	r8,QWORD[8+rax*1+rsi]
    431 	xor	r9,QWORD[rax*1+rsi]
    432 	mov	al,dl
    433 	xor	r8,r10
    434 	movzx	r13,WORD[r13*2+r11]
    435 	movzx	ecx,dl
    436 	shl	al,4
    437 	movzx	r12,BYTE[rbx*1+rsp]
    438 	shr	ecx,4
    439 	shl	r13,48
    440 	xor	r12,r8
    441 	mov	r10,r9
    442 	xor	r9,r13
    443 	shr	r8,8
    444 	movzx	r12,r12b
    445 	shr	r9,8
    446 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    447 	shl	r10,56
    448 	xor	r9,QWORD[rbx*8+rbp]
    449 	rol	edx,8
    450 	xor	r8,QWORD[8+rax*1+rsi]
    451 	xor	r9,QWORD[rax*1+rsi]
    452 	mov	al,dl
    453 	xor	r8,r10
    454 	movzx	r12,WORD[r12*2+r11]
    455 	movzx	ebx,dl
    456 	shl	al,4
    457 	movzx	r13,BYTE[rcx*1+rsp]
    458 	shr	ebx,4
    459 	shl	r12,48
    460 	xor	r13,r8
    461 	mov	r10,r9
    462 	xor	r9,r12
    463 	shr	r8,8
    464 	movzx	r13,r13b
    465 	shr	r9,8
    466 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    467 	shl	r10,56
    468 	xor	r9,QWORD[rcx*8+rbp]
    469 	rol	edx,8
    470 	xor	r8,QWORD[8+rax*1+rsi]
    471 	xor	r9,QWORD[rax*1+rsi]
    472 	mov	al,dl
    473 	xor	r8,r10
    474 	movzx	r13,WORD[r13*2+r11]
    475 	movzx	ecx,dl
    476 	shl	al,4
    477 	movzx	r12,BYTE[rbx*1+rsp]
    478 	shr	ecx,4
    479 	shl	r13,48
    480 	xor	r12,r8
    481 	mov	r10,r9
    482 	xor	r9,r13
    483 	shr	r8,8
    484 	movzx	r12,r12b
    485 	mov	edx,DWORD[4+rdi]	; reload bytes 4..7 of the parked block
    486 	shr	r9,8
    487 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    488 	shl	r10,56
    489 	xor	r9,QWORD[rbx*8+rbp]
    490 	rol	edx,8
    491 	xor	r8,QWORD[8+rax*1+rsi]
    492 	xor	r9,QWORD[rax*1+rsi]
    493 	mov	al,dl
    494 	xor	r8,r10
    495 	movzx	r12,WORD[r12*2+r11]
    496 	movzx	ebx,dl
    497 	shl	al,4
    498 	movzx	r13,BYTE[rcx*1+rsp]
    499 	shr	ebx,4
    500 	shl	r12,48
    501 	xor	r13,r8
    502 	mov	r10,r9
    503 	xor	r9,r12
    504 	shr	r8,8
    505 	movzx	r13,r13b
    506 	shr	r9,8
    507 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    508 	shl	r10,56
    509 	xor	r9,QWORD[rcx*8+rbp]
    510 	rol	edx,8
    511 	xor	r8,QWORD[8+rax*1+rsi]
    512 	xor	r9,QWORD[rax*1+rsi]
    513 	mov	al,dl
    514 	xor	r8,r10
    515 	movzx	r13,WORD[r13*2+r11]
    516 	movzx	ecx,dl
    517 	shl	al,4
    518 	movzx	r12,BYTE[rbx*1+rsp]
    519 	shr	ecx,4
    520 	shl	r13,48
    521 	xor	r12,r8
    522 	mov	r10,r9
    523 	xor	r9,r13
    524 	shr	r8,8
    525 	movzx	r12,r12b
    526 	shr	r9,8
    527 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    528 	shl	r10,56
    529 	xor	r9,QWORD[rbx*8+rbp]
    530 	rol	edx,8
    531 	xor	r8,QWORD[8+rax*1+rsi]
    532 	xor	r9,QWORD[rax*1+rsi]
    533 	mov	al,dl
    534 	xor	r8,r10
    535 	movzx	r12,WORD[r12*2+r11]
    536 	movzx	ebx,dl
    537 	shl	al,4
    538 	movzx	r13,BYTE[rcx*1+rsp]
    539 	shr	ebx,4
    540 	shl	r12,48
    541 	xor	r13,r8
    542 	mov	r10,r9
    543 	xor	r9,r12
    544 	shr	r8,8
    545 	movzx	r13,r13b
    546 	shr	r9,8
    547 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    548 	shl	r10,56
    549 	xor	r9,QWORD[rcx*8+rbp]
    550 	rol	edx,8
    551 	xor	r8,QWORD[8+rax*1+rsi]
    552 	xor	r9,QWORD[rax*1+rsi]
    553 	mov	al,dl
    554 	xor	r8,r10
    555 	movzx	r13,WORD[r13*2+r11]
    556 	movzx	ecx,dl
    557 	shl	al,4
    558 	movzx	r12,BYTE[rbx*1+rsp]
    559 	shr	ecx,4
    560 	shl	r13,48
    561 	xor	r12,r8
    562 	mov	r10,r9
    563 	xor	r9,r13
    564 	shr	r8,8
    565 	movzx	r12,r12b
    566 	mov	edx,DWORD[rdi]	; reload bytes 0..3 of the parked block
    567 	shr	r9,8
    568 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    569 	shl	r10,56
    570 	xor	r9,QWORD[rbx*8+rbp]
    571 	rol	edx,8
    572 	xor	r8,QWORD[8+rax*1+rsi]
    573 	xor	r9,QWORD[rax*1+rsi]
    574 	mov	al,dl
    575 	xor	r8,r10
    576 	movzx	r12,WORD[r12*2+r11]
    577 	movzx	ebx,dl
    578 	shl	al,4
    579 	movzx	r13,BYTE[rcx*1+rsp]
    580 	shr	ebx,4
    581 	shl	r12,48
    582 	xor	r13,r8
    583 	mov	r10,r9
    584 	xor	r9,r12
    585 	shr	r8,8
    586 	movzx	r13,r13b
    587 	shr	r9,8
    588 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    589 	shl	r10,56
    590 	xor	r9,QWORD[rcx*8+rbp]
    591 	rol	edx,8
    592 	xor	r8,QWORD[8+rax*1+rsi]
    593 	xor	r9,QWORD[rax*1+rsi]
    594 	mov	al,dl
    595 	xor	r8,r10
    596 	movzx	r13,WORD[r13*2+r11]
    597 	movzx	ecx,dl
    598 	shl	al,4
    599 	movzx	r12,BYTE[rbx*1+rsp]
    600 	shr	ecx,4
    601 	shl	r13,48
    602 	xor	r12,r8
    603 	mov	r10,r9
    604 	xor	r9,r13
    605 	shr	r8,8
    606 	movzx	r12,r12b
    607 	shr	r9,8
    608 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    609 	shl	r10,56
    610 	xor	r9,QWORD[rbx*8+rbp]
    611 	rol	edx,8
    612 	xor	r8,QWORD[8+rax*1+rsi]
    613 	xor	r9,QWORD[rax*1+rsi]
    614 	mov	al,dl
    615 	xor	r8,r10
    616 	movzx	r12,WORD[r12*2+r11]
    617 	movzx	ebx,dl
    618 	shl	al,4
    619 	movzx	r13,BYTE[rcx*1+rsp]
    620 	shr	ebx,4
    621 	shl	r12,48
    622 	xor	r13,r8
    623 	mov	r10,r9
    624 	xor	r9,r12
    625 	shr	r8,8
    626 	movzx	r13,r13b
    627 	shr	r9,8
    628 	xor	r8,QWORD[((-128))+rcx*8+rbp]
    629 	shl	r10,56
    630 	xor	r9,QWORD[rcx*8+rbp]
    631 	rol	edx,8
    632 	xor	r8,QWORD[8+rax*1+rsi]
    633 	xor	r9,QWORD[rax*1+rsi]
    634 	mov	al,dl
    635 	xor	r8,r10
    636 	movzx	r13,WORD[r13*2+r11]
    637 	movzx	ecx,dl
    638 	shl	al,4
    639 	movzx	r12,BYTE[rbx*1+rsp]
    640 	and	ecx,240	; last byte: keep high nibble * 16 to index the table at rsi
    641 	shl	r13,48
    642 	xor	r12,r8
    643 	mov	r10,r9
    644 	xor	r9,r13
    645 	shr	r8,8
    646 	movzx	r12,r12b
    647 	mov	edx,DWORD[((-4))+rdi]	; NOTE(review): reads 4 bytes below rdi; edx is overwritten before any later use -- confirm this load is intentional
    648 	shr	r9,8
    649 	xor	r8,QWORD[((-128))+rbx*8+rbp]
    650 	shl	r10,56
    651 	xor	r9,QWORD[rbx*8+rbp]
    652 	movzx	r12,WORD[r12*2+r11]
    653 	xor	r8,QWORD[8+rax*1+rsi]
    654 	xor	r9,QWORD[rax*1+rsi]
    655 	shl	r12,48
    656 	xor	r8,r10
    657 	xor	r9,r12
    658 	movzx	r13,r8b
    659 	shr	r8,4	; final step shifts by a single nibble
    660 	mov	r10,r9
    661 	shl	r13b,4
    662 	shr	r9,4
    663 	xor	r8,QWORD[8+rcx*1+rsi]
    664 	movzx	r13,WORD[r13*2+r11]
    665 	shl	r10,60
    666 	xor	r9,QWORD[rcx*1+rsi]
    667 	xor	r8,r10
    668 	shl	r13,48
    669 	bswap	r8	; byte-reverse both halves of the result
    670 	xor	r9,r13
    671 	bswap	r9
    672 	cmp	r14,r15	; more input left? (unsigned compare against end pointer)
    673 	jb	NEAR $L$outer_loop
    674 	mov	QWORD[8+rdi],r8	; store the final running value
    675 	mov	QWORD[rdi],r9
    676 
    677 	lea	rsi,[((280+48))+rsp]	; rsi = rsp as it was before the six pushes
    678 	mov	r15,QWORD[((-48))+rsi]
    679 	mov	r14,QWORD[((-40))+rsi]
    680 	mov	r13,QWORD[((-32))+rsi]
    681 	mov	r12,QWORD[((-24))+rsi]
    682 	mov	rbp,QWORD[((-16))+rsi]
    683 	mov	rbx,QWORD[((-8))+rsi]
    684 	lea	rsp,[rsi]	; drop the frame and the six saved slots
    685 $L$ghash_epilogue:
    686 	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
    687 	mov	rsi,QWORD[16+rsp]
    688 	DB	0F3h,0C3h		;repret
    689 $L$SEH_end_gcm_ghash_4bit:
    690 global	gcm_init_clmul
        ; ------------------------------------------------------------------
        ; gcm_init_clmul -- Win64 ABI entry point.
        ;   In:   rcx -> output table, 96 bytes written ([rcx] .. [rcx+95])
        ;         rdx -> 16-byte input value (unaligned load)
        ;   The input has its qwords swapped, is shifted left by one bit as
        ;   a 128-bit quantity and, if its top bit was set, XORed with the
        ;   constant at $L$0x1c2_polynomial (defined outside this view).
        ;   Call the result H.  Three carry-less multiply + reduce steps by
        ;   H follow -- presumably yielding H^2, H^3 and H^4 in GF(2^128);
        ;   confirm against the consumers of the table.
        ;   Output layout:
        ;     [rcx+ 0] H              [rcx+48] third value
        ;     [rcx+16] second value   [rcx+64] fourth value
        ;     [rcx+32], [rcx+80]  the (low ^ high) qwords of each pair,
        ;                         packed two per 16-byte slot
        ;   Uses 24 bytes of stack to preserve xmm6; modifies xmm0-xmm5.
        ; ------------------------------------------------------------------
    691 
    692 ALIGN	16
    693 gcm_init_clmul:
    694 $L$_init_clmul:
    695 $L$SEH_begin_gcm_init_clmul:
    696 
    697 DB	0x48,0x83,0xec,0x18	; sub rsp,0x18
    698 DB	0x0f,0x29,0x34,0x24	; movaps [rsp],xmm6
    699 	movdqu	xmm2,XMMWORD[rdx]
    700 	pshufd	xmm2,xmm2,78	; swap the two qwords
    701 
    702 
    703 	pshufd	xmm4,xmm2,255	; broadcast the top dword
    704 	movdqa	xmm3,xmm2
    705 	psllq	xmm2,1
    706 	pxor	xmm5,xmm5
    707 	psrlq	xmm3,63
    708 	pcmpgtd	xmm5,xmm4	; all-ones if the top bit was set, else zero
    709 	pslldq	xmm3,8	; carry bit 63 of the low qword into the high qword
    710 	por	xmm2,xmm3	; xmm2 = value << 1 (128-bit)
    711 
    712 
    713 	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
    714 	pxor	xmm2,xmm5	; conditional XOR with the constant; xmm2 = H
    715 
    716 
    717 	pshufd	xmm6,xmm2,78
    718 	movdqa	xmm0,xmm2
    719 	pxor	xmm6,xmm2	; xmm6 = H.lo ^ H.hi in both qwords
    720 	movdqa	xmm1,xmm0
    721 	pshufd	xmm3,xmm0,78
    722 	pxor	xmm3,xmm0
    723 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
    724 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
    725 DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
    726 	pxor	xmm3,xmm0
    727 	pxor	xmm3,xmm1	; xmm3 = middle term of the three-multiply product
    728 
    729 	movdqa	xmm4,xmm3
    730 	psrldq	xmm3,8
    731 	pslldq	xmm4,8
    732 	pxor	xmm1,xmm3
    733 	pxor	xmm0,xmm4	; xmm1:xmm0 = 256-bit product
    734 
        ; Reduction of xmm1:xmm0 to 128 bits, first phase.
    735 	movdqa	xmm4,xmm0
    736 	movdqa	xmm3,xmm0
    737 	psllq	xmm0,5
    738 	pxor	xmm3,xmm0
    739 	psllq	xmm0,1
    740 	pxor	xmm0,xmm3
    741 	psllq	xmm0,57
    742 	movdqa	xmm3,xmm0
    743 	pslldq	xmm0,8
    744 	psrldq	xmm3,8
    745 	pxor	xmm0,xmm4
    746 	pxor	xmm1,xmm3
    747 
    748 
        ; Reduction, second phase; result left in xmm0.
    749 	movdqa	xmm4,xmm0
    750 	psrlq	xmm0,1
    751 	pxor	xmm1,xmm4
    752 	pxor	xmm4,xmm0
    753 	psrlq	xmm0,5
    754 	pxor	xmm0,xmm4
    755 	psrlq	xmm0,1
    756 	pxor	xmm0,xmm1
    757 	pshufd	xmm3,xmm2,78
    758 	pshufd	xmm4,xmm0,78
    759 	pxor	xmm3,xmm2
    760 	movdqu	XMMWORD[rcx],xmm2	; table slot 0 = H
    761 	pxor	xmm4,xmm0
    762 	movdqu	XMMWORD[16+rcx],xmm0	; table slot 1 = first product
    763 DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
    764 	movdqu	XMMWORD[32+rcx],xmm4	; table slot 2 = packed (lo ^ hi) qwords of slots 0 and 1
    765 	movdqa	xmm1,xmm0
    766 	pshufd	xmm3,xmm0,78
    767 	pxor	xmm3,xmm0
    768 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
    769 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
    770 DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
    771 	pxor	xmm3,xmm0
    772 	pxor	xmm3,xmm1
    773 
    774 	movdqa	xmm4,xmm3
    775 	psrldq	xmm3,8
    776 	pslldq	xmm4,8
    777 	pxor	xmm1,xmm3
    778 	pxor	xmm0,xmm4
    779 
    780 	movdqa	xmm4,xmm0
    781 	movdqa	xmm3,xmm0
    782 	psllq	xmm0,5
    783 	pxor	xmm3,xmm0
    784 	psllq	xmm0,1
    785 	pxor	xmm0,xmm3
    786 	psllq	xmm0,57
    787 	movdqa	xmm3,xmm0
    788 	pslldq	xmm0,8
    789 	psrldq	xmm3,8
    790 	pxor	xmm0,xmm4
    791 	pxor	xmm1,xmm3
    792 
    793 
    794 	movdqa	xmm4,xmm0
    795 	psrlq	xmm0,1
    796 	pxor	xmm1,xmm4
    797 	pxor	xmm4,xmm0
    798 	psrlq	xmm0,5
    799 	pxor	xmm0,xmm4
    800 	psrlq	xmm0,1
    801 	pxor	xmm0,xmm1
    802 	movdqa	xmm5,xmm0	; keep the second product for table slot 3
    803 	movdqa	xmm1,xmm0
    804 	pshufd	xmm3,xmm0,78
    805 	pxor	xmm3,xmm0
    806 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
    807 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
    808 DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
    809 	pxor	xmm3,xmm0
    810 	pxor	xmm3,xmm1
    811 
    812 	movdqa	xmm4,xmm3
    813 	psrldq	xmm3,8
    814 	pslldq	xmm4,8
    815 	pxor	xmm1,xmm3
    816 	pxor	xmm0,xmm4
    817 
    818 	movdqa	xmm4,xmm0
    819 	movdqa	xmm3,xmm0
    820 	psllq	xmm0,5
    821 	pxor	xmm3,xmm0
    822 	psllq	xmm0,1
    823 	pxor	xmm0,xmm3
    824 	psllq	xmm0,57
    825 	movdqa	xmm3,xmm0
    826 	pslldq	xmm0,8
    827 	psrldq	xmm3,8
    828 	pxor	xmm0,xmm4
    829 	pxor	xmm1,xmm3
    830 
    831 
    832 	movdqa	xmm4,xmm0
    833 	psrlq	xmm0,1
    834 	pxor	xmm1,xmm4
    835 	pxor	xmm4,xmm0
    836 	psrlq	xmm0,5
    837 	pxor	xmm0,xmm4
    838 	psrlq	xmm0,1
    839 	pxor	xmm0,xmm1
    840 	pshufd	xmm3,xmm5,78
    841 	pshufd	xmm4,xmm0,78
    842 	pxor	xmm3,xmm5
    843 	movdqu	XMMWORD[48+rcx],xmm5	; table slot 3 = second product
    844 	pxor	xmm4,xmm0
    845 	movdqu	XMMWORD[64+rcx],xmm0	; table slot 4 = third product
    846 DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
    847 	movdqu	XMMWORD[80+rcx],xmm4	; table slot 5 = packed (lo ^ hi) qwords of slots 3 and 4
    848 	movaps	xmm6,XMMWORD[rsp]	; restore xmm6 saved by the prologue
    849 	lea	rsp,[24+rsp]
    850 $L$SEH_end_gcm_init_clmul:
    851 	DB	0F3h,0C3h		;repret
    852 
    853 global	gcm_gmult_clmul
        ; ------------------------------------------------------------------
        ; gcm_gmult_clmul -- Win64 ABI entry point.
        ;   In:   rcx -> 16-byte value, updated in place
        ;         rdx -> table; reads the 16 bytes at [rdx] and at [rdx+32]
        ;   The value is byte-shuffled with $L$bswap_mask (defined outside
        ;   this view), carry-less multiplied by the entry at [rdx] using
        ;   three PCLMULQDQ, reduced to 128 bits, shuffled back and stored.
        ;   Presumably the table is the one written by gcm_init_clmul --
        ;   confirm against caller.
        ;   No stack use; modifies xmm0-xmm5 only.
        ; ------------------------------------------------------------------
    854 
    855 ALIGN	16
    856 gcm_gmult_clmul:
    857 $L$_gmult_clmul:
    858 	movdqu	xmm0,XMMWORD[rcx]
    859 	movdqa	xmm5,XMMWORD[$L$bswap_mask]
    860 	movdqu	xmm2,XMMWORD[rdx]
    861 	movdqu	xmm4,XMMWORD[32+rdx]
    862 DB	102,15,56,0,197	; pshufb xmm0,xmm5
    863 	movdqa	xmm1,xmm0
    864 	pshufd	xmm3,xmm0,78
    865 	pxor	xmm3,xmm0	; xmm3 = lo ^ hi of the value
    866 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
    867 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
    868 DB	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00
    869 	pxor	xmm3,xmm0
    870 	pxor	xmm3,xmm1	; xmm3 = middle term of the product
    871 
    872 	movdqa	xmm4,xmm3
    873 	psrldq	xmm3,8
    874 	pslldq	xmm4,8
    875 	pxor	xmm1,xmm3
    876 	pxor	xmm0,xmm4	; xmm1:xmm0 = 256-bit product
    877 
        ; Reduction of xmm1:xmm0 to 128 bits, first phase.
    878 	movdqa	xmm4,xmm0
    879 	movdqa	xmm3,xmm0
    880 	psllq	xmm0,5
    881 	pxor	xmm3,xmm0
    882 	psllq	xmm0,1
    883 	pxor	xmm0,xmm3
    884 	psllq	xmm0,57
    885 	movdqa	xmm3,xmm0
    886 	pslldq	xmm0,8
    887 	psrldq	xmm3,8
    888 	pxor	xmm0,xmm4
    889 	pxor	xmm1,xmm3
    890 
    891 
        ; Reduction, second phase; result left in xmm0.
    892 	movdqa	xmm4,xmm0
    893 	psrlq	xmm0,1
    894 	pxor	xmm1,xmm4
    895 	pxor	xmm4,xmm0
    896 	psrlq	xmm0,5
    897 	pxor	xmm0,xmm4
    898 	psrlq	xmm0,1
    899 	pxor	xmm0,xmm1
    900 DB	102,15,56,0,197	; pshufb xmm0,xmm5
    901 	movdqu	XMMWORD[rcx],xmm0
    902 	DB	0F3h,0C3h		;repret
    903 
    904 global	gcm_ghash_clmul
        ; ------------------------------------------------------------------
        ; gcm_ghash_clmul -- Win64 ABI entry point.
        ;   In:   rcx -> 16-byte running value, updated in place
        ;         rdx -> table; 16-byte slots at +0, +16, +32, +48, +64, +80
        ;                are read (presumably the layout written by
        ;                gcm_init_clmul -- confirm against caller)
        ;         r8  -> input data
        ;         r9  =  input length in bytes
        ;   Paths, chosen from the length and a CPU-capability test:
        ;     $L$mod4_loop / $L$tail4x   four 16-byte blocks per pass
        ;     $L$mod_loop  / $L$even_tail two 16-byte blocks per pass
        ;     $L$odd_tail                 one remaining 16-byte block
        ;   NOTE(review): a length of zero is not special-cased here (only
        ;   r9 == 16 branches straight to $L$odd_tail); presumably callers
        ;   pass a non-zero multiple of 16 -- confirm.
        ;   Reserves 168 bytes of stack to preserve xmm6-xmm15, which are
        ;   restored at $L$done.  Reads $L$bswap_mask and $L$7_mask, both
        ;   defined outside this view.
        ; ------------------------------------------------------------------
    905 
    906 ALIGN	32
    907 gcm_ghash_clmul:
    908 $L$_ghash_clmul:
    909 	lea	rax,[((-136))+rsp]
    910 $L$SEH_begin_gcm_ghash_clmul:
    911 
    912 DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]
    913 DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
    914 DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
    915 DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
    916 DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
    917 DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
    918 DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
    919 DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
    920 DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
    921 DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
    922 DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
    923 	movdqa	xmm10,XMMWORD[$L$bswap_mask]	; xmm10 = byte-shuffle mask for the whole function
    924 
    925 	movdqu	xmm0,XMMWORD[rcx]	; xmm0 = running value
    926 	movdqu	xmm2,XMMWORD[rdx]	; xmm2 = table slot 0
    927 	movdqu	xmm7,XMMWORD[32+rdx]	; xmm7 = table slot 2
    928 DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
    929 
    930 	sub	r9,0x10
    931 	jz	NEAR $L$odd_tail	; exactly one block
    932 
    933 	movdqu	xmm6,XMMWORD[16+rdx]	; xmm6 = table slot 1
    934 	lea	rax,[OPENSSL_ia32cap_P]
    935 	mov	eax,DWORD[4+rax]	; second dword of the capability vector
    936 	cmp	r9,0x30
    937 	jb	NEAR $L$skip4x	; fewer than four blocks in total
    938 
    939 	and	eax,71303168	; keep bits 22 and 26 (0x04400000)
    940 	cmp	eax,4194304	; bit 22 set and bit 26 clear? (presumably MOVBE without XSAVE -- verify against the OPENSSL_ia32cap_P layout)
    941 	je	NEAR $L$skip4x	; then use the two-block path instead
    942 
    943 	sub	r9,0x30
    944 	mov	rax,0xA040608020C0E000	; byte table moved into xmm9 inside the loop for the pshufb-based reduction step
    945 	movdqu	xmm14,XMMWORD[48+rdx]	; xmm14 = table slot 3
    946 	movdqu	xmm15,XMMWORD[64+rdx]	; xmm15 = table slot 4
    947 
    948 
    949 
    950 
        ; Start products for the first four blocks: blocks 3 and 2 here,
        ; blocks 1 and 0 (the latter XORed with the running value) below.
    951 	movdqu	xmm3,XMMWORD[48+r8]
    952 	movdqu	xmm11,XMMWORD[32+r8]
    953 DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
    954 DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
    955 	movdqa	xmm5,xmm3
    956 	pshufd	xmm4,xmm3,78
    957 	pxor	xmm4,xmm3
    958 DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
    959 DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
    960 DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
    961 
    962 	movdqa	xmm13,xmm11
    963 	pshufd	xmm12,xmm11,78
    964 	pxor	xmm12,xmm11
    965 DB	102,68,15,58,68,222,0	; pclmulqdq xmm11,xmm6,0x00
    966 DB	102,68,15,58,68,238,17	; pclmulqdq xmm13,xmm6,0x11
    967 DB	102,68,15,58,68,231,16	; pclmulqdq xmm12,xmm7,0x10
    968 	xorps	xmm3,xmm11
    969 	xorps	xmm5,xmm13
    970 	movups	xmm7,XMMWORD[80+rdx]	; xmm7 = table slot 5
    971 	xorps	xmm4,xmm12
    972 
    973 	movdqu	xmm11,XMMWORD[16+r8]
    974 	movdqu	xmm8,XMMWORD[r8]
    975 DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
    976 DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
    977 	movdqa	xmm13,xmm11
    978 	pshufd	xmm12,xmm11,78
    979 	pxor	xmm0,xmm8	; fold block 0 into the running value
    980 	pxor	xmm12,xmm11
    981 DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
    982 	movdqa	xmm1,xmm0
    983 	pshufd	xmm8,xmm0,78
    984 	pxor	xmm8,xmm0
    985 DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
    986 DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
    987 	xorps	xmm3,xmm11
    988 	xorps	xmm5,xmm13
    989 
    990 	lea	r8,[64+r8]	; advance input by four blocks
    991 	sub	r9,0x40
    992 	jc	NEAR $L$tail4x	; no further group of four: finish this one
    993 
    994 	jmp	NEAR $L$mod4_loop
    995 ALIGN	32
        ; Four blocks per pass.  Completing and reducing the previous group
        ; is interleaved with loading and multiplying the next group.
    996 $L$mod4_loop:
    997 DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
    998 	xorps	xmm4,xmm12
    999 	movdqu	xmm11,XMMWORD[48+r8]
   1000 DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
   1001 DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
   1002 	xorps	xmm0,xmm3
   1003 	movdqu	xmm3,XMMWORD[32+r8]
   1004 	movdqa	xmm13,xmm11
   1005 DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
   1006 	pshufd	xmm12,xmm11,78
   1007 	xorps	xmm1,xmm5
   1008 	pxor	xmm12,xmm11
   1009 DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
   1010 	movups	xmm7,XMMWORD[32+rdx]	; xmm7 = table slot 2
   1011 	xorps	xmm8,xmm4
   1012 DB	102,68,15,58,68,218,0	; pclmulqdq xmm11,xmm2,0x00
   1013 	pshufd	xmm4,xmm3,78
   1014 
   1015 	pxor	xmm8,xmm0
   1016 	movdqa	xmm5,xmm3
   1017 	pxor	xmm8,xmm1
   1018 	pxor	xmm4,xmm3
   1019 	movdqa	xmm9,xmm8
   1020 DB	102,68,15,58,68,234,17	; pclmulqdq xmm13,xmm2,0x11
   1021 	pslldq	xmm8,8
   1022 	psrldq	xmm9,8
   1023 	pxor	xmm0,xmm8
   1024 	movdqa	xmm8,XMMWORD[$L$7_mask]
   1025 	pxor	xmm1,xmm9
   1026 DB	102,76,15,110,200	; movq xmm9,rax
   1027 
   1028 	pand	xmm8,xmm0
   1029 DB	102,69,15,56,0,200	; pshufb xmm9,xmm8
   1030 	pxor	xmm9,xmm0
   1031 DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
   1032 	psllq	xmm9,57
   1033 	movdqa	xmm8,xmm9
   1034 	pslldq	xmm9,8
   1035 DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
   1036 	psrldq	xmm8,8
   1037 	pxor	xmm0,xmm9
   1038 	pxor	xmm1,xmm8
   1039 	movdqu	xmm8,XMMWORD[r8]
   1040 
   1041 	movdqa	xmm9,xmm0
   1042 	psrlq	xmm0,1
   1043 DB	102,15,58,68,238,17	; pclmulqdq xmm5,xmm6,0x11
   1044 	xorps	xmm3,xmm11
   1045 	movdqu	xmm11,XMMWORD[16+r8]
   1046 DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
   1047 DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
   1048 	xorps	xmm5,xmm13
   1049 	movups	xmm7,XMMWORD[80+rdx]	; xmm7 = table slot 5
   1050 DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
   1051 	pxor	xmm1,xmm9
   1052 	pxor	xmm9,xmm0
   1053 	psrlq	xmm0,5
   1054 
   1055 	movdqa	xmm13,xmm11
   1056 	pxor	xmm4,xmm12
   1057 	pshufd	xmm12,xmm11,78
   1058 	pxor	xmm0,xmm9
   1059 	pxor	xmm1,xmm8
   1060 	pxor	xmm12,xmm11
   1061 DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
   1062 	psrlq	xmm0,1
   1063 	pxor	xmm0,xmm1
   1064 	movdqa	xmm1,xmm0
   1065 DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
   1066 	xorps	xmm3,xmm11
   1067 	pshufd	xmm8,xmm0,78
   1068 	pxor	xmm8,xmm0
   1069 
   1070 DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
   1071 	xorps	xmm5,xmm13
   1072 
   1073 	lea	r8,[64+r8]	; advance input by four blocks
   1074 	sub	r9,0x40
   1075 	jnc	NEAR $L$mod4_loop
   1076 
        ; Finish the last group of four: complete the products, combine
        ; them and reduce to 128 bits in xmm0.
   1077 $L$tail4x:
   1078 DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
   1079 DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
   1080 DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
   1081 	xorps	xmm4,xmm12
   1082 	xorps	xmm0,xmm3
   1083 	xorps	xmm1,xmm5
   1084 	pxor	xmm1,xmm0
   1085 	pxor	xmm8,xmm4
   1086 
   1087 	pxor	xmm8,xmm1
   1088 	pxor	xmm1,xmm0
   1089 
   1090 	movdqa	xmm9,xmm8
   1091 	psrldq	xmm8,8
   1092 	pslldq	xmm9,8
   1093 	pxor	xmm1,xmm8
   1094 	pxor	xmm0,xmm9
   1095 
   1096 	movdqa	xmm4,xmm0
   1097 	movdqa	xmm3,xmm0
   1098 	psllq	xmm0,5
   1099 	pxor	xmm3,xmm0
   1100 	psllq	xmm0,1
   1101 	pxor	xmm0,xmm3
   1102 	psllq	xmm0,57
   1103 	movdqa	xmm3,xmm0
   1104 	pslldq	xmm0,8
   1105 	psrldq	xmm3,8
   1106 	pxor	xmm0,xmm4
   1107 	pxor	xmm1,xmm3
   1108 
   1109 
   1110 	movdqa	xmm4,xmm0
   1111 	psrlq	xmm0,1
   1112 	pxor	xmm1,xmm4
   1113 	pxor	xmm4,xmm0
   1114 	psrlq	xmm0,5
   1115 	pxor	xmm0,xmm4
   1116 	psrlq	xmm0,1
   1117 	pxor	xmm0,xmm1
   1118 	add	r9,0x40	; undo the last subtraction: r9 = bytes still unprocessed
   1119 	jz	NEAR $L$done
   1120 	movdqu	xmm7,XMMWORD[32+rdx]	; xmm7 = table slot 2 again for the paths below
   1121 	sub	r9,0x10
   1122 	jz	NEAR $L$odd_tail	; exactly one block left
        ; Two-block path: multiply the second block now; the first block is
        ; folded into the running value and multiplied in the loop/tail.
   1123 $L$skip4x:
   1124 
   1125 
   1126 
   1127 
   1128 
   1129 	movdqu	xmm8,XMMWORD[r8]
   1130 	movdqu	xmm3,XMMWORD[16+r8]
   1131 DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
   1132 DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
   1133 	pxor	xmm0,xmm8
   1134 
   1135 	movdqa	xmm5,xmm3
   1136 	pshufd	xmm4,xmm3,78
   1137 	pxor	xmm4,xmm3
   1138 DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
   1139 DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
   1140 DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
   1141 
   1142 	lea	r8,[32+r8]	; advance input by two blocks
   1143 	nop
   1144 	sub	r9,0x20
   1145 	jbe	NEAR $L$even_tail	; no further full pair to load
   1146 	nop
   1147 	jmp	NEAR $L$mod_loop
   1148 
   1149 ALIGN	32
        ; Two blocks per pass, with the reduction of the previous pair
        ; interleaved with the multiplies for the next pair.
   1150 $L$mod_loop:
   1151 	movdqa	xmm1,xmm0
   1152 	movdqa	xmm8,xmm4
   1153 	pshufd	xmm4,xmm0,78
   1154 	pxor	xmm4,xmm0
   1155 
   1156 DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
   1157 DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
   1158 DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
   1159 
   1160 	pxor	xmm0,xmm3
   1161 	pxor	xmm1,xmm5
   1162 	movdqu	xmm9,XMMWORD[r8]
   1163 	pxor	xmm8,xmm0
   1164 DB	102,69,15,56,0,202	; pshufb xmm9,xmm10
   1165 	movdqu	xmm3,XMMWORD[16+r8]
   1166 
   1167 	pxor	xmm8,xmm1
   1168 	pxor	xmm1,xmm9
   1169 	pxor	xmm4,xmm8
   1170 DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
   1171 	movdqa	xmm8,xmm4
   1172 	psrldq	xmm8,8
   1173 	pslldq	xmm4,8
   1174 	pxor	xmm1,xmm8
   1175 	pxor	xmm0,xmm4
   1176 
   1177 	movdqa	xmm5,xmm3
   1178 
   1179 	movdqa	xmm9,xmm0
   1180 	movdqa	xmm8,xmm0
   1181 	psllq	xmm0,5
   1182 	pxor	xmm8,xmm0
   1183 DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
   1184 	psllq	xmm0,1
   1185 	pxor	xmm0,xmm8
   1186 	psllq	xmm0,57
   1187 	movdqa	xmm8,xmm0
   1188 	pslldq	xmm0,8
   1189 	psrldq	xmm8,8
   1190 	pxor	xmm0,xmm9
   1191 	pshufd	xmm4,xmm5,78
   1192 	pxor	xmm1,xmm8
   1193 	pxor	xmm4,xmm5
   1194 
   1195 	movdqa	xmm9,xmm0
   1196 	psrlq	xmm0,1
   1197 DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
   1198 	pxor	xmm1,xmm9
   1199 	pxor	xmm9,xmm0
   1200 	psrlq	xmm0,5
   1201 	pxor	xmm0,xmm9
   1202 	lea	r8,[32+r8]	; advance input by two blocks
   1203 	psrlq	xmm0,1
   1204 DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
   1205 	pxor	xmm0,xmm1
   1206 
   1207 	sub	r9,0x20
   1208 	ja	NEAR $L$mod_loop
   1209 
        ; Finish the last pair and reduce to 128 bits in xmm0.
   1210 $L$even_tail:
   1211 	movdqa	xmm1,xmm0
   1212 	movdqa	xmm8,xmm4
   1213 	pshufd	xmm4,xmm0,78
   1214 	pxor	xmm4,xmm0
   1215 
   1216 DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
   1217 DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
   1218 DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
   1219 
   1220 	pxor	xmm0,xmm3
   1221 	pxor	xmm1,xmm5
   1222 	pxor	xmm8,xmm0
   1223 	pxor	xmm8,xmm1
   1224 	pxor	xmm4,xmm8
   1225 	movdqa	xmm8,xmm4
   1226 	psrldq	xmm8,8
   1227 	pslldq	xmm4,8
   1228 	pxor	xmm1,xmm8
   1229 	pxor	xmm0,xmm4
   1230 
   1231 	movdqa	xmm4,xmm0
   1232 	movdqa	xmm3,xmm0
   1233 	psllq	xmm0,5
   1234 	pxor	xmm3,xmm0
   1235 	psllq	xmm0,1
   1236 	pxor	xmm0,xmm3
   1237 	psllq	xmm0,57
   1238 	movdqa	xmm3,xmm0
   1239 	pslldq	xmm0,8
   1240 	psrldq	xmm3,8
   1241 	pxor	xmm0,xmm4
   1242 	pxor	xmm1,xmm3
   1243 
   1244 
   1245 	movdqa	xmm4,xmm0
   1246 	psrlq	xmm0,1
   1247 	pxor	xmm1,xmm4
   1248 	pxor	xmm4,xmm0
   1249 	psrlq	xmm0,5
   1250 	pxor	xmm0,xmm4
   1251 	psrlq	xmm0,1
   1252 	pxor	xmm0,xmm1
   1253 	test	r9,r9
   1254 	jnz	NEAR $L$done	; r9 == 0 means one block is still pending; otherwise finished
   1255 
        ; One remaining block: fold it into the running value, multiply by
        ; table slot 0 and reduce.
   1256 $L$odd_tail:
   1257 	movdqu	xmm8,XMMWORD[r8]
   1258 DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
   1259 	pxor	xmm0,xmm8
   1260 	movdqa	xmm1,xmm0
   1261 	pshufd	xmm3,xmm0,78
   1262 	pxor	xmm3,xmm0
   1263 DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
   1264 DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
   1265 DB	102,15,58,68,223,0	; pclmulqdq xmm3,xmm7,0x00
   1266 	pxor	xmm3,xmm0
   1267 	pxor	xmm3,xmm1
   1268 
   1269 	movdqa	xmm4,xmm3
   1270 	psrldq	xmm3,8
   1271 	pslldq	xmm4,8
   1272 	pxor	xmm1,xmm3
   1273 	pxor	xmm0,xmm4
   1274 
   1275 	movdqa	xmm4,xmm0
   1276 	movdqa	xmm3,xmm0
   1277 	psllq	xmm0,5
   1278 	pxor	xmm3,xmm0
   1279 	psllq	xmm0,1
   1280 	pxor	xmm0,xmm3
   1281 	psllq	xmm0,57
   1282 	movdqa	xmm3,xmm0
   1283 	pslldq	xmm0,8
   1284 	psrldq	xmm3,8
   1285 	pxor	xmm0,xmm4
   1286 	pxor	xmm1,xmm3
   1287 
   1288 
   1289 	movdqa	xmm4,xmm0
   1290 	psrlq	xmm0,1
   1291 	pxor	xmm1,xmm4
   1292 	pxor	xmm4,xmm0
   1293 	psrlq	xmm0,5
   1294 	pxor	xmm0,xmm4
   1295 	psrlq	xmm0,1
   1296 	pxor	xmm0,xmm1
        ; Shuffle the result back, store it, restore xmm6-xmm15 and return.
   1297 $L$done:
   1298 DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
   1299 	movdqu	XMMWORD[rcx],xmm0
   1300 	movaps	xmm6,XMMWORD[rsp]
   1301 	movaps	xmm7,XMMWORD[16+rsp]
   1302 	movaps	xmm8,XMMWORD[32+rsp]
   1303 	movaps	xmm9,XMMWORD[48+rsp]
   1304 	movaps	xmm10,XMMWORD[64+rsp]
   1305 	movaps	xmm11,XMMWORD[80+rsp]
   1306 	movaps	xmm12,XMMWORD[96+rsp]
   1307 	movaps	xmm13,XMMWORD[112+rsp]
   1308 	movaps	xmm14,XMMWORD[128+rsp]
   1309 	movaps	xmm15,XMMWORD[144+rsp]
   1310 	lea	rsp,[168+rsp]	; release the 168-byte save area
   1311 $L$SEH_end_gcm_ghash_clmul:
   1312 	DB	0F3h,0C3h		;repret
   1313 
   1314 global	gcm_init_avx
;-----------------------------------------------------------------------
; gcm_init_avx -- build the multiplier table consumed by gcm_ghash_avx.
; In:    rcx = destination table, rdx = 16-byte input value
;        (first two Win64 integer arguments)
; Out:   192 bytes written at [rcx]: four groups of three 16-byte
;        entries.  Each group holds two successive products of the
;        running value with xmm2, followed by one entry packing the
;        (low qword xor high qword) of both of them.
; Stack: 24 bytes; xmm6 is saved at [rsp] and restored on exit.
; Uses:  xmm0-xmm6, r10 (loop counter), rcx (advanced by 48 per group).
; NOTE(review): the shift/xor sequences after each triple of vpclmulqdq
; are presumably the reduction modulo the GHASH polynomial -- confirm
; against the generating perlasm source.
;-----------------------------------------------------------------------
   1315 
   1316 ALIGN	32
   1317 gcm_init_avx:
   1318 $L$SEH_begin_gcm_init_avx:
   1319 
   1320 DB	0x48,0x83,0xec,0x18	; encodes: sub rsp,0x18
   1321 DB	0x0f,0x29,0x34,0x24	; encodes: movaps [rsp],xmm6
   1322 	vzeroupper
   1323 
   1324 	vmovdqu	xmm2,XMMWORD[rdx]
   1325 	vpshufd	xmm2,xmm2,78	; swap the two qwords
   1326 
   1327 
; xmm2 <<= 1 across all 128 bits; if the bit shifted out was set, xor in
; the constant at $L$0x1c2_polynomial.
   1328 	vpshufd	xmm4,xmm2,255	; broadcast the top dword
   1329 	vpsrlq	xmm3,xmm2,63	; bit 63 of each qword = carry
   1330 	vpsllq	xmm2,xmm2,1
   1331 	vpxor	xmm5,xmm5,xmm5
   1332 	vpcmpgtd	xmm5,xmm5,xmm4	; all-ones when the top dword is negative
   1333 	vpslldq	xmm3,xmm3,8	; move low-qword carry into the high qword
   1334 	vpor	xmm2,xmm2,xmm3
   1335 
   1336 
   1337 	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
   1338 	vpxor	xmm2,xmm2,xmm5
   1339 
   1340 	vpunpckhqdq	xmm6,xmm2,xmm2
   1341 	vmovdqa	xmm0,xmm2	; xmm0 = running value, starts equal to xmm2
   1342 	vpxor	xmm6,xmm6,xmm2	; low qword of xmm6 = hi(xmm2) xor lo(xmm2)
   1343 	mov	r10,4	; four groups of three table entries
   1344 	jmp	NEAR $L$init_start_avx
   1345 ALIGN	32
   1346 $L$init_loop_avx:
; Store the packed (lo xor hi) entry of the previous group at
; [previous rcx + 32], then advance the running value once more.
   1347 	vpalignr	xmm5,xmm4,xmm3,8
   1348 	vmovdqu	XMMWORD[(-16)+rcx],xmm5
; xmm1:xmm0 = xmm0 * xmm2 using three carry-less multiplies
; (high, low and middle terms).
   1349 	vpunpckhqdq	xmm3,xmm0,xmm0
   1350 	vpxor	xmm3,xmm3,xmm0
   1351 	vpclmulqdq	xmm1,xmm0,xmm2,0x11
   1352 	vpclmulqdq	xmm0,xmm0,xmm2,0x00
   1353 	vpclmulqdq	xmm3,xmm3,xmm6,0x00
   1354 	vpxor	xmm4,xmm1,xmm0
   1355 	vpxor	xmm3,xmm3,xmm4
   1356 
; Fold the middle term into the low/high halves, then reduce
; xmm1:xmm0 back into xmm0.
   1357 	vpslldq	xmm4,xmm3,8
   1358 	vpsrldq	xmm3,xmm3,8
   1359 	vpxor	xmm0,xmm0,xmm4
   1360 	vpxor	xmm1,xmm1,xmm3
   1361 	vpsllq	xmm3,xmm0,57
   1362 	vpsllq	xmm4,xmm0,62
   1363 	vpxor	xmm4,xmm4,xmm3
   1364 	vpsllq	xmm3,xmm0,63
   1365 	vpxor	xmm4,xmm4,xmm3
   1366 	vpslldq	xmm3,xmm4,8
   1367 	vpsrldq	xmm4,xmm4,8
   1368 	vpxor	xmm0,xmm0,xmm3
   1369 	vpxor	xmm1,xmm1,xmm4
   1370 
   1371 	vpsrlq	xmm4,xmm0,1
   1372 	vpxor	xmm1,xmm1,xmm0
   1373 	vpxor	xmm0,xmm0,xmm4
   1374 	vpsrlq	xmm4,xmm4,5
   1375 	vpxor	xmm0,xmm0,xmm4
   1376 	vpsrlq	xmm0,xmm0,1
   1377 	vpxor	xmm0,xmm0,xmm1
   1378 $L$init_start_avx:
; Keep the current value in xmm5 (first entry of the group) and compute
; the next one in xmm0 (second entry) with the same multiply/reduce.
   1379 	vmovdqa	xmm5,xmm0
   1380 	vpunpckhqdq	xmm3,xmm0,xmm0
   1381 	vpxor	xmm3,xmm3,xmm0
   1382 	vpclmulqdq	xmm1,xmm0,xmm2,0x11
   1383 	vpclmulqdq	xmm0,xmm0,xmm2,0x00
   1384 	vpclmulqdq	xmm3,xmm3,xmm6,0x00
   1385 	vpxor	xmm4,xmm1,xmm0
   1386 	vpxor	xmm3,xmm3,xmm4
   1387 
   1388 	vpslldq	xmm4,xmm3,8
   1389 	vpsrldq	xmm3,xmm3,8
   1390 	vpxor	xmm0,xmm0,xmm4
   1391 	vpxor	xmm1,xmm1,xmm3
   1392 	vpsllq	xmm3,xmm0,57
   1393 	vpsllq	xmm4,xmm0,62
   1394 	vpxor	xmm4,xmm4,xmm3
   1395 	vpsllq	xmm3,xmm0,63
   1396 	vpxor	xmm4,xmm4,xmm3
   1397 	vpslldq	xmm3,xmm4,8
   1398 	vpsrldq	xmm4,xmm4,8
   1399 	vpxor	xmm0,xmm0,xmm3
   1400 	vpxor	xmm1,xmm1,xmm4
   1401 
   1402 	vpsrlq	xmm4,xmm0,1
   1403 	vpxor	xmm1,xmm1,xmm0
   1404 	vpxor	xmm0,xmm0,xmm4
   1405 	vpsrlq	xmm4,xmm4,5
   1406 	vpxor	xmm0,xmm0,xmm4
   1407 	vpsrlq	xmm0,xmm0,1
   1408 	vpxor	xmm0,xmm0,xmm1
; xmm3/xmm4 = (qword-swapped value) xor value, i.e. lo^hi in both qwords,
; for the first and second entry of the group respectively.
   1409 	vpshufd	xmm3,xmm5,78
   1410 	vpshufd	xmm4,xmm0,78
   1411 	vpxor	xmm3,xmm3,xmm5
   1412 	vmovdqu	XMMWORD[rcx],xmm5
   1413 	vpxor	xmm4,xmm4,xmm0
   1414 	vmovdqu	XMMWORD[16+rcx],xmm0
   1415 	lea	rcx,[48+rcx]	; next group; the third slot is filled via [-16+rcx]
   1416 	sub	r10,1
   1417 	jnz	NEAR $L$init_loop_avx
   1418 
; Last group: same packed entry, but with the operand order of vpalignr
; swapped relative to the one at $L$init_loop_avx.
   1419 	vpalignr	xmm5,xmm3,xmm4,8
   1420 	vmovdqu	XMMWORD[(-16)+rcx],xmm5
   1421 
   1422 	vzeroupper
   1423 	movaps	xmm6,XMMWORD[rsp]	; restore callee-saved xmm6
   1424 	lea	rsp,[24+rsp]
   1425 $L$SEH_end_gcm_init_avx:
   1426 	DB	0F3h,0C3h		;repret
   1427 
   1428 global	gcm_gmult_avx
; gcm_gmult_avx has no body of its own: it transfers control, with all
; argument registers untouched, to $L$_gmult_clmul, a label defined
; outside this part of the file.
   1429 
   1430 ALIGN	32
   1431 gcm_gmult_avx:
   1432 	jmp	NEAR $L$_gmult_clmul
   1433 
   1434 global	gcm_ghash_avx
;-----------------------------------------------------------------------
; gcm_ghash_avx -- fold input blocks into a 16-byte state using the
; table laid out by gcm_init_avx.
; In:    rcx = 16-byte state (read here, written back on exit)
;        rdx = multiplier table (biased by +64 below)
;        r8  = input data
;        r9  = input length in bytes
;        (the four Win64 integer argument registers)
; Stack: 168 bytes; xmm6..xmm15 are saved there and restored on exit.
; Regs:  xmm13 = byte-reversal mask, xmm10 = byte-reversed state,
;        r10 = address of $L$0x1c2_polynomial,
;        xmm0/xmm3, xmm1/xmm4, xmm2/xmm5 = alternating accumulators for
;        the low, high and middle partial products.
; NOTE(review): the code only ever steps r9 by 0x10/0x80 and tests it
; for zero, so it appears to assume a non-zero multiple of 16 -- confirm
; against the callers.
;-----------------------------------------------------------------------
   1435 
   1436 ALIGN	32
   1437 gcm_ghash_avx:
   1438 	lea	rax,[((-136))+rsp]
   1439 $L$SEH_begin_gcm_ghash_avx:
   1440 
   1441 DB	0x48,0x8d,0x60,0xe0	; encodes: lea rsp,[rax-0x20]  (rsp -= 168 overall)
   1442 DB	0x0f,0x29,0x70,0xe0	; encodes: movaps [rax-0x20],xmm6
   1443 DB	0x0f,0x29,0x78,0xf0	; encodes: movaps [rax-0x10],xmm7
   1444 DB	0x44,0x0f,0x29,0x00	; encodes: movaps [rax],xmm8
   1445 DB	0x44,0x0f,0x29,0x48,0x10	; encodes: movaps [rax+0x10],xmm9
   1446 DB	0x44,0x0f,0x29,0x50,0x20	; encodes: movaps [rax+0x20],xmm10
   1447 DB	0x44,0x0f,0x29,0x58,0x30	; encodes: movaps [rax+0x30],xmm11
   1448 DB	0x44,0x0f,0x29,0x60,0x40	; encodes: movaps [rax+0x40],xmm12
   1449 DB	0x44,0x0f,0x29,0x68,0x50	; encodes: movaps [rax+0x50],xmm13
   1450 DB	0x44,0x0f,0x29,0x70,0x60	; encodes: movaps [rax+0x60],xmm14
   1451 DB	0x44,0x0f,0x29,0x78,0x70	; encodes: movaps [rax+0x70],xmm15
   1452 	vzeroupper
   1453 
   1454 	vmovdqu	xmm10,XMMWORD[rcx]
   1455 	lea	r10,[$L$0x1c2_polynomial]
   1456 	lea	rdx,[64+rdx]	; bias the table pointer; entries are addressed as (off-64)
   1457 	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
   1458 	vpshufb	xmm10,xmm10,xmm13	; byte-reverse the state
   1459 	cmp	r9,0x80
   1460 	jb	NEAR $L$short_avx	; fewer than 128 bytes: short path
   1461 	sub	r9,0x80
   1462 
; First 128-byte batch.  Blocks are taken from [112+r8] down to [r8];
; the block at offset 112-16*i is multiplied by the table entry at
; byte offset 0,16,48,64,96,112,144 (i = 0..6), with the packed middle
; entries at 32,80,128,176 each serving two consecutive blocks (low
; qword via imm 0x00, high qword via imm 0x10).
   1463 	vmovdqu	xmm14,XMMWORD[112+r8]
   1464 	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
   1465 	vpshufb	xmm14,xmm14,xmm13
   1466 	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
   1467 
   1468 	vpunpckhqdq	xmm9,xmm14,xmm14
   1469 	vmovdqu	xmm15,XMMWORD[96+r8]
   1470 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1471 	vpxor	xmm9,xmm9,xmm14
   1472 	vpshufb	xmm15,xmm15,xmm13
   1473 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1474 	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
   1475 	vpunpckhqdq	xmm8,xmm15,xmm15
   1476 	vmovdqu	xmm14,XMMWORD[80+r8]
   1477 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1478 	vpxor	xmm8,xmm8,xmm15
   1479 
   1480 	vpshufb	xmm14,xmm14,xmm13
   1481 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1482 	vpunpckhqdq	xmm9,xmm14,xmm14
   1483 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1484 	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
   1485 	vpxor	xmm9,xmm9,xmm14
   1486 	vmovdqu	xmm15,XMMWORD[64+r8]
   1487 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1488 	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
   1489 
   1490 	vpshufb	xmm15,xmm15,xmm13
   1491 	vpxor	xmm3,xmm3,xmm0
   1492 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1493 	vpxor	xmm4,xmm4,xmm1
   1494 	vpunpckhqdq	xmm8,xmm15,xmm15
   1495 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1496 	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
   1497 	vpxor	xmm5,xmm5,xmm2
   1498 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1499 	vpxor	xmm8,xmm8,xmm15
   1500 
   1501 	vmovdqu	xmm14,XMMWORD[48+r8]
   1502 	vpxor	xmm0,xmm0,xmm3
   1503 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1504 	vpxor	xmm1,xmm1,xmm4
   1505 	vpshufb	xmm14,xmm14,xmm13
   1506 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1507 	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
   1508 	vpxor	xmm2,xmm2,xmm5
   1509 	vpunpckhqdq	xmm9,xmm14,xmm14
   1510 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1511 	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
   1512 	vpxor	xmm9,xmm9,xmm14
   1513 
   1514 	vmovdqu	xmm15,XMMWORD[32+r8]
   1515 	vpxor	xmm3,xmm3,xmm0
   1516 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1517 	vpxor	xmm4,xmm4,xmm1
   1518 	vpshufb	xmm15,xmm15,xmm13
   1519 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1520 	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
   1521 	vpxor	xmm5,xmm5,xmm2
   1522 	vpunpckhqdq	xmm8,xmm15,xmm15
   1523 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1524 	vpxor	xmm8,xmm8,xmm15
   1525 
   1526 	vmovdqu	xmm14,XMMWORD[16+r8]
   1527 	vpxor	xmm0,xmm0,xmm3
   1528 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1529 	vpxor	xmm1,xmm1,xmm4
   1530 	vpshufb	xmm14,xmm14,xmm13
   1531 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1532 	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
   1533 	vpxor	xmm2,xmm2,xmm5
   1534 	vpunpckhqdq	xmm9,xmm14,xmm14
   1535 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1536 	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
   1537 	vpxor	xmm9,xmm9,xmm14
   1538 
; The block at [r8] is loaded and byte-reversed into xmm15 but not
; multiplied here; xmm6/xmm7 are left holding the entries at 160/176
; for whichever path consumes it next.
   1539 	vmovdqu	xmm15,XMMWORD[r8]
   1540 	vpxor	xmm3,xmm3,xmm0
   1541 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1542 	vpxor	xmm4,xmm4,xmm1
   1543 	vpshufb	xmm15,xmm15,xmm13
   1544 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1545 	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
   1546 	vpxor	xmm5,xmm5,xmm2
   1547 	vpclmulqdq	xmm2,xmm9,xmm7,0x10
   1548 
   1549 	lea	r8,[128+r8]
   1550 	cmp	r9,0x80
   1551 	jb	NEAR $L$tail_avx	; no further full batch: finish in the tail
   1552 
   1553 	vpxor	xmm15,xmm15,xmm10	; fold the running state into the pending block
   1554 	sub	r9,0x80
   1555 	jmp	NEAR $L$oop8x_avx
   1556 
   1557 ALIGN	32
; Main loop of gcm_ghash_avx: one 128-byte batch per iteration.
; On entry xmm15 holds the pending block of the previous batch (already
; xored with the state), xmm6/xmm7 its table entries, and
; xmm0..xmm5 the partial products of the other seven blocks.
; Each iteration interleaves three things:
;   * finishing the previous batch into xmm10 (low), xmm11 (high),
;     xmm12 (middle) and reducing it in two steps, each a vpclmulqdq
;     against the constant at [r10] paired with a vpalignr rotate;
;   * loading, byte-reversing and multiplying blocks [112+r8]..[16+r8]
;     of the new batch into xmm0..xmm5;
;   * loading the new pending block [r8] into xmm15 and xoring the
;     reduced result (xmm12, xmm10) into it.
; r9 counts remaining bytes minus one batch; the loop repeats while
; "sub r9,0x80" does not borrow.
   1558 $L$oop8x_avx:
   1559 	vpunpckhqdq	xmm8,xmm15,xmm15
   1560 	vmovdqu	xmm14,XMMWORD[112+r8]
   1561 	vpxor	xmm3,xmm3,xmm0
   1562 	vpxor	xmm8,xmm8,xmm15
   1563 	vpclmulqdq	xmm10,xmm15,xmm6,0x00
   1564 	vpshufb	xmm14,xmm14,xmm13
   1565 	vpxor	xmm4,xmm4,xmm1
   1566 	vpclmulqdq	xmm11,xmm15,xmm6,0x11
   1567 	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
   1568 	vpunpckhqdq	xmm9,xmm14,xmm14
   1569 	vpxor	xmm5,xmm5,xmm2
   1570 	vpclmulqdq	xmm12,xmm8,xmm7,0x00
   1571 	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
   1572 	vpxor	xmm9,xmm9,xmm14
   1573 
   1574 	vmovdqu	xmm15,XMMWORD[96+r8]
   1575 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1576 	vpxor	xmm10,xmm10,xmm3	; xmm10 = total low product of previous batch
   1577 	vpshufb	xmm15,xmm15,xmm13
   1578 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1579 	vxorps	xmm11,xmm11,xmm4	; xmm11 = total high product
   1580 	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
   1581 	vpunpckhqdq	xmm8,xmm15,xmm15
   1582 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1583 	vpxor	xmm12,xmm12,xmm5	; xmm12 = total middle product
   1584 	vxorps	xmm8,xmm8,xmm15
   1585 
   1586 	vmovdqu	xmm14,XMMWORD[80+r8]
   1587 	vpxor	xmm12,xmm12,xmm10
   1588 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1589 	vpxor	xmm12,xmm12,xmm11
   1590 	vpslldq	xmm9,xmm12,8
   1591 	vpxor	xmm3,xmm3,xmm0
   1592 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1593 	vpsrldq	xmm12,xmm12,8
   1594 	vpxor	xmm10,xmm10,xmm9	; fold middle term into the low half
   1595 	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
   1596 	vpshufb	xmm14,xmm14,xmm13
   1597 	vxorps	xmm11,xmm11,xmm12	; and into the high half
   1598 	vpxor	xmm4,xmm4,xmm1
   1599 	vpunpckhqdq	xmm9,xmm14,xmm14
   1600 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1601 	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
   1602 	vpxor	xmm9,xmm9,xmm14
   1603 	vpxor	xmm5,xmm5,xmm2
   1604 
   1605 	vmovdqu	xmm15,XMMWORD[64+r8]
   1606 	vpalignr	xmm12,xmm10,xmm10,8	; first reduction step: rotated copy
   1607 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1608 	vpshufb	xmm15,xmm15,xmm13
   1609 	vpxor	xmm0,xmm0,xmm3
   1610 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1611 	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
   1612 	vpunpckhqdq	xmm8,xmm15,xmm15
   1613 	vpxor	xmm1,xmm1,xmm4
   1614 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1615 	vxorps	xmm8,xmm8,xmm15
   1616 	vpxor	xmm2,xmm2,xmm5
   1617 
   1618 	vmovdqu	xmm14,XMMWORD[48+r8]
   1619 	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10	; first reduction step: multiply by the constant
   1620 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1621 	vpshufb	xmm14,xmm14,xmm13
   1622 	vpxor	xmm3,xmm3,xmm0
   1623 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1624 	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
   1625 	vpunpckhqdq	xmm9,xmm14,xmm14
   1626 	vpxor	xmm4,xmm4,xmm1
   1627 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1628 	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
   1629 	vpxor	xmm9,xmm9,xmm14
   1630 	vpxor	xmm5,xmm5,xmm2
   1631 
   1632 	vmovdqu	xmm15,XMMWORD[32+r8]
   1633 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1634 	vpshufb	xmm15,xmm15,xmm13
   1635 	vpxor	xmm0,xmm0,xmm3
   1636 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1637 	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
   1638 	vpunpckhqdq	xmm8,xmm15,xmm15
   1639 	vpxor	xmm1,xmm1,xmm4
   1640 	vpclmulqdq	xmm2,xmm9,xmm7,0x00
   1641 	vpxor	xmm8,xmm8,xmm15
   1642 	vpxor	xmm2,xmm2,xmm5
   1643 	vxorps	xmm10,xmm10,xmm12	; first reduction step complete
   1644 
   1645 	vmovdqu	xmm14,XMMWORD[16+r8]
   1646 	vpalignr	xmm12,xmm10,xmm10,8	; second reduction step: rotated copy
   1647 	vpclmulqdq	xmm3,xmm15,xmm6,0x00
   1648 	vpshufb	xmm14,xmm14,xmm13
   1649 	vpxor	xmm3,xmm3,xmm0
   1650 	vpclmulqdq	xmm4,xmm15,xmm6,0x11
   1651 	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
   1652 	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10	; second reduction step: multiply by the constant
   1653 	vxorps	xmm12,xmm12,xmm11	; bring in the high half
   1654 	vpunpckhqdq	xmm9,xmm14,xmm14
   1655 	vpxor	xmm4,xmm4,xmm1
   1656 	vpclmulqdq	xmm5,xmm8,xmm7,0x10
   1657 	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
   1658 	vpxor	xmm9,xmm9,xmm14
   1659 	vpxor	xmm5,xmm5,xmm2
   1660 
   1661 	vmovdqu	xmm15,XMMWORD[r8]
   1662 	vpclmulqdq	xmm0,xmm14,xmm6,0x00
   1663 	vpshufb	xmm15,xmm15,xmm13
   1664 	vpclmulqdq	xmm1,xmm14,xmm6,0x11
   1665 	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
   1666 	vpxor	xmm15,xmm15,xmm12	; pending block ^= reduced previous result
   1667 	vpclmulqdq	xmm2,xmm9,xmm7,0x10
   1668 	vpxor	xmm15,xmm15,xmm10
   1669 
   1670 	lea	r8,[128+r8]
   1671 	sub	r9,0x80
   1672 	jnc	NEAR $L$oop8x_avx	; another full batch is available
   1673 
   1674 	add	r9,0x80	; undo the borrow: r9 = bytes left (< 0x80)
   1675 	jmp	NEAR $L$tail_no_xor_avx	; state is already folded into xmm15
   1676 
   1677 ALIGN	32
; Short path of gcm_ghash_avx, entered with r9 < 0x80 bytes remaining
; (either directly from the entry code or from the tail when data is
; left over).  Blocks are consumed from the END of the remaining input
; backwards: r8 is moved to the end pointer and blocks are read at
; [-16+r8], [-32+r8], ...  The k-th block from the end is multiplied by
; the table entry at byte offset 0,16,48,64,96,112,144 for k = 0..6.
; After each block r9 drops by 0x10; when it reaches zero the pending
; block in xmm15 is finished at $L$tail_avx.
; Instead of reloading a packed middle entry for the second block of a
; pair, xmm7 is shifted right by 8 bytes so its high qword becomes the
; low qword used by imm 0x00.
   1678 $L$short_avx:
   1679 	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]	; last block of the remaining input
   1680 	lea	r8,[r9*1+r8]	; r8 = end of input
   1681 	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
   1682 	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
   1683 	vpshufb	xmm15,xmm14,xmm13
   1684 
; Copying xmm0..xmm2 into xmm3..xmm5 makes the next
; "vpxor xmm3,xmm3,xmm0" (and its siblings) produce zero, i.e. the
; accumulators start empty whatever xmm0..xmm2 contained.
   1685 	vmovdqa	xmm3,xmm0
   1686 	vmovdqa	xmm4,xmm1
   1687 	vmovdqa	xmm5,xmm2
   1688 	sub	r9,0x10
   1689 	jz	NEAR $L$tail_avx
   1690 
   1691 	vpunpckhqdq	xmm8,xmm15,xmm15
   1692 	vpxor	xmm3,xmm3,xmm0
   1693 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1694 	vpxor	xmm8,xmm8,xmm15
   1695 	vmovdqu	xmm14,XMMWORD[((-32))+r8]
   1696 	vpxor	xmm4,xmm4,xmm1
   1697 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1698 	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
   1699 	vpshufb	xmm15,xmm14,xmm13
   1700 	vpxor	xmm5,xmm5,xmm2
   1701 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1702 	vpsrldq	xmm7,xmm7,8	; expose the second half of the packed middle entry
   1703 	sub	r9,0x10
   1704 	jz	NEAR $L$tail_avx
   1705 
   1706 	vpunpckhqdq	xmm8,xmm15,xmm15
   1707 	vpxor	xmm3,xmm3,xmm0
   1708 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1709 	vpxor	xmm8,xmm8,xmm15
   1710 	vmovdqu	xmm14,XMMWORD[((-48))+r8]
   1711 	vpxor	xmm4,xmm4,xmm1
   1712 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1713 	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
   1714 	vpshufb	xmm15,xmm14,xmm13
   1715 	vpxor	xmm5,xmm5,xmm2
   1716 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1717 	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
   1718 	sub	r9,0x10
   1719 	jz	NEAR $L$tail_avx
   1720 
   1721 	vpunpckhqdq	xmm8,xmm15,xmm15
   1722 	vpxor	xmm3,xmm3,xmm0
   1723 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1724 	vpxor	xmm8,xmm8,xmm15
   1725 	vmovdqu	xmm14,XMMWORD[((-64))+r8]
   1726 	vpxor	xmm4,xmm4,xmm1
   1727 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1728 	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
   1729 	vpshufb	xmm15,xmm14,xmm13
   1730 	vpxor	xmm5,xmm5,xmm2
   1731 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1732 	vpsrldq	xmm7,xmm7,8
   1733 	sub	r9,0x10
   1734 	jz	NEAR $L$tail_avx
   1735 
   1736 	vpunpckhqdq	xmm8,xmm15,xmm15
   1737 	vpxor	xmm3,xmm3,xmm0
   1738 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1739 	vpxor	xmm8,xmm8,xmm15
   1740 	vmovdqu	xmm14,XMMWORD[((-80))+r8]
   1741 	vpxor	xmm4,xmm4,xmm1
   1742 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1743 	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
   1744 	vpshufb	xmm15,xmm14,xmm13
   1745 	vpxor	xmm5,xmm5,xmm2
   1746 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1747 	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
   1748 	sub	r9,0x10
   1749 	jz	NEAR $L$tail_avx
   1750 
   1751 	vpunpckhqdq	xmm8,xmm15,xmm15
   1752 	vpxor	xmm3,xmm3,xmm0
   1753 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1754 	vpxor	xmm8,xmm8,xmm15
   1755 	vmovdqu	xmm14,XMMWORD[((-96))+r8]
   1756 	vpxor	xmm4,xmm4,xmm1
   1757 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1758 	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
   1759 	vpshufb	xmm15,xmm14,xmm13
   1760 	vpxor	xmm5,xmm5,xmm2
   1761 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1762 	vpsrldq	xmm7,xmm7,8
   1763 	sub	r9,0x10
   1764 	jz	NEAR $L$tail_avx
   1765 
   1766 	vpunpckhqdq	xmm8,xmm15,xmm15
   1767 	vpxor	xmm3,xmm3,xmm0
   1768 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1769 	vpxor	xmm8,xmm8,xmm15
   1770 	vmovdqu	xmm14,XMMWORD[((-112))+r8]
   1771 	vpxor	xmm4,xmm4,xmm1
   1772 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1773 	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
   1774 	vpshufb	xmm15,xmm14,xmm13
   1775 	vpxor	xmm5,xmm5,xmm2
   1776 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1777 	vmovq	xmm7,QWORD[((184-64))+rdx]	; only the upper 8 bytes of the entry at 176
   1778 	sub	r9,0x10
   1779 	jmp	NEAR $L$tail_avx	; seventh block: at most 7 fit below 0x80 bytes
   1780 
   1781 ALIGN	32
; Tail of gcm_ghash_avx.  Finishes the pending block in xmm15 using the
; table entries currently in xmm6/xmm7, sums all partial products,
; reduces the result into xmm10, and either loops back to the short
; path (r9 != 0) or stores the state and returns.
; $L$tail_avx first xors the running state (xmm10) into the pending
; block; $L$tail_no_xor_avx is the entry for callers that have already
; done so.
   1782 $L$tail_avx:
   1783 	vpxor	xmm15,xmm15,xmm10
   1784 $L$tail_no_xor_avx:
   1785 	vpunpckhqdq	xmm8,xmm15,xmm15
   1786 	vpxor	xmm3,xmm3,xmm0
   1787 	vpclmulqdq	xmm0,xmm15,xmm6,0x00
   1788 	vpxor	xmm8,xmm8,xmm15
   1789 	vpxor	xmm4,xmm4,xmm1
   1790 	vpclmulqdq	xmm1,xmm15,xmm6,0x11
   1791 	vpxor	xmm5,xmm5,xmm2
   1792 	vpclmulqdq	xmm2,xmm8,xmm7,0x00
   1793 
   1794 	vmovdqu	xmm12,XMMWORD[r10]	; r10 was set to $L$0x1c2_polynomial at function entry
   1795 
   1796 	vpxor	xmm10,xmm3,xmm0	; xmm10 = total low product
   1797 	vpxor	xmm11,xmm4,xmm1	; xmm11 = total high product
   1798 	vpxor	xmm5,xmm5,xmm2	; xmm5  = total middle product
   1799 
; Fold the middle term: xor in low and high totals, then split it across
; the two halves.
   1800 	vpxor	xmm5,xmm5,xmm10
   1801 	vpxor	xmm5,xmm5,xmm11
   1802 	vpslldq	xmm9,xmm5,8
   1803 	vpsrldq	xmm5,xmm5,8
   1804 	vpxor	xmm10,xmm10,xmm9
   1805 	vpxor	xmm11,xmm11,xmm5
   1806 
; Two reduction steps, each: multiply by the constant, rotate by 8
; bytes, xor.  The high half (xmm11) is added in the second step.
   1807 	vpclmulqdq	xmm9,xmm10,xmm12,0x10
   1808 	vpalignr	xmm10,xmm10,xmm10,8
   1809 	vpxor	xmm10,xmm10,xmm9
   1810 
   1811 	vpclmulqdq	xmm9,xmm10,xmm12,0x10
   1812 	vpalignr	xmm10,xmm10,xmm10,8
   1813 	vpxor	xmm10,xmm10,xmm11
   1814 	vpxor	xmm10,xmm10,xmm9
   1815 
   1816 	cmp	r9,0
   1817 	jne	NEAR $L$short_avx	; leftover (< 0x80 bytes) after the 8x loop
   1818 
   1819 	vpshufb	xmm10,xmm10,xmm13	; back to the caller's byte order
   1820 	vmovdqu	XMMWORD[rcx],xmm10
   1821 	vzeroupper
; Restore xmm6..xmm15 from the 168-byte save area set up by the prologue.
   1822 	movaps	xmm6,XMMWORD[rsp]
   1823 	movaps	xmm7,XMMWORD[16+rsp]
   1824 	movaps	xmm8,XMMWORD[32+rsp]
   1825 	movaps	xmm9,XMMWORD[48+rsp]
   1826 	movaps	xmm10,XMMWORD[64+rsp]
   1827 	movaps	xmm11,XMMWORD[80+rsp]
   1828 	movaps	xmm12,XMMWORD[96+rsp]
   1829 	movaps	xmm13,XMMWORD[112+rsp]
   1830 	movaps	xmm14,XMMWORD[128+rsp]
   1831 	movaps	xmm15,XMMWORD[144+rsp]
   1832 	lea	rsp,[168+rsp]
   1833 $L$SEH_end_gcm_ghash_avx:
   1834 	DB	0F3h,0C3h		;repret
   1835 
   1836 ALIGN	64
; 16-byte constants shared by the routines in this file.
; $L$bswap_mask: shuffle-control bytes 15..0; used with vpshufb in
; gcm_ghash_avx to reverse the byte order of a 128-bit value.
   1837 $L$bswap_mask:
   1838 DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
; $L$0x1c2_polynomial: 128-bit little-endian value with byte 0 = 0x01
; and byte 15 = 0xc2; conditionally xored in by gcm_init_avx and used as
; the reduction multiplier (via r10) in gcm_ghash_avx.
   1839 $L$0x1c2_polynomial:
   1840 DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
; $L$7_mask / $L$7_mask_poly: four dwords each (450 = 0x1c2).  Neither is
; referenced from the AVX routines, data tables or handler in this part
; of the file.
   1841 $L$7_mask:
   1842 	DD	7,0,7,0
   1843 $L$7_mask_poly:
   1844 	DD	7,0,450,0
   1845 ALIGN	64
   1846 
; $L$rem_4bit: 16 qword entries, each written as a (low dword, high
; dword) pair with the low dword always zero.  gcm_gmult_4bit indexes it
; with a 4-bit value (rdx masked with 0xf, scaled by 8) and xors the
; entry into its accumulator.  The high dwords are 0x1C20, 0x3840,
; 0x2460, ... shifted left by 16 bits.
   1847 $L$rem_4bit:
   1848 	DD	0,0,0,471859200,0,943718400,0,610271232
   1849 	DD	0,1887436800,0,1822425088,0,1220542464,0,1423966208
   1850 	DD	0,3774873600,0,4246732800,0,3644850176,0,3311403008
   1851 	DD	0,2441084928,0,2376073216,0,2847932416,0,3051356160
   1852 
; $L$rem_8bit: 256 word entries, one per byte value.  Entry i is the xor
; of (0x01C2 << k) over every set bit k of i (e.g. entry 1 = 0x01C2,
; entry 2 = 0x0384, entry 3 = 0x0246, entry 128 = 0xE100).
; The code that consumes this table lies outside this part of the file.
   1853 $L$rem_8bit:
   1854 	DW	0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E
   1855 	DW	0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E
   1856 	DW	0x1C20,0x1DE2,0x1FA4,0x1E66,0x1B28,0x1AEA,0x18AC,0x196E
   1857 	DW	0x1230,0x13F2,0x11B4,0x1076,0x1538,0x14FA,0x16BC,0x177E
   1858 	DW	0x3840,0x3982,0x3BC4,0x3A06,0x3F48,0x3E8A,0x3CCC,0x3D0E
   1859 	DW	0x3650,0x3792,0x35D4,0x3416,0x3158,0x309A,0x32DC,0x331E
   1860 	DW	0x2460,0x25A2,0x27E4,0x2626,0x2368,0x22AA,0x20EC,0x212E
   1861 	DW	0x2A70,0x2BB2,0x29F4,0x2836,0x2D78,0x2CBA,0x2EFC,0x2F3E
   1862 	DW	0x7080,0x7142,0x7304,0x72C6,0x7788,0x764A,0x740C,0x75CE
   1863 	DW	0x7E90,0x7F52,0x7D14,0x7CD6,0x7998,0x785A,0x7A1C,0x7BDE
   1864 	DW	0x6CA0,0x6D62,0x6F24,0x6EE6,0x6BA8,0x6A6A,0x682C,0x69EE
   1865 	DW	0x62B0,0x6372,0x6134,0x60F6,0x65B8,0x647A,0x663C,0x67FE
   1866 	DW	0x48C0,0x4902,0x4B44,0x4A86,0x4FC8,0x4E0A,0x4C4C,0x4D8E
   1867 	DW	0x46D0,0x4712,0x4554,0x4496,0x41D8,0x401A,0x425C,0x439E
   1868 	DW	0x54E0,0x5522,0x5764,0x56A6,0x53E8,0x522A,0x506C,0x51AE
   1869 	DW	0x5AF0,0x5B32,0x5974,0x58B6,0x5DF8,0x5C3A,0x5E7C,0x5FBE
   1870 	DW	0xE100,0xE0C2,0xE284,0xE346,0xE608,0xE7CA,0xE58C,0xE44E
   1871 	DW	0xEF10,0xEED2,0xEC94,0xED56,0xE818,0xE9DA,0xEB9C,0xEA5E
   1872 	DW	0xFD20,0xFCE2,0xFEA4,0xFF66,0xFA28,0xFBEA,0xF9AC,0xF86E
   1873 	DW	0xF330,0xF2F2,0xF0B4,0xF176,0xF438,0xF5FA,0xF7BC,0xF67E
   1874 	DW	0xD940,0xD882,0xDAC4,0xDB06,0xDE48,0xDF8A,0xDDCC,0xDC0E
   1875 	DW	0xD750,0xD692,0xD4D4,0xD516,0xD058,0xD19A,0xD3DC,0xD21E
   1876 	DW	0xC560,0xC4A2,0xC6E4,0xC726,0xC268,0xC3AA,0xC1EC,0xC02E
   1877 	DW	0xCB70,0xCAB2,0xC8F4,0xC936,0xCC78,0xCDBA,0xCFFC,0xCE3E
   1878 	DW	0x9180,0x9042,0x9204,0x93C6,0x9688,0x974A,0x950C,0x94CE
   1879 	DW	0x9F90,0x9E52,0x9C14,0x9DD6,0x9898,0x995A,0x9B1C,0x9ADE
   1880 	DW	0x8DA0,0x8C62,0x8E24,0x8FE6,0x8AA8,0x8B6A,0x892C,0x88EE
   1881 	DW	0x83B0,0x8272,0x8034,0x81F6,0x84B8,0x857A,0x873C,0x86FE
   1882 	DW	0xA9C0,0xA802,0xAA44,0xAB86,0xAEC8,0xAF0A,0xAD4C,0xAC8E
   1883 	DW	0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E
   1884 	DW	0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE
   1885 	DW	0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE
   1886 
; NUL-terminated ASCII identification string:
; "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
   1887 DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
   1888 DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
   1889 DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
   1890 DB	114,103,62,0
   1891 ALIGN	64
   1892 EXTERN	__imp_RtlVirtualUnwind
   1893 
   1894 ALIGN	16
;-----------------------------------------------------------------------
; se_handler -- language-specific exception handler registered in .xdata
; for gcm_gmult_4bit and gcm_ghash_4bit.
; In:    r8 = record whose saved registers are patched below,
;        r9 = record supplying the image base, function entry and
;             handler data.
;        NOTE(review): the offsets used (120/152/248 for rax/rsp/rip,
;        144/160/168/176/216..240 for rbx/rbp/rsi/rdi/r12..r15) look
;        like the Win64 CONTEXT layout, and r9 like DISPATCHER_CONTEXT
;        -- confirm against winnt.h.
; Out:   eax = 1.
; Effect: if the faulting rip lies between the two image-relative
;        labels stored as handler data (prologue end / epilogue start),
;        the six registers pushed by the function are recovered from its
;        frame (6*8 pushes + 280 bytes of locals); in every case the
;        stack pointer and the rsi/rdi values the Win64 prologue parked
;        at [8+rax]/[16+rax] are written back, the record is copied, and
;        RtlVirtualUnwind is invoked.
;-----------------------------------------------------------------------
   1895 se_handler:
   1896 	push	rsi
   1897 	push	rdi
   1898 	push	rbx
   1899 	push	rbp
   1900 	push	r12
   1901 	push	r13
   1902 	push	r14
   1903 	push	r15
   1904 	pushfq
   1905 	sub	rsp,64	; outgoing-argument area for the call below
   1906 
   1907 	mov	rax,QWORD[120+r8]	; saved rax (holds the entry rsp while in the prologue)
   1908 	mov	rbx,QWORD[248+r8]	; saved rip
   1909 
   1910 	mov	rsi,QWORD[8+r9]	; image base
   1911 	mov	r11,QWORD[56+r9]	; handler data: two image-relative label offsets
   1912 
   1913 	mov	r10d,DWORD[r11]	; first label (end of prologue)
   1914 	lea	r10,[r10*1+rsi]
   1915 	cmp	rbx,r10
   1916 	jb	NEAR $L$in_prologue	; rip before it: nothing pushed that needs recovering
   1917 
   1918 	mov	rax,QWORD[152+r8]	; saved rsp
   1919 
   1920 	mov	r10d,DWORD[4+r11]	; second label (start of epilogue)
   1921 	lea	r10,[r10*1+rsi]
   1922 	cmp	rbx,r10
   1923 	jae	NEAR $L$in_prologue	; rip at/after it: frame already torn down
   1924 
   1925 	lea	rax,[((48+280))+rax]	; skip 280 bytes of locals + six pushed registers
   1926 
   1927 	mov	rbx,QWORD[((-8))+rax]
   1928 	mov	rbp,QWORD[((-16))+rax]
   1929 	mov	r12,QWORD[((-24))+rax]
   1930 	mov	r13,QWORD[((-32))+rax]
   1931 	mov	r14,QWORD[((-40))+rax]
   1932 	mov	r15,QWORD[((-48))+rax]
   1933 	mov	QWORD[144+r8],rbx
   1934 	mov	QWORD[160+r8],rbp
   1935 	mov	QWORD[216+r8],r12
   1936 	mov	QWORD[224+r8],r13
   1937 	mov	QWORD[232+r8],r14
   1938 	mov	QWORD[240+r8],r15
   1939 
   1940 $L$in_prologue:
   1941 	mov	rdi,QWORD[8+rax]	; rdi/rsi as stored by the ";WIN64 prologue" moves
   1942 	mov	rsi,QWORD[16+rax]
   1943 	mov	QWORD[152+r8],rax	; saved rsp = value at function entry
   1944 	mov	QWORD[168+r8],rsi
   1945 	mov	QWORD[176+r8],rdi
   1946 
   1947 	mov	rdi,QWORD[40+r9]	; destination record
   1948 	mov	rsi,r8
   1949 	mov	ecx,154	; 154 qwords = 1232 bytes
   1950 	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
   1951 
; Set up the call: rcx = 0, rdx = [8+r9], r8 = [r9], r9 = [16+r9], and
; four stack arguments at [32+rsp]..[56+rsp].
   1952 	mov	rsi,r9
   1953 	xor	rcx,rcx
   1954 	mov	rdx,QWORD[8+rsi]
   1955 	mov	r8,QWORD[rsi]
   1956 	mov	r9,QWORD[16+rsi]
   1957 	mov	r10,QWORD[40+rsi]
   1958 	lea	r11,[56+rsi]
   1959 	lea	r12,[24+rsi]
   1960 	mov	QWORD[32+rsp],r10
   1961 	mov	QWORD[40+rsp],r11
   1962 	mov	QWORD[48+rsp],r12
   1963 	mov	QWORD[56+rsp],rcx
   1964 	call	QWORD[__imp_RtlVirtualUnwind]
   1965 
   1966 	mov	eax,1	; NOTE(review): presumably ExceptionContinueSearch -- confirm
   1967 	add	rsp,64
   1968 	popfq
   1969 	pop	r15
   1970 	pop	r14
   1971 	pop	r13
   1972 	pop	r12
   1973 	pop	rbp
   1974 	pop	rbx
   1975 	pop	rdi
   1976 	pop	rsi
   1977 	DB	0F3h,0C3h		;repret
   1978 
   1979 
   1980 section	.pdata rdata align=4
; Function table: one (begin, end, unwind-info) triple of image-relative
; addresses per function.  The two AVX entries deliberately point at the
; unwind info of their CLMUL counterparts: gcm_init_avx at
; $L$SEH_info_gcm_init_clmul and gcm_ghash_avx at
; $L$SEH_info_gcm_ghash_clmul.
   1981 ALIGN	4
   1982 	DD	$L$SEH_begin_gcm_gmult_4bit wrt ..imagebase
   1983 	DD	$L$SEH_end_gcm_gmult_4bit wrt ..imagebase
   1984 	DD	$L$SEH_info_gcm_gmult_4bit wrt ..imagebase
   1985 
   1986 	DD	$L$SEH_begin_gcm_ghash_4bit wrt ..imagebase
   1987 	DD	$L$SEH_end_gcm_ghash_4bit wrt ..imagebase
   1988 	DD	$L$SEH_info_gcm_ghash_4bit wrt ..imagebase
   1989 
   1990 	DD	$L$SEH_begin_gcm_init_clmul wrt ..imagebase
   1991 	DD	$L$SEH_end_gcm_init_clmul wrt ..imagebase
   1992 	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase
   1993 
   1994 	DD	$L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
   1995 	DD	$L$SEH_end_gcm_ghash_clmul wrt ..imagebase
   1996 	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
   1997 	DD	$L$SEH_begin_gcm_init_avx wrt ..imagebase
   1998 	DD	$L$SEH_end_gcm_init_avx wrt ..imagebase
   1999 	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase	; same 8-byte prologue as gcm_init_avx
   2000 
   2001 	DD	$L$SEH_begin_gcm_ghash_avx wrt ..imagebase
   2002 	DD	$L$SEH_end_gcm_ghash_avx wrt ..imagebase
   2003 	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase	; same 0x33-byte prologue as gcm_ghash_avx
   2004 section	.xdata rdata align=8
; Unwind information referenced from .pdata.
; NOTE(review): the byte-level decoding in the comments below follows
; the documented x64 UNWIND_INFO / UNWIND_CODE layout; the sizes agree
; with the prologues in this file (24 and 168 bytes of stack, prologue
; lengths 8 and 0x33), but confirm the field meanings against the
; Microsoft documentation before editing.
   2005 ALIGN	8
; 4-bit routines: header byte 9 (version 1, handler flag set), then the
; handler's address and its data -- the prologue-end and epilogue-start
; labels that se_handler compares the faulting rip against.
   2006 $L$SEH_info_gcm_gmult_4bit:
   2007 DB	9,0,0,0
   2008 	DD	se_handler wrt ..imagebase
   2009 	DD	$L$gmult_prologue wrt ..imagebase,$L$gmult_epilogue wrt ..imagebase
   2010 $L$SEH_info_gcm_ghash_4bit:
   2011 DB	9,0,0,0
   2012 	DD	se_handler wrt ..imagebase
   2013 	DD	$L$ghash_prologue wrt ..imagebase,$L$ghash_epilogue wrt ..imagebase
; init routines: version 1, prologue size 8, 3 code slots.
   2014 $L$SEH_info_gcm_init_clmul:
   2015 DB	0x01,0x08,0x03,0x00
   2016 DB	0x08,0x68,0x00,0x00	; at offset 8: xmm6 saved at [rsp+0]
   2017 DB	0x04,0x22,0x00,0x00	; at offset 4: small stack allocation of 24 bytes
; ghash routines: version 1, prologue size 0x33, 0x16 code slots.
; Each 4-byte row below records one xmm save as
; (prologue offset, register/op byte, slot index in 16-byte units).
   2018 $L$SEH_info_gcm_ghash_clmul:
   2019 DB	0x01,0x33,0x16,0x00
   2020 DB	0x33,0xf8,0x09,0x00	; xmm15 at [rsp+0x90]
   2021 DB	0x2e,0xe8,0x08,0x00	; xmm14 at [rsp+0x80]
   2022 DB	0x29,0xd8,0x07,0x00	; xmm13 at [rsp+0x70]
   2023 DB	0x24,0xc8,0x06,0x00	; xmm12 at [rsp+0x60]
   2024 DB	0x1f,0xb8,0x05,0x00	; xmm11 at [rsp+0x50]
   2025 DB	0x1a,0xa8,0x04,0x00	; xmm10 at [rsp+0x40]
   2026 DB	0x15,0x98,0x03,0x00	; xmm9 at [rsp+0x30]
   2027 DB	0x10,0x88,0x02,0x00	; xmm8 at [rsp+0x20]
   2028 DB	0x0c,0x78,0x01,0x00	; xmm7 at [rsp+0x10]
   2029 DB	0x08,0x68,0x00,0x00	; xmm6 at [rsp+0x00]
   2030 DB	0x04,0x01,0x15,0x00	; at offset 4: stack allocation of 0x15*8 = 168 bytes
   2031