Home | History | Annotate | Download | only in modes
      1 %ifidn __OUTPUT_FORMAT__,obj
        ; Output-format dispatch: pick the code-section declaration that matches
        ; the object format the assembler is producing (obj / win32 / other).
      2 section	code	use32 class=code align=64
      3 %elifidn __OUTPUT_FORMAT__,win32
        ; win32 output: when assembled by Yasm, require version 1.1.0 or newer.
      4 %ifdef __YASM_VERSION_ID__
      5 %if __YASM_VERSION_ID__ < 01010000h
      6 %error yasm version 1.1.0 or later needed.
      7 %endif
      8 ; Yasm automatically includes .00 and complains about redefining it.
      9 ; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
     10 %else
        ; Not Yasm: define the @feat.00 symbol here ourselves (see the safeseh
        ; link above for what the symbol means).
     11 $@feat.00 equ 1
     12 %endif
     13 section	.text	code align=64
     14 %else
        ; Any other output format: plain .text code section.
     15 section	.text	code
     16 %endif
     17 global	_gcm_gmult_4bit_x86
        ; -----------------------------------------------------------------
        ; _gcm_gmult_4bit_x86(arg1, arg2) -- integer-only 32-bit routine,
        ; both arguments are read from the stack.
        ;   arg1: pointer to a 16-byte block; read at entry and overwritten
        ;         with the result at exit.
        ;   arg2: pointer to a table addressed in 16-byte entries by
        ;         (nibble * 16).
        ;   (Presumably the GHASH state Xi and the 4-bit Htable -- confirm
        ;   against the C prototype, which is not visible in this file.)
        ; Saves and restores ebp, ebx, esi, edi; eax, ecx, edx are scratch.
        ; Frame (84 bytes): [esp+0..15]  copy of the input block,
        ;                   [esp+16..79] sixteen dword reduction constants.
        ; -----------------------------------------------------------------
     18 align	16
     19 _gcm_gmult_4bit_x86:
     20 L$_gcm_gmult_4bit_x86_begin:
     21 	push	ebp
     22 	push	ebx
     23 	push	esi
     24 	push	edi
     25 	sub	esp,84
        ; 4 pushes (16) + 84 + return address (4) = 104, so [104+esp] is the
        ; first stack argument and [108+esp] the second.
     26 	mov	edi,DWORD [104+esp]
     27 	mov	esi,DWORD [108+esp]
        ; Load the 16-byte block into ebp:edx:ecx:ebx (offsets 0,4,8,12).
     28 	mov	ebp,DWORD [edi]
     29 	mov	edx,DWORD [4+edi]
     30 	mov	ecx,DWORD [8+edi]
     31 	mov	ebx,DWORD [12+edi]
        ; Build the 16-entry reduction table on the stack. These are the same
        ; sixteen values as the odd dwords of L$rem_4bit later in this file.
     32 	mov	DWORD [16+esp],0
     33 	mov	DWORD [20+esp],471859200
     34 	mov	DWORD [24+esp],943718400
     35 	mov	DWORD [28+esp],610271232
     36 	mov	DWORD [32+esp],1887436800
     37 	mov	DWORD [36+esp],1822425088
     38 	mov	DWORD [40+esp],1220542464
     39 	mov	DWORD [44+esp],1423966208
     40 	mov	DWORD [48+esp],3774873600
     41 	mov	DWORD [52+esp],4246732800
     42 	mov	DWORD [56+esp],3644850176
     43 	mov	DWORD [60+esp],3311403008
     44 	mov	DWORD [64+esp],2441084928
     45 	mov	DWORD [68+esp],2376073216
     46 	mov	DWORD [72+esp],2847932416
     47 	mov	DWORD [76+esp],3051356160
        ; Keep a byte-addressable copy of the input block at [esp+0..15].
     48 	mov	DWORD [esp],ebp
     49 	mov	DWORD [4+esp],edx
     50 	mov	DWORD [8+esp],ecx
     51 	mov	DWORD [12+esp],ebx
        ; ebx holds input bytes 12..15; bits 24..27 are the low nibble of
        ; byte 15. shr 20 + and 240 leaves (that nibble * 16) = table offset.
     52 	shr	ebx,20
     53 	and	ebx,240
        ; Load the selected 16-byte table entry as the running 128-bit value:
        ; ebx (entry+8) is the least significant dword of the shift chain
        ; below, then ecx (entry+12), edx (entry+0), ebp (entry+4).
     54 	mov	ebp,DWORD [4+ebx*1+esi]
     55 	mov	edx,DWORD [ebx*1+esi]
     56 	mov	ecx,DWORD [12+ebx*1+esi]
     57 	mov	ebx,DWORD [8+ebx*1+esi]
     58 	xor	eax,eax
        ; edi = index of the input byte being consumed, counting 15 down to 0.
     59 	mov	edi,15
     60 	jmp	NEAR L$000x86_loop
     61 align	16
     62 L$000x86_loop:
        ; First half of an iteration: shift the 128-bit value right by 4,
        ; fold the 4 bits shifted out back in through the stack reduction
        ; table, then xor in the table entry for the HIGH nibble of byte edi.
     63 	mov	al,bl
     64 	shrd	ebx,ecx,4
     65 	and	al,15
     66 	shrd	ecx,edx,4
     67 	shrd	edx,ebp,4
     68 	shr	ebp,4
     69 	xor	ebp,DWORD [16+eax*4+esp]
     70 	mov	al,BYTE [edi*1+esp]
     71 	and	al,240
     72 	xor	ebx,DWORD [8+eax*1+esi]
     73 	xor	ecx,DWORD [12+eax*1+esi]
     74 	xor	edx,DWORD [eax*1+esi]
     75 	xor	ebp,DWORD [4+eax*1+esi]
        ; Move to the next lower byte; exit once byte 0 has been consumed.
     76 	dec	edi
     77 	js	NEAR L$001x86_break
        ; Second half: same shift/reduce step, then xor in the table entry for
        ; the LOW nibble (shl al,4) of the new byte edi.
     78 	mov	al,bl
     79 	shrd	ebx,ecx,4
     80 	and	al,15
     81 	shrd	ecx,edx,4
     82 	shrd	edx,ebp,4
     83 	shr	ebp,4
     84 	xor	ebp,DWORD [16+eax*4+esp]
     85 	mov	al,BYTE [edi*1+esp]
     86 	shl	al,4
     87 	xor	ebx,DWORD [8+eax*1+esi]
     88 	xor	ecx,DWORD [12+eax*1+esi]
     89 	xor	edx,DWORD [eax*1+esi]
     90 	xor	ebp,DWORD [4+eax*1+esi]
     91 	jmp	NEAR L$000x86_loop
     92 align	16
     93 L$001x86_break:
        ; Byte-swap each dword and write the 16-byte result back through arg1.
     94 	bswap	ebx
     95 	bswap	ecx
     96 	bswap	edx
     97 	bswap	ebp
     98 	mov	edi,DWORD [104+esp]
     99 	mov	DWORD [12+edi],ebx
    100 	mov	DWORD [8+edi],ecx
    101 	mov	DWORD [4+edi],edx
    102 	mov	DWORD [edi],ebp
        ; Release the frame and restore the saved registers.
    103 	add	esp,84
    104 	pop	edi
    105 	pop	esi
    106 	pop	ebx
    107 	pop	ebp
    108 	ret
    109 global	_gcm_ghash_4bit_x86
        ; -----------------------------------------------------------------
        ; _gcm_ghash_4bit_x86(arg1, arg2, arg3, arg4) -- integer-only 32-bit
        ; routine, arguments read from the stack.
        ;   arg1: pointer to a 16-byte block; read at entry, overwritten with
        ;         the result at exit.
        ;   arg2: pointer to a table addressed in 16-byte entries by
        ;         (nibble * 16).
        ;   arg3: pointer to the input data, consumed 16 bytes at a time.
        ;   arg4: added to arg3 to form the end pointer.
        ;   (Presumably Xi, Htable, inp, len -- confirm against the C
        ;   prototype, which is not visible in this file.)
        ; NOTE(review): the first 16-byte chunk is processed before the end
        ; pointer is checked, so arg4 is presumably expected to be a nonzero
        ; multiple of 16 -- confirm with callers.
        ; Saves and restores ebp, ebx, esi, edi; eax, ecx, edx are scratch.
        ; The arg3/arg4 stack slots are reused as "current input pointer"
        ; and "end pointer".
        ; -----------------------------------------------------------------
    110 align	16
    111 _gcm_ghash_4bit_x86:
    112 L$_gcm_ghash_4bit_x86_begin:
    113 	push	ebp
    114 	push	ebx
    115 	push	esi
    116 	push	edi
    117 	sub	esp,84
        ; 4 pushes + 84-byte frame + return address: args are at 104..116.
    118 	mov	ebx,DWORD [104+esp]
    119 	mov	esi,DWORD [108+esp]
    120 	mov	edi,DWORD [112+esp]
    121 	mov	ecx,DWORD [116+esp]
        ; Replace the arg4 slot with the end pointer (arg3 + arg4).
    122 	add	ecx,edi
    123 	mov	DWORD [116+esp],ecx
        ; Load the 16-byte block behind arg1 into ebp:edx:ecx:ebx.
    124 	mov	ebp,DWORD [ebx]
    125 	mov	edx,DWORD [4+ebx]
    126 	mov	ecx,DWORD [8+ebx]
    127 	mov	ebx,DWORD [12+ebx]
        ; Sixteen dword reduction constants at [esp+16..79]; same values as
        ; the odd dwords of L$rem_4bit later in this file.
    128 	mov	DWORD [16+esp],0
    129 	mov	DWORD [20+esp],471859200
    130 	mov	DWORD [24+esp],943718400
    131 	mov	DWORD [28+esp],610271232
    132 	mov	DWORD [32+esp],1887436800
    133 	mov	DWORD [36+esp],1822425088
    134 	mov	DWORD [40+esp],1220542464
    135 	mov	DWORD [44+esp],1423966208
    136 	mov	DWORD [48+esp],3774873600
    137 	mov	DWORD [52+esp],4246732800
    138 	mov	DWORD [56+esp],3644850176
    139 	mov	DWORD [60+esp],3311403008
    140 	mov	DWORD [64+esp],2441084928
    141 	mov	DWORD [68+esp],2376073216
    142 	mov	DWORD [72+esp],2847932416
    143 	mov	DWORD [76+esp],3051356160
    144 align	16
    145 L$002x86_outer_loop:
        ; Per 16-byte chunk: xor the chunk at edi into the running block and
        ; keep a byte-addressable copy of the result at [esp+0..15].
    146 	xor	ebx,DWORD [12+edi]
    147 	xor	ecx,DWORD [8+edi]
    148 	xor	edx,DWORD [4+edi]
    149 	xor	ebp,DWORD [edi]
    150 	mov	DWORD [12+esp],ebx
    151 	mov	DWORD [8+esp],ecx
    152 	mov	DWORD [4+esp],edx
    153 	mov	DWORD [esp],ebp
        ; Table offset for the low nibble of byte 15 (bits 24..27 of ebx).
    154 	shr	ebx,20
    155 	and	ebx,240
        ; Running 128-bit value = selected table entry; least significant
        ; dword in ebx (entry+8), then ecx (+12), edx (+0), ebp (+4).
    156 	mov	ebp,DWORD [4+ebx*1+esi]
    157 	mov	edx,DWORD [ebx*1+esi]
    158 	mov	ecx,DWORD [12+ebx*1+esi]
    159 	mov	ebx,DWORD [8+ebx*1+esi]
    160 	xor	eax,eax
        ; edi is reused as the byte index (15 down to 0); the input pointer
        ; is reloaded from its stack slot after the inner loop.
    161 	mov	edi,15
    162 	jmp	NEAR L$003x86_loop
    163 align	16
    164 L$003x86_loop:
        ; Shift right by 4, fold the shifted-out nibble via the stack
        ; reduction table, xor in the entry for the HIGH nibble of byte edi.
    165 	mov	al,bl
    166 	shrd	ebx,ecx,4
    167 	and	al,15
    168 	shrd	ecx,edx,4
    169 	shrd	edx,ebp,4
    170 	shr	ebp,4
    171 	xor	ebp,DWORD [16+eax*4+esp]
    172 	mov	al,BYTE [edi*1+esp]
    173 	and	al,240
    174 	xor	ebx,DWORD [8+eax*1+esi]
    175 	xor	ecx,DWORD [12+eax*1+esi]
    176 	xor	edx,DWORD [eax*1+esi]
    177 	xor	ebp,DWORD [4+eax*1+esi]
    178 	dec	edi
    179 	js	NEAR L$004x86_break
        ; Same step for the LOW nibble (shl al,4) of the next lower byte.
    180 	mov	al,bl
    181 	shrd	ebx,ecx,4
    182 	and	al,15
    183 	shrd	ecx,edx,4
    184 	shrd	edx,ebp,4
    185 	shr	ebp,4
    186 	xor	ebp,DWORD [16+eax*4+esp]
    187 	mov	al,BYTE [edi*1+esp]
    188 	shl	al,4
    189 	xor	ebx,DWORD [8+eax*1+esi]
    190 	xor	ecx,DWORD [12+eax*1+esi]
    191 	xor	edx,DWORD [eax*1+esi]
    192 	xor	ebp,DWORD [4+eax*1+esi]
    193 	jmp	NEAR L$003x86_loop
    194 align	16
    195 L$004x86_break:
        ; Byte-swap the result dwords; they are both the value carried into
        ; the next chunk and the value stored at exit.
    196 	bswap	ebx
    197 	bswap	ecx
    198 	bswap	edx
    199 	bswap	ebp
        ; Advance the input pointer by 16 and continue while it is below the
        ; end pointer (unsigned compare). lea/mov leave the cmp flags intact.
    200 	mov	edi,DWORD [112+esp]
    201 	lea	edi,[16+edi]
    202 	cmp	edi,DWORD [116+esp]
    203 	mov	DWORD [112+esp],edi
    204 	jb	NEAR L$002x86_outer_loop
        ; Write the final 16-byte block back through arg1.
    205 	mov	edi,DWORD [104+esp]
    206 	mov	DWORD [12+edi],ebx
    207 	mov	DWORD [8+edi],ecx
    208 	mov	DWORD [4+edi],edx
    209 	mov	DWORD [edi],ebp
    210 	add	esp,84
    211 	pop	edi
    212 	pop	esi
    213 	pop	ebx
    214 	pop	ebp
    215 	ret
    216 global	_gcm_gmult_4bit_mmx
        ; -----------------------------------------------------------------
        ; _gcm_gmult_4bit_mmx(arg1, arg2) -- MMX variant, stack arguments.
        ;   arg1 (edi): pointer to a 16-byte block; bytes are read one at a
        ;               time and the block is overwritten with the result.
        ;   arg2 (esi): pointer to a table addressed in 16-byte entries by
        ;               (nibble * 16); each entry is read as two qwords.
        ;   (Presumably Xi and Htable -- confirm against the C prototype.)
        ; Uses L$rem_4bit (8-byte entries) for the reduction, located
        ; position-independently via call/pop.
        ; Saves and restores ebp, ebx, esi, edi; uses mm0-mm2 and executes
        ; emms before storing the result and returning.
        ; -----------------------------------------------------------------
    217 align	16
    218 _gcm_gmult_4bit_mmx:
    219 L$_gcm_gmult_4bit_mmx_begin:
    220 	push	ebp
    221 	push	ebx
    222 	push	esi
    223 	push	edi
        ; 4 pushes + return address = 20: first arg at [20+esp], second at 24.
    224 	mov	edi,DWORD [20+esp]
    225 	mov	esi,DWORD [24+esp]
        ; call/pop yields the run-time address of L$005pic_point in eax; the
        ; lea then turns it into the address of L$rem_4bit.
    226 	call	L$005pic_point
    227 L$005pic_point:
    228 	pop	eax
    229 	lea	eax,[(L$rem_4bit-L$005pic_point)+eax]
        ; Start with byte 15: ecx = low nibble * 16, edx = high nibble * 16.
    230 	movzx	ebx,BYTE [15+edi]
    231 	xor	ecx,ecx
    232 	mov	edx,ebx
    233 	mov	cl,dl
        ; ebp = index of the next byte to fetch (14 down to 0).
    234 	mov	ebp,14
    235 	shl	cl,4
    236 	and	edx,240
        ; Running value: mm0 = qword at entry+8, mm1 = qword at entry+0;
        ; ebx tracks the low dword of mm0 (its low 4 bits index L$rem_4bit).
    237 	movq	mm0,[8+ecx*1+esi]
    238 	movq	mm1,[ecx*1+esi]
    239 	movd	ebx,mm0
    240 	jmp	NEAR L$006mmx_loop
    241 align	16
    242 L$006mmx_loop:
        ; Step for the high nibble of the current byte (offset in edx):
        ; shift mm1:mm0 right by 4 (mm2 carries mm1's low 4 bits into mm0),
        ; xor the L$rem_4bit entry for the bits shifted out into mm1, and
        ; xor in the table entry. The next byte is fetched into cl meanwhile.
    243 	psrlq	mm0,4
    244 	and	ebx,15
    245 	movq	mm2,mm1
    246 	psrlq	mm1,4
    247 	pxor	mm0,[8+edx*1+esi]
    248 	mov	cl,BYTE [ebp*1+edi]
    249 	psllq	mm2,60
    250 	pxor	mm1,[ebx*8+eax]
        ; dec sets SF for the js below; the intervening movd/pxor/mov
        ; instructions do not modify EFLAGS.
    251 	dec	ebp
    252 	movd	ebx,mm0
    253 	pxor	mm1,[edx*1+esi]
    254 	mov	edx,ecx
    255 	pxor	mm0,mm2
    256 	js	NEAR L$007mmx_break
        ; Step for the low nibble of the byte just fetched (offset cl << 4);
        ; edx keeps that byte's high nibble * 16 for the next iteration.
    257 	shl	cl,4
    258 	and	ebx,15
    259 	psrlq	mm0,4
    260 	and	edx,240
    261 	movq	mm2,mm1
    262 	psrlq	mm1,4
    263 	pxor	mm0,[8+ecx*1+esi]
    264 	psllq	mm2,60
    265 	pxor	mm1,[ebx*8+eax]
    266 	movd	ebx,mm0
    267 	pxor	mm1,[ecx*1+esi]
    268 	pxor	mm0,mm2
    269 	jmp	NEAR L$006mmx_loop
    270 align	16
    271 L$007mmx_break:
        ; Tail: the last fetched byte (byte 0) still needs both nibble steps,
        ; low nibble first (ecx), then high nibble (edx).
    272 	shl	cl,4
    273 	and	ebx,15
    274 	psrlq	mm0,4
    275 	and	edx,240
    276 	movq	mm2,mm1
    277 	psrlq	mm1,4
    278 	pxor	mm0,[8+ecx*1+esi]
    279 	psllq	mm2,60
    280 	pxor	mm1,[ebx*8+eax]
    281 	movd	ebx,mm0
    282 	pxor	mm1,[ecx*1+esi]
    283 	pxor	mm0,mm2
    284 	psrlq	mm0,4
    285 	and	ebx,15
    286 	movq	mm2,mm1
    287 	psrlq	mm1,4
    288 	pxor	mm0,[8+edx*1+esi]
    289 	psllq	mm2,60
    290 	pxor	mm1,[ebx*8+eax]
    291 	movd	ebx,mm0
    292 	pxor	mm1,[edx*1+esi]
    293 	pxor	mm0,mm2
        ; Split mm0/mm1 into four dwords: ebx = mm0 low, ecx = mm0 high,
        ; edx = mm1 low, ebp = mm1 high.
    294 	psrlq	mm0,32
    295 	movd	edx,mm1
    296 	psrlq	mm1,32
    297 	movd	ecx,mm0
    298 	movd	ebp,mm1
    299 	bswap	ebx
    300 	bswap	edx
    301 	bswap	ecx
    302 	bswap	ebp
        ; Leave MMX state before returning to the caller.
    303 	emms
        ; Store the byte-swapped result back through arg1.
    304 	mov	DWORD [12+edi],ebx
    305 	mov	DWORD [4+edi],edx
    306 	mov	DWORD [8+edi],ecx
    307 	mov	DWORD [edi],ebp
    308 	pop	edi
    309 	pop	esi
    310 	pop	ebx
    311 	pop	ebp
    312 	ret
    313 global	_gcm_ghash_4bit_mmx
        ; -----------------------------------------------------------------
        ; _gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4) -- MMX variant that
        ; consumes the input a whole byte (two nibbles) per step.
        ;   arg1 (eax): pointer to a 16-byte block; read at entry, rewritten
        ;               at exit.
        ;   arg2 (ebx): pointer to a 256-byte table (16 entries x 16 bytes);
        ;               only read, during setup.
        ;   arg3 (ecx): pointer to the input data, consumed 16 bytes at a time.
        ;   arg4 (edx): added to arg3 to form the end pointer.
        ;   (Presumably Xi, Htable, inp, len -- confirm against the C
        ;   prototype, which is not visible in this file.)
        ; NOTE(review): the outer loop ends only when the input pointer is
        ; exactly equal to the end pointer (jne), after processing at least
        ; one chunk; arg4 is presumably a nonzero multiple of 16 -- confirm.
        ;
        ; Uses L$rem_8bit (word entries) located via call/pop, and the
        ; pinsrw/pshufw instructions in addition to basic MMX.
        ;
        ; Stack frame, relative to esp after the alignment below:
        ;   [  0.. 15] byte i = low byte of ((dword at entry_i+8) << 4)
        ;   [ 16..143] qword at entry_i+8, i = 0..15
        ;   [144..271] qword at entry_i+0
        ;   [272..399] (qword at entry_i+8 >> 4) | (qword at entry_i+0 << 60)
        ;   [400..527] qword at entry_i+0 >> 4
        ;   [528..535] block bytes 0..7 (after xor with the input chunk)
        ;   [536]      block bytes 8..11 (after xor with the input chunk)
        ;   [544] arg1   [548] current input pointer   [552] end pointer
        ;   [556] caller's esp (as it was after the four pushes)
        ; -----------------------------------------------------------------
    314 align	16
    315 _gcm_ghash_4bit_mmx:
    316 L$_gcm_ghash_4bit_mmx_begin:
    317 	push	ebp
    318 	push	ebx
    319 	push	esi
    320 	push	edi
    321 	mov	eax,DWORD [20+esp]
    322 	mov	ebx,DWORD [24+esp]
    323 	mov	ecx,DWORD [28+esp]
    324 	mov	edx,DWORD [32+esp]
        ; Remember the unaligned stack pointer so it can be restored at exit.
    325 	mov	ebp,esp
        ; call/pop + lea: esi = run-time address of L$rem_8bit.
    326 	call	L$008pic_point
    327 L$008pic_point:
    328 	pop	esi
    329 	lea	esi,[(L$rem_8bit-L$008pic_point)+esi]
        ; Carve out the frame: round esp down to a 64-byte boundary, then
        ; drop 16 more bytes.
    330 	sub	esp,544
    331 	and	esp,-64
    332 	sub	esp,16
        ; edx = end pointer (arg3 + arg4).
    333 	add	edx,ecx
    334 	mov	DWORD [544+esp],eax
    335 	mov	DWORD [552+esp],edx
    336 	mov	DWORD [556+esp],ebp
        ; Bias the table pointer by 128 so all 16 entries are reachable with
        ; displacements -128..+120; edi/ebp point into the frame tables.
    337 	add	ebx,128
    338 	lea	edi,[144+esp]
    339 	lea	ebp,[400+esp]
        ; ---- Setup: expand the 16 table entries into the frame tables.
        ; The work for consecutive entries is interleaved; per entry i it
        ; stores the four qwords and one byte listed in the header above.
    340 	mov	edx,DWORD [ebx-120]
    341 	movq	mm0,[ebx-120]
    342 	movq	mm3,[ebx-128]
    343 	shl	edx,4
    344 	mov	BYTE [esp],dl
    345 	mov	edx,DWORD [ebx-104]
    346 	movq	mm2,[ebx-104]
    347 	movq	mm5,[ebx-112]
    348 	movq	[edi-128],mm0
    349 	psrlq	mm0,4
    350 	movq	[edi],mm3
    351 	movq	mm7,mm3
    352 	psrlq	mm3,4
    353 	shl	edx,4
    354 	mov	BYTE [1+esp],dl
    355 	mov	edx,DWORD [ebx-88]
    356 	movq	mm1,[ebx-88]
    357 	psllq	mm7,60
    358 	movq	mm4,[ebx-96]
    359 	por	mm0,mm7
    360 	movq	[edi-120],mm2
    361 	psrlq	mm2,4
    362 	movq	[8+edi],mm5
    363 	movq	mm6,mm5
    364 	movq	[ebp-128],mm0
    365 	psrlq	mm5,4
    366 	movq	[ebp],mm3
    367 	shl	edx,4
    368 	mov	BYTE [2+esp],dl
    369 	mov	edx,DWORD [ebx-72]
    370 	movq	mm0,[ebx-72]
    371 	psllq	mm6,60
    372 	movq	mm3,[ebx-80]
    373 	por	mm2,mm6
    374 	movq	[edi-112],mm1
    375 	psrlq	mm1,4
    376 	movq	[16+edi],mm4
    377 	movq	mm7,mm4
    378 	movq	[ebp-120],mm2
    379 	psrlq	mm4,4
    380 	movq	[8+ebp],mm5
    381 	shl	edx,4
    382 	mov	BYTE [3+esp],dl
    383 	mov	edx,DWORD [ebx-56]
    384 	movq	mm2,[ebx-56]
    385 	psllq	mm7,60
    386 	movq	mm5,[ebx-64]
    387 	por	mm1,mm7
    388 	movq	[edi-104],mm0
    389 	psrlq	mm0,4
    390 	movq	[24+edi],mm3
    391 	movq	mm6,mm3
    392 	movq	[ebp-112],mm1
    393 	psrlq	mm3,4
    394 	movq	[16+ebp],mm4
    395 	shl	edx,4
    396 	mov	BYTE [4+esp],dl
    397 	mov	edx,DWORD [ebx-40]
    398 	movq	mm1,[ebx-40]
    399 	psllq	mm6,60
    400 	movq	mm4,[ebx-48]
    401 	por	mm0,mm6
    402 	movq	[edi-96],mm2
    403 	psrlq	mm2,4
    404 	movq	[32+edi],mm5
    405 	movq	mm7,mm5
    406 	movq	[ebp-104],mm0
    407 	psrlq	mm5,4
    408 	movq	[24+ebp],mm3
    409 	shl	edx,4
    410 	mov	BYTE [5+esp],dl
    411 	mov	edx,DWORD [ebx-24]
    412 	movq	mm0,[ebx-24]
    413 	psllq	mm7,60
    414 	movq	mm3,[ebx-32]
    415 	por	mm2,mm7
    416 	movq	[edi-88],mm1
    417 	psrlq	mm1,4
    418 	movq	[40+edi],mm4
    419 	movq	mm6,mm4
    420 	movq	[ebp-96],mm2
    421 	psrlq	mm4,4
    422 	movq	[32+ebp],mm5
    423 	shl	edx,4
    424 	mov	BYTE [6+esp],dl
    425 	mov	edx,DWORD [ebx-8]
    426 	movq	mm2,[ebx-8]
    427 	psllq	mm6,60
    428 	movq	mm5,[ebx-16]
    429 	por	mm1,mm6
    430 	movq	[edi-80],mm0
    431 	psrlq	mm0,4
    432 	movq	[48+edi],mm3
    433 	movq	mm7,mm3
    434 	movq	[ebp-88],mm1
    435 	psrlq	mm3,4
    436 	movq	[40+ebp],mm4
    437 	shl	edx,4
    438 	mov	BYTE [7+esp],dl
    439 	mov	edx,DWORD [8+ebx]
    440 	movq	mm1,[8+ebx]
    441 	psllq	mm7,60
    442 	movq	mm4,[ebx]
    443 	por	mm0,mm7
    444 	movq	[edi-72],mm2
    445 	psrlq	mm2,4
    446 	movq	[56+edi],mm5
    447 	movq	mm6,mm5
    448 	movq	[ebp-80],mm0
    449 	psrlq	mm5,4
    450 	movq	[48+ebp],mm3
    451 	shl	edx,4
    452 	mov	BYTE [8+esp],dl
    453 	mov	edx,DWORD [24+ebx]
    454 	movq	mm0,[24+ebx]
    455 	psllq	mm6,60
    456 	movq	mm3,[16+ebx]
    457 	por	mm2,mm6
    458 	movq	[edi-64],mm1
    459 	psrlq	mm1,4
    460 	movq	[64+edi],mm4
    461 	movq	mm7,mm4
    462 	movq	[ebp-72],mm2
    463 	psrlq	mm4,4
    464 	movq	[56+ebp],mm5
    465 	shl	edx,4
    466 	mov	BYTE [9+esp],dl
    467 	mov	edx,DWORD [40+ebx]
    468 	movq	mm2,[40+ebx]
    469 	psllq	mm7,60
    470 	movq	mm5,[32+ebx]
    471 	por	mm1,mm7
    472 	movq	[edi-56],mm0
    473 	psrlq	mm0,4
    474 	movq	[72+edi],mm3
    475 	movq	mm6,mm3
    476 	movq	[ebp-64],mm1
    477 	psrlq	mm3,4
    478 	movq	[64+ebp],mm4
    479 	shl	edx,4
    480 	mov	BYTE [10+esp],dl
    481 	mov	edx,DWORD [56+ebx]
    482 	movq	mm1,[56+ebx]
    483 	psllq	mm6,60
    484 	movq	mm4,[48+ebx]
    485 	por	mm0,mm6
    486 	movq	[edi-48],mm2
    487 	psrlq	mm2,4
    488 	movq	[80+edi],mm5
    489 	movq	mm7,mm5
    490 	movq	[ebp-56],mm0
    491 	psrlq	mm5,4
    492 	movq	[72+ebp],mm3
    493 	shl	edx,4
    494 	mov	BYTE [11+esp],dl
    495 	mov	edx,DWORD [72+ebx]
    496 	movq	mm0,[72+ebx]
    497 	psllq	mm7,60
    498 	movq	mm3,[64+ebx]
    499 	por	mm2,mm7
    500 	movq	[edi-40],mm1
    501 	psrlq	mm1,4
    502 	movq	[88+edi],mm4
    503 	movq	mm6,mm4
    504 	movq	[ebp-48],mm2
    505 	psrlq	mm4,4
    506 	movq	[80+ebp],mm5
    507 	shl	edx,4
    508 	mov	BYTE [12+esp],dl
    509 	mov	edx,DWORD [88+ebx]
    510 	movq	mm2,[88+ebx]
    511 	psllq	mm6,60
    512 	movq	mm5,[80+ebx]
    513 	por	mm1,mm6
    514 	movq	[edi-32],mm0
    515 	psrlq	mm0,4
    516 	movq	[96+edi],mm3
    517 	movq	mm7,mm3
    518 	movq	[ebp-40],mm1
    519 	psrlq	mm3,4
    520 	movq	[88+ebp],mm4
    521 	shl	edx,4
    522 	mov	BYTE [13+esp],dl
    523 	mov	edx,DWORD [104+ebx]
    524 	movq	mm1,[104+ebx]
    525 	psllq	mm7,60
    526 	movq	mm4,[96+ebx]
    527 	por	mm0,mm7
    528 	movq	[edi-24],mm2
    529 	psrlq	mm2,4
    530 	movq	[104+edi],mm5
    531 	movq	mm6,mm5
    532 	movq	[ebp-32],mm0
    533 	psrlq	mm5,4
    534 	movq	[96+ebp],mm3
    535 	shl	edx,4
    536 	mov	BYTE [14+esp],dl
    537 	mov	edx,DWORD [120+ebx]
    538 	movq	mm0,[120+ebx]
    539 	psllq	mm6,60
    540 	movq	mm3,[112+ebx]
    541 	por	mm2,mm6
    542 	movq	[edi-16],mm1
    543 	psrlq	mm1,4
    544 	movq	[112+edi],mm4
    545 	movq	mm7,mm4
    546 	movq	[ebp-24],mm2
    547 	psrlq	mm4,4
    548 	movq	[104+ebp],mm5
    549 	shl	edx,4
    550 	mov	BYTE [15+esp],dl
    551 	psllq	mm7,60
    552 	por	mm1,mm7
    553 	movq	[edi-8],mm0
    554 	psrlq	mm0,4
    555 	movq	[120+edi],mm3
    556 	movq	mm6,mm3
    557 	movq	[ebp-16],mm1
    558 	psrlq	mm3,4
    559 	movq	[112+ebp],mm4
    560 	psllq	mm6,60
    561 	por	mm0,mm6
    562 	movq	[ebp-8],mm0
    563 	movq	[120+ebp],mm3
        ; ---- Load the 16-byte block behind arg1: bytes 0..7 in mm6,
        ; bytes 8..11 in ebx, bytes 12..15 in edx.
    564 	movq	mm6,[eax]
    565 	mov	ebx,DWORD [8+eax]
    566 	mov	edx,DWORD [12+eax]
    567 align	16
    568 L$009outer:
        ; Per 16-byte chunk: xor the chunk at ecx into the block, advance
        ; the input pointer, and spill the pieces that are needed later.
    569 	xor	edx,DWORD [12+ecx]
    570 	xor	ebx,DWORD [8+ecx]
    571 	pxor	mm6,[ecx]
    572 	lea	ecx,[16+ecx]
    573 	mov	DWORD [536+esp],ebx
    574 	movq	[528+esp],mm6
    575 	mov	DWORD [548+esp],ecx
        ; edx holds block bytes 12..15; each "rol edx,8" rotates the next
        ; byte (highest first) into dl. al/eax = low nibble, ebp or edi =
        ; high nibble of that byte; both index the frame tables (x8).
    576 	xor	eax,eax
    577 	rol	edx,8
    578 	mov	al,dl
    579 	mov	ebp,eax
    580 	and	al,15
    581 	shr	ebp,4
    582 	pxor	mm0,mm0
    583 	rol	edx,8
    584 	pxor	mm1,mm1
    585 	pxor	mm2,mm2
        ; Running value: mm7 = low qword, mm6 = high qword. Each step below
        ; shifts mm6:mm7 right by 8 (mm3 carries mm6's low byte into mm7),
        ; xors in frame-table entries for the next byte's nibbles, and turns
        ; the byte shifted out (xored with a [esp+nibble] byte) into an
        ; L$rem_8bit word that is inserted into mm0/mm1/mm2 and xored into
        ; mm6 one step later. The unrolled steps alternate ebp/edi and
        ; ebx/ecx and rotate through mm2, mm1, mm0.
    586 	movq	mm7,[16+eax*8+esp]
    587 	movq	mm6,[144+eax*8+esp]
    588 	mov	al,dl
    589 	movd	ebx,mm7
    590 	psrlq	mm7,8
    591 	movq	mm3,mm6
    592 	mov	edi,eax
    593 	psrlq	mm6,8
    594 	pxor	mm7,[272+ebp*8+esp]
    595 	and	al,15
    596 	psllq	mm3,56
    597 	shr	edi,4
    598 	pxor	mm7,[16+eax*8+esp]
    599 	rol	edx,8
    600 	pxor	mm6,[144+eax*8+esp]
    601 	pxor	mm7,mm3
    602 	pxor	mm6,[400+ebp*8+esp]
    603 	xor	bl,BYTE [ebp*1+esp]
    604 	mov	al,dl
    605 	movd	ecx,mm7
    606 	movzx	ebx,bl
    607 	psrlq	mm7,8
    608 	movq	mm3,mm6
    609 	mov	ebp,eax
    610 	psrlq	mm6,8
    611 	pxor	mm7,[272+edi*8+esp]
    612 	and	al,15
    613 	psllq	mm3,56
    614 	shr	ebp,4
    615 	pinsrw	mm2,WORD [ebx*2+esi],2
    616 	pxor	mm7,[16+eax*8+esp]
    617 	rol	edx,8
    618 	pxor	mm6,[144+eax*8+esp]
    619 	pxor	mm7,mm3
    620 	pxor	mm6,[400+edi*8+esp]
    621 	xor	cl,BYTE [edi*1+esp]
    622 	mov	al,dl
        ; Bytes 12..15 are consumed; reload edx with block bytes 8..11.
    623 	mov	edx,DWORD [536+esp]
    624 	movd	ebx,mm7
    625 	movzx	ecx,cl
    626 	psrlq	mm7,8
    627 	movq	mm3,mm6
    628 	mov	edi,eax
    629 	psrlq	mm6,8
    630 	pxor	mm7,[272+ebp*8+esp]
    631 	and	al,15
    632 	psllq	mm3,56
    633 	pxor	mm6,mm2
    634 	shr	edi,4
    635 	pinsrw	mm1,WORD [ecx*2+esi],2
    636 	pxor	mm7,[16+eax*8+esp]
    637 	rol	edx,8
    638 	pxor	mm6,[144+eax*8+esp]
    639 	pxor	mm7,mm3
    640 	pxor	mm6,[400+ebp*8+esp]
    641 	xor	bl,BYTE [ebp*1+esp]
    642 	mov	al,dl
    643 	movd	ecx,mm7
    644 	movzx	ebx,bl
    645 	psrlq	mm7,8
    646 	movq	mm3,mm6
    647 	mov	ebp,eax
    648 	psrlq	mm6,8
    649 	pxor	mm7,[272+edi*8+esp]
    650 	and	al,15
    651 	psllq	mm3,56
    652 	pxor	mm6,mm1
    653 	shr	ebp,4
    654 	pinsrw	mm0,WORD [ebx*2+esi],2
    655 	pxor	mm7,[16+eax*8+esp]
    656 	rol	edx,8
    657 	pxor	mm6,[144+eax*8+esp]
    658 	pxor	mm7,mm3
    659 	pxor	mm6,[400+edi*8+esp]
    660 	xor	cl,BYTE [edi*1+esp]
    661 	mov	al,dl
    662 	movd	ebx,mm7
    663 	movzx	ecx,cl
    664 	psrlq	mm7,8
    665 	movq	mm3,mm6
    666 	mov	edi,eax
    667 	psrlq	mm6,8
    668 	pxor	mm7,[272+ebp*8+esp]
    669 	and	al,15
    670 	psllq	mm3,56
    671 	pxor	mm6,mm0
    672 	shr	edi,4
    673 	pinsrw	mm2,WORD [ecx*2+esi],2
    674 	pxor	mm7,[16+eax*8+esp]
    675 	rol	edx,8
    676 	pxor	mm6,[144+eax*8+esp]
    677 	pxor	mm7,mm3
    678 	pxor	mm6,[400+ebp*8+esp]
    679 	xor	bl,BYTE [ebp*1+esp]
    680 	mov	al,dl
    681 	movd	ecx,mm7
    682 	movzx	ebx,bl
    683 	psrlq	mm7,8
    684 	movq	mm3,mm6
    685 	mov	ebp,eax
    686 	psrlq	mm6,8
    687 	pxor	mm7,[272+edi*8+esp]
    688 	and	al,15
    689 	psllq	mm3,56
    690 	pxor	mm6,mm2
    691 	shr	ebp,4
    692 	pinsrw	mm1,WORD [ebx*2+esi],2
    693 	pxor	mm7,[16+eax*8+esp]
    694 	rol	edx,8
    695 	pxor	mm6,[144+eax*8+esp]
    696 	pxor	mm7,mm3
    697 	pxor	mm6,[400+edi*8+esp]
    698 	xor	cl,BYTE [edi*1+esp]
    699 	mov	al,dl
        ; Reload edx with block bytes 4..7 (upper half of the spilled mm6).
    700 	mov	edx,DWORD [532+esp]
    701 	movd	ebx,mm7
    702 	movzx	ecx,cl
    703 	psrlq	mm7,8
    704 	movq	mm3,mm6
    705 	mov	edi,eax
    706 	psrlq	mm6,8
    707 	pxor	mm7,[272+ebp*8+esp]
    708 	and	al,15
    709 	psllq	mm3,56
    710 	pxor	mm6,mm1
    711 	shr	edi,4
    712 	pinsrw	mm0,WORD [ecx*2+esi],2
    713 	pxor	mm7,[16+eax*8+esp]
    714 	rol	edx,8
    715 	pxor	mm6,[144+eax*8+esp]
    716 	pxor	mm7,mm3
    717 	pxor	mm6,[400+ebp*8+esp]
    718 	xor	bl,BYTE [ebp*1+esp]
    719 	mov	al,dl
    720 	movd	ecx,mm7
    721 	movzx	ebx,bl
    722 	psrlq	mm7,8
    723 	movq	mm3,mm6
    724 	mov	ebp,eax
    725 	psrlq	mm6,8
    726 	pxor	mm7,[272+edi*8+esp]
    727 	and	al,15
    728 	psllq	mm3,56
    729 	pxor	mm6,mm0
    730 	shr	ebp,4
    731 	pinsrw	mm2,WORD [ebx*2+esi],2
    732 	pxor	mm7,[16+eax*8+esp]
    733 	rol	edx,8
    734 	pxor	mm6,[144+eax*8+esp]
    735 	pxor	mm7,mm3
    736 	pxor	mm6,[400+edi*8+esp]
    737 	xor	cl,BYTE [edi*1+esp]
    738 	mov	al,dl
    739 	movd	ebx,mm7
    740 	movzx	ecx,cl
    741 	psrlq	mm7,8
    742 	movq	mm3,mm6
    743 	mov	edi,eax
    744 	psrlq	mm6,8
    745 	pxor	mm7,[272+ebp*8+esp]
    746 	and	al,15
    747 	psllq	mm3,56
    748 	pxor	mm6,mm2
    749 	shr	edi,4
    750 	pinsrw	mm1,WORD [ecx*2+esi],2
    751 	pxor	mm7,[16+eax*8+esp]
    752 	rol	edx,8
    753 	pxor	mm6,[144+eax*8+esp]
    754 	pxor	mm7,mm3
    755 	pxor	mm6,[400+ebp*8+esp]
    756 	xor	bl,BYTE [ebp*1+esp]
    757 	mov	al,dl
    758 	movd	ecx,mm7
    759 	movzx	ebx,bl
    760 	psrlq	mm7,8
    761 	movq	mm3,mm6
    762 	mov	ebp,eax
    763 	psrlq	mm6,8
    764 	pxor	mm7,[272+edi*8+esp]
    765 	and	al,15
    766 	psllq	mm3,56
    767 	pxor	mm6,mm1
    768 	shr	ebp,4
    769 	pinsrw	mm0,WORD [ebx*2+esi],2
    770 	pxor	mm7,[16+eax*8+esp]
    771 	rol	edx,8
    772 	pxor	mm6,[144+eax*8+esp]
    773 	pxor	mm7,mm3
    774 	pxor	mm6,[400+edi*8+esp]
    775 	xor	cl,BYTE [edi*1+esp]
    776 	mov	al,dl
        ; Reload edx with block bytes 0..3 (lower half of the spilled mm6).
    777 	mov	edx,DWORD [528+esp]
    778 	movd	ebx,mm7
    779 	movzx	ecx,cl
    780 	psrlq	mm7,8
    781 	movq	mm3,mm6
    782 	mov	edi,eax
    783 	psrlq	mm6,8
    784 	pxor	mm7,[272+ebp*8+esp]
    785 	and	al,15
    786 	psllq	mm3,56
    787 	pxor	mm6,mm0
    788 	shr	edi,4
    789 	pinsrw	mm2,WORD [ecx*2+esi],2
    790 	pxor	mm7,[16+eax*8+esp]
    791 	rol	edx,8
    792 	pxor	mm6,[144+eax*8+esp]
    793 	pxor	mm7,mm3
    794 	pxor	mm6,[400+ebp*8+esp]
    795 	xor	bl,BYTE [ebp*1+esp]
    796 	mov	al,dl
    797 	movd	ecx,mm7
    798 	movzx	ebx,bl
    799 	psrlq	mm7,8
    800 	movq	mm3,mm6
    801 	mov	ebp,eax
    802 	psrlq	mm6,8
    803 	pxor	mm7,[272+edi*8+esp]
    804 	and	al,15
    805 	psllq	mm3,56
    806 	pxor	mm6,mm2
    807 	shr	ebp,4
    808 	pinsrw	mm1,WORD [ebx*2+esi],2
    809 	pxor	mm7,[16+eax*8+esp]
    810 	rol	edx,8
    811 	pxor	mm6,[144+eax*8+esp]
    812 	pxor	mm7,mm3
    813 	pxor	mm6,[400+edi*8+esp]
    814 	xor	cl,BYTE [edi*1+esp]
    815 	mov	al,dl
    816 	movd	ebx,mm7
    817 	movzx	ecx,cl
    818 	psrlq	mm7,8
    819 	movq	mm3,mm6
    820 	mov	edi,eax
    821 	psrlq	mm6,8
    822 	pxor	mm7,[272+ebp*8+esp]
    823 	and	al,15
    824 	psllq	mm3,56
    825 	pxor	mm6,mm1
    826 	shr	edi,4
    827 	pinsrw	mm0,WORD [ecx*2+esi],2
    828 	pxor	mm7,[16+eax*8+esp]
    829 	rol	edx,8
    830 	pxor	mm6,[144+eax*8+esp]
    831 	pxor	mm7,mm3
    832 	pxor	mm6,[400+ebp*8+esp]
    833 	xor	bl,BYTE [ebp*1+esp]
    834 	mov	al,dl
    835 	movd	ecx,mm7
    836 	movzx	ebx,bl
    837 	psrlq	mm7,8
    838 	movq	mm3,mm6
    839 	mov	ebp,eax
    840 	psrlq	mm6,8
    841 	pxor	mm7,[272+edi*8+esp]
    842 	and	al,15
    843 	psllq	mm3,56
    844 	pxor	mm6,mm0
    845 	shr	ebp,4
    846 	pinsrw	mm2,WORD [ebx*2+esi],2
    847 	pxor	mm7,[16+eax*8+esp]
    848 	rol	edx,8
    849 	pxor	mm6,[144+eax*8+esp]
    850 	pxor	mm7,mm3
    851 	pxor	mm6,[400+edi*8+esp]
    852 	xor	cl,BYTE [edi*1+esp]
    853 	mov	al,dl
        ; NOTE(review): the value loaded into edx here is not consumed (no
        ; rol edx / mov al,dl follows) before "movd edx,mm7" overwrites it
        ; further down; it looks like a leftover of the unrolling pattern.
        ; Left exactly as-is.
    854 	mov	edx,DWORD [524+esp]
    855 	movd	ebx,mm7
    856 	movzx	ecx,cl
    857 	psrlq	mm7,8
    858 	movq	mm3,mm6
    859 	mov	edi,eax
    860 	psrlq	mm6,8
    861 	pxor	mm7,[272+ebp*8+esp]
    862 	and	al,15
    863 	psllq	mm3,56
    864 	pxor	mm6,mm2
    865 	shr	edi,4
    866 	pinsrw	mm1,WORD [ecx*2+esi],2
    867 	pxor	mm7,[16+eax*8+esp]
    868 	pxor	mm6,[144+eax*8+esp]
    869 	xor	bl,BYTE [ebp*1+esp]
    870 	pxor	mm7,mm3
    871 	pxor	mm6,[400+ebp*8+esp]
    872 	movzx	ebx,bl
        ; Final step: only a 4-bit shift remains (psrlq 4 / psllq 60), using
        ; the last high nibble in edi; the pending remainder words are
        ; aligned with psllq 4 / psllq 12 and a pinsrw into word 3.
    873 	pxor	mm2,mm2
    874 	psllq	mm1,4
    875 	movd	ecx,mm7
    876 	psrlq	mm7,4
    877 	movq	mm3,mm6
    878 	psrlq	mm6,4
    879 	shl	ecx,4
    880 	pxor	mm7,[16+edi*8+esp]
    881 	psllq	mm3,60
    882 	movzx	ecx,cl
    883 	pxor	mm7,mm3
    884 	pxor	mm6,[144+edi*8+esp]
    885 	pinsrw	mm0,WORD [ebx*2+esi],2
    886 	pxor	mm6,mm1
    887 	movd	edx,mm7
    888 	pinsrw	mm2,WORD [ecx*2+esi],3
    889 	psllq	mm0,12
    890 	pxor	mm6,mm0
    891 	psrlq	mm7,32
    892 	pxor	mm6,mm2
    893 	mov	ecx,DWORD [548+esp]
    894 	movd	ebx,mm7
        ; Byte-reverse the result: mm6 via a per-word byte swap (psllw/psrlw/
        ; por) plus pshufw 27 (word order reversed); edx/ebx via bswap.
    895 	movq	mm3,mm6
    896 	psllw	mm6,8
    897 	psrlw	mm3,8
    898 	por	mm6,mm3
    899 	bswap	edx
    900 	pshufw	mm6,mm6,27
    901 	bswap	ebx
        ; Loop until the input pointer reaches the end pointer exactly.
    902 	cmp	ecx,DWORD [552+esp]
    903 	jne	NEAR L$009outer
        ; Store the 16-byte result through arg1, restore the caller's stack
        ; pointer, and leave MMX state.
    904 	mov	eax,DWORD [544+esp]
    905 	mov	DWORD [12+eax],edx
    906 	mov	DWORD [8+eax],ebx
    907 	movq	[eax],mm6
    908 	mov	esp,DWORD [556+esp]
    909 	emms
    910 	pop	edi
    911 	pop	esi
    912 	pop	ebx
    913 	pop	ebp
    914 	ret
    915 global	_gcm_init_clmul
        ; -----------------------------------------------------------------
        ; _gcm_init_clmul(arg1, arg2) -- SSE / carry-less-multiply variant.
        ;   arg1 (edx): output pointer; 48 bytes are written (three
        ;               unaligned 16-byte stores at +0, +16, +32).
        ;   arg2 (eax): pointer to 16 input bytes (unaligned load).
        ;   (Presumably Htable and the hash key H -- confirm against the C
        ;   prototype, which is not visible in this file.)
        ; Leaf routine: no registers are saved; uses eax, ecx, edx and
        ; xmm0-xmm5. ecx = address of L$bswap, found via call/pop.
        ; Several instructions are emitted as raw "db" opcode bytes; their
        ; decodings are given next to them.
        ; -----------------------------------------------------------------
    916 align	16
    917 _gcm_init_clmul:
    918 L$_gcm_init_clmul_begin:
    919 	mov	edx,DWORD [4+esp]
    920 	mov	eax,DWORD [8+esp]
    921 	call	L$010pic
    922 L$010pic:
    923 	pop	ecx
    924 	lea	ecx,[(L$bswap-L$010pic)+ecx]
        ; Load the input and swap its two qword halves (pshufd 78 = 0x4E).
    925 	movdqu	xmm2,[eax]
    926 	pshufd	xmm2,xmm2,78
        ; Shift the 128-bit value left by one bit: psllq shifts each qword,
        ; xmm3 carries the low qword's top bit into the high qword. xmm5
        ; becomes an all-ones mask when the original top dword was negative
        ; (top bit set); in that case the constant at [16+ecx] (second row
        ; of L$bswap) is xored in.
    927 	pshufd	xmm4,xmm2,255
    928 	movdqa	xmm3,xmm2
    929 	psllq	xmm2,1
    930 	pxor	xmm5,xmm5
    931 	psrlq	xmm3,63
    932 	pcmpgtd	xmm5,xmm4
    933 	pslldq	xmm3,8
    934 	por	xmm2,xmm3
    935 	pand	xmm5,[16+ecx]
    936 	pxor	xmm2,xmm5
        ; Multiply the adjusted value by itself. xmm3/xmm4 hold (hi ^ lo) of
        ; each operand for the Karatsuba middle term.
    937 	movdqa	xmm0,xmm2
    938 	movdqa	xmm1,xmm0
    939 	pshufd	xmm3,xmm0,78
    940 	pshufd	xmm4,xmm2,78
    941 	pxor	xmm3,xmm0
    942 	pxor	xmm4,xmm2
        ; db-encoded: pclmulqdq xmm0,xmm2,0x00   (lo * lo)
        ;             pclmulqdq xmm1,xmm2,0x11   (hi * hi)
        ;             pclmulqdq xmm3,xmm4,0x00   ((hi^lo) * (hi^lo))
    943 db	102,15,58,68,194,0
    944 db	102,15,58,68,202,17
    945 db	102,15,58,68,220,0
        ; Middle term = xmm3 ^ lo ^ hi; split it across the 256-bit product
        ; held in xmm1:xmm0.
    946 	xorps	xmm3,xmm0
    947 	xorps	xmm3,xmm1
    948 	movdqa	xmm4,xmm3
    949 	psrldq	xmm3,8
    950 	pslldq	xmm4,8
    951 	pxor	xmm1,xmm3
    952 	pxor	xmm0,xmm4
        ; Shift-and-xor reduction of xmm1:xmm0 down to 128 bits in xmm0
        ; (presumably modulo the GHASH polynomial -- not stated in source).
    953 	movdqa	xmm4,xmm0
    954 	movdqa	xmm3,xmm0
    955 	psllq	xmm0,5
    956 	pxor	xmm3,xmm0
    957 	psllq	xmm0,1
    958 	pxor	xmm0,xmm3
    959 	psllq	xmm0,57
    960 	movdqa	xmm3,xmm0
    961 	pslldq	xmm0,8
    962 	psrldq	xmm3,8
    963 	pxor	xmm0,xmm4
    964 	pxor	xmm1,xmm3
    965 	movdqa	xmm4,xmm0
    966 	psrlq	xmm0,1
    967 	pxor	xmm1,xmm4
    968 	pxor	xmm4,xmm0
    969 	psrlq	xmm0,5
    970 	pxor	xmm0,xmm4
    971 	psrlq	xmm0,1
    972 	pxor	xmm0,xmm1
        ; Outputs: [edx+0]  = adjusted input value (xmm2)
        ;          [edx+16] = reduced product (xmm0)
        ;          [edx+32] = the (hi ^ lo) halves of both, packed into one
        ;                     register by palignr.
    973 	pshufd	xmm3,xmm2,78
    974 	pshufd	xmm4,xmm0,78
    975 	pxor	xmm3,xmm2
    976 	movdqu	[edx],xmm2
    977 	pxor	xmm4,xmm0
    978 	movdqu	[16+edx],xmm0
        ; db-encoded: palignr xmm4,xmm3,8
    979 db	102,15,58,15,227,8
    980 	movdqu	[32+edx],xmm4
    981 	ret
    982 global	_gcm_gmult_clmul
        ; -----------------------------------------------------------------
        ; _gcm_gmult_clmul(arg1, arg2) -- carry-less-multiply variant.
        ;   arg1 (eax): pointer to a 16-byte block; read, multiplied, and
        ;               written back (unaligned accesses).
        ;   arg2 (edx): pointer to a table; 16 bytes at +0 and 16 bytes at
        ;               +32 are read.
        ;   (Presumably Xi and the Htable written by _gcm_init_clmul --
        ;   confirm against the C prototypes.)
        ; Leaf routine: no registers are saved; uses eax, ecx, edx and
        ; xmm0-xmm5. ecx = address of L$bswap, found via call/pop.
        ; -----------------------------------------------------------------
    983 align	16
    984 _gcm_gmult_clmul:
    985 L$_gcm_gmult_clmul_begin:
    986 	mov	eax,DWORD [4+esp]
    987 	mov	edx,DWORD [8+esp]
    988 	call	L$011pic
    989 L$011pic:
    990 	pop	ecx
    991 	lea	ecx,[(L$bswap-L$011pic)+ecx]
    992 	movdqu	xmm0,[eax]
        ; xmm5 = byte-reversal shuffle mask (first row of L$bswap). movdqa
        ; needs a 16-byte-aligned address; L$bswap is preceded by "align 64".
    993 	movdqa	xmm5,[ecx]
    994 	movups	xmm2,[edx]
        ; db-encoded: pshufb xmm0,xmm5   (byte-reverse the input block)
    995 db	102,15,56,0,197
    996 	movups	xmm4,[32+edx]
    997 	movdqa	xmm1,xmm0
    998 	pshufd	xmm3,xmm0,78
    999 	pxor	xmm3,xmm0
        ; db-encoded: pclmulqdq xmm0,xmm2,0x00   (lo * lo)
        ;             pclmulqdq xmm1,xmm2,0x11   (hi * hi)
        ;             pclmulqdq xmm3,xmm4,0x00   (middle term; xmm4 comes
        ;                                         from [32+edx])
   1000 db	102,15,58,68,194,0
   1001 db	102,15,58,68,202,17
   1002 db	102,15,58,68,220,0
        ; Combine the three partial products into the 256-bit value
        ; xmm1:xmm0.
   1003 	xorps	xmm3,xmm0
   1004 	xorps	xmm3,xmm1
   1005 	movdqa	xmm4,xmm3
   1006 	psrldq	xmm3,8
   1007 	pslldq	xmm4,8
   1008 	pxor	xmm1,xmm3
   1009 	pxor	xmm0,xmm4
        ; Shift-and-xor reduction of xmm1:xmm0 down to 128 bits in xmm0
        ; (same instruction sequence as in _gcm_init_clmul).
   1010 	movdqa	xmm4,xmm0
   1011 	movdqa	xmm3,xmm0
   1012 	psllq	xmm0,5
   1013 	pxor	xmm3,xmm0
   1014 	psllq	xmm0,1
   1015 	pxor	xmm0,xmm3
   1016 	psllq	xmm0,57
   1017 	movdqa	xmm3,xmm0
   1018 	pslldq	xmm0,8
   1019 	psrldq	xmm3,8
   1020 	pxor	xmm0,xmm4
   1021 	pxor	xmm1,xmm3
   1022 	movdqa	xmm4,xmm0
   1023 	psrlq	xmm0,1
   1024 	pxor	xmm1,xmm4
   1025 	pxor	xmm4,xmm0
   1026 	psrlq	xmm0,5
   1027 	pxor	xmm0,xmm4
   1028 	psrlq	xmm0,1
   1029 	pxor	xmm0,xmm1
        ; db-encoded: pshufb xmm0,xmm5   (byte-reverse back), then store.
   1030 db	102,15,56,0,197
   1031 	movdqu	[eax],xmm0
   1032 	ret
   1033 global	_gcm_ghash_clmul
        ; -----------------------------------------------------------------
        ; _gcm_ghash_clmul(arg1, arg2, arg3, arg4) -- carry-less-multiply
        ; variant that handles two 16-byte input blocks per main-loop pass.
        ;   arg1 (eax): pointer to a 16-byte block; read at entry, rewritten
        ;               at exit (unaligned accesses).
        ;   arg2 (edx): pointer to a table; 16-byte values at +0, +16 and
        ;               +32 are read.
        ;   arg3 (esi): pointer to the input data.
        ;   arg4 (ebx): byte count; used as a down-counter.
        ;   (Presumably Xi, the Htable from _gcm_init_clmul, inp, len --
        ;   confirm against the C prototypes.)
        ; NOTE(review): at least one 16-byte block is always read; arg4 is
        ; presumably a nonzero multiple of 16 -- confirm with callers.
        ; Saves and restores ebp, ebx, esi, edi; uses xmm0-xmm7.
        ; ecx = address of L$bswap, found via call/pop.
        ; -----------------------------------------------------------------
   1034 align	16
   1035 _gcm_ghash_clmul:
   1036 L$_gcm_ghash_clmul_begin:
   1037 	push	ebp
   1038 	push	ebx
   1039 	push	esi
   1040 	push	edi
   1041 	mov	eax,DWORD [20+esp]
   1042 	mov	edx,DWORD [24+esp]
   1043 	mov	esi,DWORD [28+esp]
   1044 	mov	ebx,DWORD [32+esp]
   1045 	call	L$012pic
   1046 L$012pic:
   1047 	pop	ecx
   1048 	lea	ecx,[(L$bswap-L$012pic)+ecx]
   1049 	movdqu	xmm0,[eax]
        ; xmm5 = byte-reversal mask (16-byte aligned; see "align 64" before
        ; L$bswap). xmm2 = table value at +0.
   1050 	movdqa	xmm5,[ecx]
   1051 	movdqu	xmm2,[edx]
        ; db-encoded: pshufb xmm0,xmm5   (byte-reverse the running block)
   1052 db	102,15,56,0,197
        ; Exactly one block of input: go straight to the single-block tail.
   1053 	sub	ebx,16
   1054 	jz	NEAR L$013odd_tail
        ; Two or more blocks: load and byte-reverse the first pair.
   1055 	movdqu	xmm3,[esi]
   1056 	movdqu	xmm6,[16+esi]
        ; db-encoded: pshufb xmm3,xmm5
        ;             pshufb xmm6,xmm5
   1057 db	102,15,56,0,221
   1058 db	102,15,56,0,245
        ; xmm5 is reused for the table value at +32 from here on (the mask
        ; is reloaded from [ecx] where needed).
   1059 	movdqu	xmm5,[32+edx]
   1060 	pxor	xmm0,xmm3
   1061 	pshufd	xmm3,xmm6,78
   1062 	movdqa	xmm7,xmm6
   1063 	pxor	xmm3,xmm6
   1064 	lea	esi,[32+esi]
        ; Start the multiply of the second block of the pair by the table
        ; value at +0. db-encoded:
        ;             pclmulqdq xmm6,xmm2,0x00
        ;             pclmulqdq xmm7,xmm2,0x11
        ;             pclmulqdq xmm3,xmm5,0x00
   1065 db	102,15,58,68,242,0
   1066 db	102,15,58,68,250,17
   1067 db	102,15,58,68,221,0
        ; xmm2 = table value at +16 for the running block's multiply.
   1068 	movups	xmm2,[16+edx]
   1069 	nop
        ; If no further pair is available, finish in the even tail.
   1070 	sub	ebx,32
   1071 	jbe	NEAR L$014even_tail
   1072 	jmp	NEAR L$015mod_loop
   1073 align	32
   1074 L$015mod_loop:
        ; Main loop, 32 input bytes per pass: multiply the running block by
        ; the +16 table value, merge with the partial products already
        ; computed for the previous second block, reduce, and interleave the
        ; loads/multiplies for the next pair.
   1075 	pshufd	xmm4,xmm0,78
   1076 	movdqa	xmm1,xmm0
   1077 	pxor	xmm4,xmm0
   1078 	nop
        ; db-encoded: pclmulqdq xmm0,xmm2,0x00
        ;             pclmulqdq xmm1,xmm2,0x11
        ;             pclmulqdq xmm4,xmm5,0x10
   1079 db	102,15,58,68,194,0
   1080 db	102,15,58,68,202,17
   1081 db	102,15,58,68,229,16
   1082 	movups	xmm2,[edx]
   1083 	xorps	xmm0,xmm6
   1084 	movdqa	xmm5,[ecx]
   1085 	xorps	xmm1,xmm7
   1086 	movdqu	xmm7,[esi]
   1087 	pxor	xmm3,xmm0
   1088 	movdqu	xmm6,[16+esi]
   1089 	pxor	xmm3,xmm1
        ; db-encoded: pshufb xmm7,xmm5
   1090 db	102,15,56,0,253
   1091 	pxor	xmm4,xmm3
   1092 	movdqa	xmm3,xmm4
   1093 	psrldq	xmm4,8
   1094 	pslldq	xmm3,8
   1095 	pxor	xmm1,xmm4
   1096 	pxor	xmm0,xmm3
        ; db-encoded: pshufb xmm6,xmm5
   1097 db	102,15,56,0,245
        ; Fold the next pair's first block into the high half before the
        ; reduction.
   1098 	pxor	xmm1,xmm7
   1099 	movdqa	xmm7,xmm6
   1100 	movdqa	xmm4,xmm0
   1101 	movdqa	xmm3,xmm0
   1102 	psllq	xmm0,5
   1103 	pxor	xmm3,xmm0
   1104 	psllq	xmm0,1
   1105 	pxor	xmm0,xmm3
        ; db-encoded: pclmulqdq xmm6,xmm2,0x00
   1106 db	102,15,58,68,242,0
   1107 	movups	xmm5,[32+edx]
   1108 	psllq	xmm0,57
   1109 	movdqa	xmm3,xmm0
   1110 	pslldq	xmm0,8
   1111 	psrldq	xmm3,8
   1112 	pxor	xmm0,xmm4
   1113 	pxor	xmm1,xmm3
   1114 	pshufd	xmm3,xmm7,78
   1115 	movdqa	xmm4,xmm0
   1116 	psrlq	xmm0,1
   1117 	pxor	xmm3,xmm7
   1118 	pxor	xmm1,xmm4
        ; db-encoded: pclmulqdq xmm7,xmm2,0x11
   1119 db	102,15,58,68,250,17
   1120 	movups	xmm2,[16+edx]
   1121 	pxor	xmm4,xmm0
   1122 	psrlq	xmm0,5
   1123 	pxor	xmm0,xmm4
   1124 	psrlq	xmm0,1
   1125 	pxor	xmm0,xmm1
        ; db-encoded: pclmulqdq xmm3,xmm5,0x00
   1126 db	102,15,58,68,221,0
   1127 	lea	esi,[32+esi]
        ; Continue while more than 32 bytes remained before this decrement.
   1128 	sub	ebx,32
   1129 	ja	NEAR L$015mod_loop
   1130 L$014even_tail:
        ; Finish the pending pair: multiply the running block, merge with
        ; the partial products in xmm6/xmm7/xmm3, and reduce.
   1131 	pshufd	xmm4,xmm0,78
   1132 	movdqa	xmm1,xmm0
   1133 	pxor	xmm4,xmm0
        ; db-encoded: pclmulqdq xmm0,xmm2,0x00
        ;             pclmulqdq xmm1,xmm2,0x11
        ;             pclmulqdq xmm4,xmm5,0x10
   1134 db	102,15,58,68,194,0
   1135 db	102,15,58,68,202,17
   1136 db	102,15,58,68,229,16
        ; Restore the byte-reversal mask in xmm5 for the final pshufb.
   1137 	movdqa	xmm5,[ecx]
   1138 	xorps	xmm0,xmm6
   1139 	xorps	xmm1,xmm7
   1140 	pxor	xmm3,xmm0
   1141 	pxor	xmm3,xmm1
   1142 	pxor	xmm4,xmm3
   1143 	movdqa	xmm3,xmm4
   1144 	psrldq	xmm4,8
   1145 	pslldq	xmm3,8
   1146 	pxor	xmm1,xmm4
   1147 	pxor	xmm0,xmm3
   1148 	movdqa	xmm4,xmm0
   1149 	movdqa	xmm3,xmm0
   1150 	psllq	xmm0,5
   1151 	pxor	xmm3,xmm0
   1152 	psllq	xmm0,1
   1153 	pxor	xmm0,xmm3
   1154 	psllq	xmm0,57
   1155 	movdqa	xmm3,xmm0
   1156 	pslldq	xmm0,8
   1157 	psrldq	xmm3,8
   1158 	pxor	xmm0,xmm4
   1159 	pxor	xmm1,xmm3
   1160 	movdqa	xmm4,xmm0
   1161 	psrlq	xmm0,1
   1162 	pxor	xmm1,xmm4
   1163 	pxor	xmm4,xmm0
   1164 	psrlq	xmm0,5
   1165 	pxor	xmm0,xmm4
   1166 	psrlq	xmm0,1
   1167 	pxor	xmm0,xmm1
        ; ebx == 0 here means one more single block remains; any other value
        ; means the input is exhausted.
   1168 	test	ebx,ebx
   1169 	jnz	NEAR L$016done
   1170 	movups	xmm2,[edx]
   1171 L$013odd_tail:
        ; Single-block step: xor in one byte-reversed input block, multiply
        ; by the table value at +0 (xmm2), and reduce.
   1172 	movdqu	xmm3,[esi]
        ; db-encoded: pshufb xmm3,xmm5
   1173 db	102,15,56,0,221
   1174 	pxor	xmm0,xmm3
   1175 	movdqa	xmm1,xmm0
   1176 	pshufd	xmm3,xmm0,78
   1177 	pshufd	xmm4,xmm2,78
   1178 	pxor	xmm3,xmm0
   1179 	pxor	xmm4,xmm2
        ; db-encoded: pclmulqdq xmm0,xmm2,0x00
        ;             pclmulqdq xmm1,xmm2,0x11
        ;             pclmulqdq xmm3,xmm4,0x00
   1180 db	102,15,58,68,194,0
   1181 db	102,15,58,68,202,17
   1182 db	102,15,58,68,220,0
   1183 	xorps	xmm3,xmm0
   1184 	xorps	xmm3,xmm1
   1185 	movdqa	xmm4,xmm3
   1186 	psrldq	xmm3,8
   1187 	pslldq	xmm4,8
   1188 	pxor	xmm1,xmm3
   1189 	pxor	xmm0,xmm4
   1190 	movdqa	xmm4,xmm0
   1191 	movdqa	xmm3,xmm0
   1192 	psllq	xmm0,5
   1193 	pxor	xmm3,xmm0
   1194 	psllq	xmm0,1
   1195 	pxor	xmm0,xmm3
   1196 	psllq	xmm0,57
   1197 	movdqa	xmm3,xmm0
   1198 	pslldq	xmm0,8
   1199 	psrldq	xmm3,8
   1200 	pxor	xmm0,xmm4
   1201 	pxor	xmm1,xmm3
   1202 	movdqa	xmm4,xmm0
   1203 	psrlq	xmm0,1
   1204 	pxor	xmm1,xmm4
   1205 	pxor	xmm4,xmm0
   1206 	psrlq	xmm0,5
   1207 	pxor	xmm0,xmm4
   1208 	psrlq	xmm0,1
   1209 	pxor	xmm0,xmm1
   1210 L$016done:
        ; db-encoded: pshufb xmm0,xmm5   (byte-reverse back), then store the
        ; result through arg1.
   1211 db	102,15,56,0,197
   1212 	movdqu	[eax],xmm0
   1213 	pop	edi
   1214 	pop	esi
   1215 	pop	ebx
   1216 	pop	ebp
   1217 	ret
   1218 align	64
        ; -----------------------------------------------------------------
        ; Read-only constants, addressed position-independently by the
        ; routines above (call/pop + lea).
        ; -----------------------------------------------------------------
        ; L$bswap row 0: pshufb mask 15..0, i.e. reverse the 16 bytes of an
        ;                xmm register (loaded with movdqa by the clmul code).
        ; L$bswap row 1: constant with low byte 1 and top byte 194 (0xC2);
        ;                pand-ed at [16+ecx] in _gcm_init_clmul.
   1219 L$bswap:
   1220 db	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
   1221 db	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
   1222 align	64
        ; L$rem_8bit: 256 word entries (32 rows of 8), indexed by a byte value
        ; via "pinsrw ..., WORD [reg*2+esi]" in _gcm_ghash_4bit_mmx.
   1223 L$rem_8bit:
   1224 dw	0,450,900,582,1800,1738,1164,1358
   1225 dw	3600,4050,3476,3158,2328,2266,2716,2910
   1226 dw	7200,7650,8100,7782,6952,6890,6316,6510
   1227 dw	4656,5106,4532,4214,5432,5370,5820,6014
   1228 dw	14400,14722,15300,14854,16200,16010,15564,15630
   1229 dw	13904,14226,13780,13334,12632,12442,13020,13086
   1230 dw	9312,9634,10212,9766,9064,8874,8428,8494
   1231 dw	10864,11186,10740,10294,11640,11450,12028,12094
   1232 dw	28800,28994,29444,29382,30600,30282,29708,30158
   1233 dw	32400,32594,32020,31958,31128,30810,31260,31710
   1234 dw	27808,28002,28452,28390,27560,27242,26668,27118
   1235 dw	25264,25458,24884,24822,26040,25722,26172,26622
   1236 dw	18624,18690,19268,19078,20424,19978,19532,19854
   1237 dw	18128,18194,17748,17558,16856,16410,16988,17310
   1238 dw	21728,21794,22372,22182,21480,21034,20588,20910
   1239 dw	23280,23346,22900,22710,24056,23610,24188,24510
   1240 dw	57600,57538,57988,58182,58888,59338,58764,58446
   1241 dw	61200,61138,60564,60758,59416,59866,60316,59998
   1242 dw	64800,64738,65188,65382,64040,64490,63916,63598
   1243 dw	62256,62194,61620,61814,62520,62970,63420,63102
   1244 dw	55616,55426,56004,56070,56904,57226,56780,56334
   1245 dw	55120,54930,54484,54550,53336,53658,54236,53790
   1246 dw	50528,50338,50916,50982,49768,50090,49644,49198
   1247 dw	52080,51890,51444,51510,52344,52666,53244,52798
   1248 dw	37248,36930,37380,37830,38536,38730,38156,38094
   1249 dw	40848,40530,39956,40406,39064,39258,39708,39646
   1250 dw	36256,35938,36388,36838,35496,35690,35116,35054
   1251 dw	33712,33394,32820,33270,33976,34170,34620,34558
   1252 dw	43456,43010,43588,43910,44744,44810,44364,44174
   1253 dw	42960,42514,42068,42390,41176,41242,41820,41630
   1254 dw	46560,46114,46692,47014,45800,45866,45420,45230
   1255 dw	48112,47666,47220,47542,48376,48442,49020,48830
   1256 align	64
        ; L$rem_4bit: 16 qword entries (low dword 0, high dword = constant),
        ; indexed by a 4-bit value via "[ebx*8+eax]" in _gcm_gmult_4bit_mmx.
        ; The high dwords equal the constants the *_4bit_x86 routines write
        ; to their stack tables.
   1257 L$rem_4bit:
   1258 dd	0,0,0,471859200,0,943718400,0,610271232
   1259 dd	0,1887436800,0,1822425088,0,1220542464,0,1423966208
   1260 dd	0,3774873600,0,4246732800,0,3644850176,0,3311403008
   1261 dd	0,2441084928,0,2376073216,0,2847932416,0,3051356160
        ; NUL-terminated ASCII identification string:
        ; "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
   1262 db	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
   1263 db	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
   1264 db	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
   1265 db	0
   1266