Home | History | Annotate | Download | only in masmx86
      1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
      2 ; *
      3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
      4 ; *
      5 ; * Copyright (C) 1995-2003 Mark Adler
      6 ; * For conditions of distribution and use, see copyright notice in zlib.h
      7 ; *
      8 ; * Copyright (C) 2003 Chris Anderson <christop@charm.net>
      9 ; * Please use the copyright conditions above.
     10 ; *
     11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
     12 ; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
     13 ; * the moment.  I have successfully compiled and tested this code with gcc2.96,
     14 ; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
     15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
     16 ; * enabled.  I will attempt to merge the MMX code into this version.  Newer
     17 ; * versions of this and inffast.S can be found at
     18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
     19 ; *
     20 ; * 2005 : modification by Gilles Vollant
     21 ; */
     22 ; For Visual C++ 4.x and higher and ML 6.x and higher
     23 ;   ml.exe is in directory \MASM611C of Win95 DDK
     24 ;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
     25 ;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
     26 ;
     27 ;
     28 ;   compile with command line option
     29 ;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
     30 
     31 ;   if you define NO_GZIP (see inflate.h), compile with
     32 ;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
     33 
     34 
     35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
     36 ; zlib1222sup is 8 for zlib 1.2.2.2 and later (with the addition of dmax and head
     37 ;        in inflate_state in inflate.h)
     38 zlib1222sup      equ    8	; added to every *_state offset below except mode_state
     39 ; INFLATE_MODE_TYPE is stored in state->mode after an end-of-block code, INFLATE_MODE_BAD after invalid data.
     40 ; The gzip values (11/26) are used unless NO_GUNZIP is defined and GUNZIP is not. Presumably these mirror the inflate_mode enum in inflate.h -- confirm for the zlib version in use.
     41 IFDEF GUNZIP
     42   INFLATE_MODE_TYPE    equ 11
     43   INFLATE_MODE_BAD     equ 26
     44 ELSE
     45   IFNDEF NO_GUNZIP
     46     INFLATE_MODE_TYPE    equ 11
     47     INFLATE_MODE_BAD     equ 26
     48   ELSE
     49     INFLATE_MODE_TYPE    equ 3
     50     INFLATE_MODE_BAD     equ 17
     51   ENDIF
     52 ENDIF
     53 
     54 
     55 ; 75 "inffast.S"
     56 ;FILE "inffast.S"
     57 
     58 ;;;GLOBAL _inflate_fast
     59 
     60 ;;;SECTION .text
     61 
     62 
     63 
     64 	.586p	; Pentium instruction set (cpuid is used by the CPU probe in L_check_mmx)
     65 	.mmx	; enable MMX mnemonics (movd/movq/psrlq/... in the *_mmx paths)
     66 
     67 	name	inflate_fast_x86
     68 	.MODEL	FLAT
     69 ; NOTE(review): no "_DATA ends" is visible before "_TEXT segment" opens below -- confirm the assembler accepts the nesting.
     70 _DATA			segment
     71 inflate_fast_use_mmx:	; decoder selection flag: below 2 = CPU not probed yet, 2 = use MMX loop, 3 = use integer loop
     72 	dd	1	; initial value 1 forces the probe on first call
     73 
     74 
     75 _TEXT			segment
     76 ; Read-only, zero-terminated strings placed in the code segment.
     77 ; The first string has no label and is not referenced by any instruction in this file (identification banner).
     78 ; The three labelled strings are the error messages stored to [strm+24] by L_update_stream_state.
     79 ALIGN 4
     80 	db	'Fast decoding Code from Chris Anderson'
     81 	db	0
     82 
     83 ALIGN 4
     84 invalid_literal_length_code_msg:	; used by L_invalid_literal_length_code
     85 	db	'invalid literal/length code'
     86 	db	0
     87 
     88 ALIGN 4
     89 invalid_distance_code_msg:	; used by L_invalid_distance_code
     90 	db	'invalid distance code'
     91 	db	0
     92 
     93 ALIGN 4
     94 invalid_distance_too_far_msg:	; used by L_invalid_distance_too_far
     95 	db	'invalid distance too far back'
     96 	db	0
     97 
     98 
     99 ALIGN 4
    100 inflate_fast_mask:	; 33 dwords: entry n = (1 << n) - 1 for n = 0..32; indexed as [inflate_fast_mask+eax*4] in the MMX paths
    101 dd	0
    102 dd	1
    103 dd	3
    104 dd	7
    105 dd	15
    106 dd	31
    107 dd	63
    108 dd	127
    109 dd	255
    110 dd	511
    111 dd	1023
    112 dd	2047
    113 dd	4095
    114 dd	8191
    115 dd	16383
    116 dd	32767
    117 dd	65535
    118 dd	131071
    119 dd	262143
    120 dd	524287
    121 dd	1048575
    122 dd	2097151
    123 dd	4194303
    124 dd	8388607
    125 dd	16777215
    126 dd	33554431
    127 dd	67108863
    128 dd	134217727
    129 dd	268435455
    130 dd	536870911
    131 dd	1073741823
    132 dd	2147483647
    133 dd	4294967295
    134 ; Byte offsets of the inflate_state fields read/written by _inflate_fast, relative to the pointer loaded from [strm+28].
    135 ; NOTE(review): wsize_state has no "+4" term while every later offset does -- presumably a field was inserted between them; confirm against inflate.h.
    136 mode_state	 equ	0	;/* state->mode	*/
    137 wsize_state	 equ	(32+zlib1222sup)	;/* state->wsize */
    138 write_state	 equ	(36+4+zlib1222sup)	;/* state->write */
    139 window_state	 equ	(40+4+zlib1222sup)	;/* state->window */
    140 hold_state	 equ	(44+4+zlib1222sup)	;/* state->hold	*/
    141 bits_state	 equ	(48+4+zlib1222sup)	;/* state->bits	*/
    142 lencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode */
    143 distcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode */
    144 lenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits */
    145 distbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits */
    146 
    147 
    148 ;;SECTION .text
    149 ; 205 "inffast.S"
    150 ;GLOBAL	inflate_fast_use_mmx
    151 
    152 ;SECTION .data
    153 
    154 
    155 ; GLOBAL inflate_fast_use_mmx:object
    156 ;.size inflate_fast_use_mmx, 4
    157 ; 226 "inffast.S"
    158 ;SECTION .text
    159 
    160 ALIGN 4
    161 _inflate_fast proc near
    162 .FPO (16, 4, 0, 0, 1, 0)
    163 	push  edi
    164 	push  esi
    165 	push  ebp
    166 	push  ebx
    167 	pushfd
    168 	sub  esp,64	; 64-byte local frame, addressed as [esp+0]..[esp+63]
    169 	cld	; string instructions below must auto-increment esi/edi
    170 ; _inflate_fast(strm, start): both arguments are read from the stack and the
    171 ; final "ret" pops nothing.  After 4 pushes + pushfd + the 64-byte frame the
    172 ; return address sits at [esp+84], so strm is at [esp+88] and start at [esp+92].
    173 ; edi/esi/ebp/ebx and EFLAGS are restored at L_done.
    174 	mov  esi, [esp+88]	; esi = strm
    175 	mov  edi, [esi+28]	; edi = state pointer (base for the *_state offsets)
    176 ; strm fields are accessed by raw offset.  From the way they are used they are
    177 ; presumably z_stream's next_in(+0), avail_in(+4), next_out(+12),
    178 ; avail_out(+16), msg(+24) and state(+28) -- confirm against zlib.h for the
    179 ; target build.  The names below follow that assumption.
    180 
    181 
    182 
    183 	mov  edx, [esi+4]	; edx = avail_in
    184 	mov  eax, [esi+0]	; eax = next_in
    185 
    186 	add  edx,eax
    187 	sub  edx,11	; edx = next_in + avail_in - 11
    188 
    189 	mov  [esp+44],eax	; [esp+44] = in   (saved input pointer)
    190 	mov  [esp+20],edx	; [esp+20] = last (decode loops continue while in < last)
    191 
    192 	mov  ebp, [esp+92]	; ebp = start (second argument)
    193 	mov  ecx, [esi+16]	; ecx = avail_out
    194 	mov  ebx, [esi+12]	; ebx = next_out
    195 
    196 	sub  ebp,ecx
    197 	neg  ebp
    198 	add  ebp,ebx	; ebp = next_out - (start - avail_out)
    199 
    200 	sub  ecx,257
    201 	add  ecx,ebx	; ecx = next_out + avail_out - 257
    202 
    203 	mov  [esp+60],ebx	; [esp+60] = out
    204 	mov  [esp+40],ebp	; [esp+40] = beg (copies reaching before it are served from the window)
    205 	mov  [esp+16],ecx	; [esp+16] = end (decode loops continue while out < end)
    206 ; 285 "inffast.S"
    207 	mov  eax, [edi+lencode_state]
    208 	mov  ecx, [edi+distcode_state]
    209 
    210 	mov  [esp+8],eax	; [esp+8]  = lcode (literal/length table)
    211 	mov  [esp+12],ecx	; [esp+12] = dcode (distance table)
    212 
    213 	mov  eax,1
    214 	mov  ecx, [edi+lenbits_state]
    215 	shl  eax,cl
    216 	dec  eax
    217 	mov  [esp+0],eax	; [esp+0] = lmask = (1 << lenbits) - 1
    218 
    219 	mov  eax,1
    220 	mov  ecx, [edi+distbits_state]
    221 	shl  eax,cl
    222 	dec  eax
    223 	mov  [esp+4],eax	; [esp+4] = dmask = (1 << distbits) - 1
    224 
    225 	mov  eax, [edi+wsize_state]
    226 	mov  ecx, [edi+write_state]
    227 	mov  edx, [edi+window_state]
    228 
    229 	mov  [esp+52],eax	; [esp+52] = wsize
    230 	mov  [esp+48],ecx	; [esp+48] = write
    231 	mov  [esp+56],edx	; [esp+56] = window
    232 
    233 	mov  ebp, [edi+hold_state]	; ebp = hold (bit accumulator)
    234 	mov  ebx, [edi+bits_state]	; ebx = bits (count of valid bits in hold)
    235 ; 321 "inffast.S"
    236 	mov  esi, [esp+44]	; esi = in
    237 	mov  ecx, [esp+20]	; ecx = last
    238 	cmp  ecx,esi
    239 	ja   L_align_long	; last > in: decode directly from the caller's buffer
    240 ; Short input (last <= in, i.e. avail_in <= 11): copy it into the 12-byte scratch buffer at [esp+28], zero-pad it, and decode from there.
    241 	add  ecx,11
    242 	sub  ecx,esi	; ecx = avail_in
    243 	mov  eax,12
    244 	sub  eax,ecx	; eax = 12 - avail_in = number of padding bytes
    245 	lea  edi, [esp+28]
    246 	rep movsb	; copy the available input bytes
    247 	mov  ecx,eax
    248 	xor  eax,eax
    249 	rep stosb	; zero-fill the remainder of the scratch buffer
    250 	lea  esi, [esp+28]	; in = start of scratch buffer
    251 	mov  [esp+20],esi	; last = scratch buffer address; L_update_next_in tests this equality to detect that the buffer was used
    252 	jmp  L_is_aligned
    253 
    254 
    255 L_align_long:	; pull single bytes into hold until esi is 4-byte aligned
    256 	test  esi,3
    257 	jz   L_is_aligned
    258 	xor  eax,eax
    259 	mov  al, [esi]
    260 	inc  esi
    261 	mov  ecx,ebx	; shift count = current bits
    262 	add  ebx,8	; bits += 8
    263 	shl  eax,cl
    264 	or  ebp,eax	; hold |= byte << bits
    265 	jmp L_align_long
    266 
    267 L_is_aligned:
    268 	mov  edi, [esp+60]	; edi = out
    269 ; 366 "inffast.S"
    270 L_check_mmx:	; dispatch on inflate_fast_use_mmx: 2 -> MMX loop, above 2 -> integer loop, below 2 -> probe the CPU first
    271 	cmp  dword ptr [inflate_fast_use_mmx],2
    272 	je   L_init_mmx
    273 	ja   L_do_loop
    274 
    275 	push  eax
    276 	push  ebx
    277 	push  ecx
    278 	push  edx
    279 	pushfd
    280 	mov  eax, [esp]	; eax = current EFLAGS
    281 	xor  dword ptr [esp],0200000h	; flip bit 21 (the ID flag) in the stacked copy
    282 ; If the flipped bit survives a popfd/pushfd round trip, the cpuid instruction is available.
    283 
    284 
    285 
    286 	popfd
    287 	pushfd
    288 	pop  edx
    289 	xor  edx,eax	; zero if the bit could not be changed
    290 	jz   L_dont_use_mmx
    291 	xor  eax,eax
    292 	cpuid	; leaf 0: vendor string in ebx:edx:ecx
    293 	cmp  ebx,0756e6547h	; "Genu"
    294 	jne  L_dont_use_mmx
    295 	cmp  ecx,06c65746eh	; "ntel"
    296 	jne  L_dont_use_mmx
    297 	cmp  edx,049656e69h	; "ineI"
    298 	jne  L_dont_use_mmx
    299 	mov  eax,1
    300 	cpuid	; leaf 1: signature in eax, feature flags in edx
    301 	shr  eax,8
    302 	and  eax,15	; eax = family field
    303 	cmp  eax,6
    304 	jne  L_dont_use_mmx	; MMX path is only selected for family 6
    305 	test  edx,0800000h	; feature bit 23 = MMX
    306 	jnz  L_use_mmx
    307 	jmp  L_dont_use_mmx
    308 L_use_mmx:
    309 	mov  dword ptr [inflate_fast_use_mmx],2
    310 	jmp  L_check_mmx_pop
    311 L_dont_use_mmx:
    312 	mov  dword ptr [inflate_fast_use_mmx],3
    313 L_check_mmx_pop:
    314 	pop  edx
    315 	pop  ecx
    316 	pop  ebx
    317 	pop  eax
    318 	jmp  L_check_mmx	; re-dispatch now that the flag is 2 or 3
    319 ; 426 "inffast.S"
    320 ALIGN 4
    321 L_do_loop:	; integer decode loop.  Registers: esi = in, edi = out, ebp = hold, bl = bits
    322 ; 437 "inffast.S"
    323 	cmp  bl,15
    324 	ja   L_get_length_code	; more than 15 bits buffered: no refill needed
    325 
    326 	xor  eax,eax
    327 	lodsw	; ax = next 16 input bits, esi += 2
    328 	mov  cl,bl
    329 	add  bl,16
    330 	shl  eax,cl
    331 	or  ebp,eax	; hold |= new bits << old bit count
    332 
    333 L_get_length_code:
    334 	mov  edx, [esp+0]	; lmask
    335 	mov  ecx, [esp+8]	; lcode
    336 	and  edx,ebp
    337 	mov  eax, [ecx+edx*4]	; eax = lcode[hold & lmask]
    338 
    339 L_dolen:
    340 ; Table entry layout as used here: al = op flags, ah = number of bits this
    341 ; code occupies, upper 16 bits = value (literal byte, base, or sub-table
    342 ; offset depending on op).
    343 
    344 
    345 
    346 	mov  cl,ah
    347 	sub  bl,ah	; bits -= code length
    348 	shr  ebp,cl	; drop the code bits from hold
    349 
    350 
    351 
    352 
    353 
    354 
    355 	test  al,al
    356 	jnz   L_test_for_length_base	; op != 0: not a literal
    357 
    358 	shr  eax,16
    359 	stosb	; op == 0: emit the literal byte
    360 
    361 L_while_test:
    362 ; Continue only while out < end and in < last.
    363 
    364 	cmp  [esp+16],edi
    365 	jbe  L_break_loop
    366 
    367 	cmp  [esp+20],esi
    368 	ja   L_do_loop
    369 	jmp  L_break_loop
    370 
    371 L_test_for_length_base:
    372 ; 502 "inffast.S"
    373 	mov  edx,eax
    374 	shr  edx,16	; edx = value field (length base when op bit 16 is set)
    375 	mov  cl,al	; cl = op
    376 
    377 	test  al,16
    378 	jz   L_test_for_second_level_length	; op bit 16 clear: not a length base
    379 	and  cl,15	; cl = number of extra length bits
    380 	jz   L_save_len	; no extra bits: len = base
    381 	cmp  bl,cl
    382 	jae  L_add_bits_to_len	; enough bits already buffered
    383 
    384 	mov  ch,cl	; keep the extra-bit count while cl is used as shift count
    385 	xor  eax,eax
    386 	lodsw	; refill 16 bits
    387 	mov  cl,bl
    388 	add  bl,16
    389 	shl  eax,cl
    390 	or  ebp,eax
    391 	mov  cl,ch
    392 
    393 L_add_bits_to_len:
    394 	mov  eax,1
    395 	shl  eax,cl
    396 	dec  eax	; eax = (1 << extra) - 1
    397 	sub  bl,cl	; bits -= extra
    398 	and  eax,ebp	; eax = extra-bit value
    399 	shr  ebp,cl	; drop the extra bits from hold
    400 	add  edx,eax	; len = base + extra
    401 
    402 L_save_len:
    403 	mov  [esp+24],edx	; [esp+24] = len
    404 
    405 
    406 L_decode_distance:
    407 ; 549 "inffast.S"
    408 	cmp  bl,15
    409 	ja   L_get_distance_code	; more than 15 bits buffered: no refill needed
    410 
    411 	xor  eax,eax
    412 	lodsw	; refill 16 bits
    413 	mov  cl,bl
    414 	add  bl,16
    415 	shl  eax,cl
    416 	or  ebp,eax
    417 
    418 L_get_distance_code:
    419 	mov  edx, [esp+4]	; dmask
    420 	mov  ecx, [esp+12]	; dcode
    421 	and  edx,ebp
    422 	mov  eax, [ecx+edx*4]	; eax = dcode[hold & dmask]
    423 
    424 
    425 L_dodist:
    426 	mov  edx,eax
    427 	shr  edx,16	; edx = value field (distance base when op bit 16 is set)
    428 	mov  cl,ah
    429 	sub  bl,ah	; bits -= code length
    430 	shr  ebp,cl	; drop the code bits from hold
    431 ; 584 "inffast.S"
    432 	mov  cl,al	; cl = op
    433 
    434 	test  al,16
    435 	jz  L_test_for_second_level_dist	; op bit 16 clear: not a distance base
    436 	and  cl,15	; cl = number of extra distance bits
    437 	jz  L_check_dist_one	; no extra bits: dist = base, try the dist == 1 fast path
    438 	cmp  bl,cl
    439 	jae  L_add_bits_to_dist
    440 
    441 	mov  ch,cl	; keep the extra-bit count while cl is used as shift count
    442 	xor  eax,eax
    443 	lodsw	; refill 16 bits
    444 	mov  cl,bl
    445 	add  bl,16
    446 	shl  eax,cl
    447 	or  ebp,eax
    448 	mov  cl,ch
    449 
    450 L_add_bits_to_dist:
    451 	mov  eax,1
    452 	shl  eax,cl
    453 	dec  eax	; eax = (1 << extra) - 1
    454 	sub  bl,cl	; bits -= extra
    455 	and  eax,ebp	; eax = extra-bit value
    456 	shr  ebp,cl	; drop the extra bits from hold
    457 	add  edx,eax	; dist = base + extra
    458 	jmp  L_check_window	; NOTE(review): redundant -- the target label is the next instruction
    459 
    460 L_check_window:	; on entry: edx = dist, [esp+24] = len
    461 ; 625 "inffast.S"
    462 	mov  [esp+44],esi	; save in; esi is reused as the copy source
    463 	mov  eax,edi
    464 	sub  eax, [esp+40]	; eax = out - beg = bytes available behind out
    465 
    466 	cmp  eax,edx
    467 	jb   L_clip_window	; dist reaches before beg: part of the match comes from the window
    468 
    469 	mov  ecx, [esp+24]	; ecx = len
    470 	mov  esi,edi
    471 	sub  esi,edx	; esi = out - dist
    472 
    473 	sub  ecx,3	; first three bytes are copied by hand below (assumes len >= 3 -- TODO confirm)
    474 	mov  al, [esi]
    475 	mov  [edi],al
    476 	mov  al, [esi+1]
    477 	mov  dl, [esi+2]
    478 	add  esi,3
    479 	mov  [edi+1],al
    480 	mov  [edi+2],dl
    481 	add  edi,3
    482 	rep movsb	; copy the remaining len - 3 bytes
    483 
    484 	mov  esi, [esp+44]	; restore in
    485 	jmp  L_while_test
    486 
    487 ALIGN 4
    488 L_check_dist_one:	; fast path for dist == 1: the match is a run of the previous output byte
    489 	cmp  edx,1
    490 	jne  L_check_window
    491 	cmp  [esp+40],edi
    492 	je  L_check_window	; out == beg: no previous output byte, take the general path
    493 
    494 	dec  edi
    495 	mov  ecx, [esp+24]	; ecx = len
    496 	mov  al, [edi]	; al = previous output byte
    497 	sub  ecx,3	; three bytes are stored by hand below
    498 
    499 	mov  [edi+1],al
    500 	mov  [edi+2],al
    501 	mov  [edi+3],al
    502 	add  edi,4	; edi = original out + 3
    503 	rep stosb	; store the remaining len - 3 copies
    504 
    505 	jmp  L_while_test
    506 
    507 ALIGN 4
    508 L_test_for_second_level_length:
    509 ; Reached when op bit 16 is clear.  If op bit 64 is also clear the entry
    510 ; links to a sub-table: cl (= op) gives the number of index bits and
    511 ; edx (value field) the sub-table offset within lcode.
    512 
    513 	test  al,64
    514 	jnz   L_test_for_end_of_block
    515 
    516 	mov  eax,1
    517 	shl  eax,cl
    518 	dec  eax	; eax = (1 << op) - 1
    519 	and  eax,ebp	; index bits taken from hold (hold is not shifted here; L_dolen consumes them)
    520 	add  eax,edx
    521 	mov  edx, [esp+8]	; lcode
    522 	mov  eax, [edx+eax*4]	; eax = second-level entry
    523 	jmp  L_dolen
    524 
    525 ALIGN 4
    526 L_test_for_second_level_dist:
    527 ; Reached when op bit 16 is clear.  If op bit 64 is set the distance code
    528 ; is invalid; otherwise the entry links to a sub-table: cl (= op) gives the
    529 ; number of index bits and edx (value field) the offset within dcode.
    530 
    531 	test  al,64
    532 	jnz   L_invalid_distance_code
    533 
    534 	mov  eax,1
    535 	shl  eax,cl
    536 	dec  eax	; eax = (1 << op) - 1
    537 	and  eax,ebp	; index bits taken from hold (consumed later at L_dodist)
    538 	add  eax,edx
    539 	mov  edx, [esp+12]	; dcode
    540 	mov  eax, [edx+eax*4]	; eax = second-level entry
    541 	jmp  L_dodist
    542 
    543 ALIGN 4
    544 L_clip_window:	; on entry: eax = out - beg, edx = dist, [esp+24] = len, in already saved at [esp+44]
    545 ; 721 "inffast.S"
    546 	mov  ecx,eax
    547 	mov  eax, [esp+52]	; eax = wsize
    548 	neg  ecx
    549 	mov  esi, [esp+56]	; esi = window
    550 
    551 	cmp  eax,edx
    552 	jb   L_invalid_distance_too_far	; dist > wsize
    553 
    554 	add  ecx,edx	; ecx = dist - (out - beg) = bytes that must come from the window
    555 	cmp  dword ptr [esp+48],0
    556 	jne  L_wrap_around_window	; write != 0: window contents may wrap
    557 ; write == 0: the wanted bytes are the last ecx bytes of the window.
    558 	sub  eax,ecx
    559 	add  esi,eax	; esi = window + wsize - ecx
    560 ; 749 "inffast.S"
    561 	mov  eax, [esp+24]	; eax = len
    562 	cmp  eax,ecx
    563 	jbe  L_do_copy1	; whole match lies inside the window
    564 
    565 	sub  eax,ecx	; eax = bytes left after the window part
    566 	rep movsb	; copy the window part
    567 	mov  esi,edi
    568 	sub  esi,edx	; continue from out - dist in the output buffer
    569 	jmp  L_do_copy1	; (an unlabelled, unreachable duplicate of the 8 instructions above used to follow this jmp; removed)
    579 
    580 L_wrap_around_window:	; on entry: ecx = bytes needed from window, esi = window, edx = dist
    581 ; 793 "inffast.S"
    582 	mov  eax, [esp+48]	; eax = write
    583 	cmp  ecx,eax
    584 	jbe  L_contiguous_in_window	; needed bytes all lie below the write position
    585 
    586 	add  esi, [esp+52]
    587 	add  esi,eax
    588 	sub  esi,ecx	; esi = window + wsize + write - ecx
    589 	sub  ecx,eax	; ecx = bytes to take from the tail of the window
    590 
    591 
    592 	mov  eax, [esp+24]	; eax = len
    593 	cmp  eax,ecx
    594 	jbe  L_do_copy1	; match ends within the window tail
    595 
    596 	sub  eax,ecx
    597 	rep movsb	; copy the tail part
    598 	mov  esi, [esp+56]	; then continue from the start of the window
    599 	mov  ecx, [esp+48]	; up to write bytes are available there
    600 	cmp  eax,ecx
    601 	jbe  L_do_copy1	; match ends within the window head
    602 
    603 	sub  eax,ecx
    604 	rep movsb	; copy the head part
    605 	mov  esi,edi
    606 	sub  esi,edx	; remainder comes from out - dist in the output buffer
    607 	jmp  L_do_copy1
    608 
    609 L_contiguous_in_window:	; on entry: eax = write, ecx = bytes needed from window (<= write), esi = window
    610 ; 836 "inffast.S"
    611 	add  esi,eax
    612 	sub  esi,ecx	; esi = window + write - ecx
    613 
    614 
    615 	mov  eax, [esp+24]	; eax = len
    616 	cmp  eax,ecx
    617 	jbe  L_do_copy1	; whole match lies inside the window
    618 
    619 	sub  eax,ecx
    620 	rep movsb	; copy the window part
    621 	mov  esi,edi
    622 	sub  esi,edx	; remainder comes from out - dist in the output buffer
    623 
    624 L_do_copy1:	; common tail: copy the eax bytes still outstanding from esi
    625 ; 862 "inffast.S"
    626 	mov  ecx,eax
    627 	rep movsb
    628 
    629 	mov  esi, [esp+44]	; restore in
    630 	jmp  L_while_test
    631 ; 878 "inffast.S"
    632 ALIGN 4
    633 L_init_mmx:
    634 	emms
    635 ; Register roles for the MMX loop: mm0 = hold, ebp = bits, mm1 = shift
    636 ; count still to be applied to hold, mm4/mm3 = lmask (working/master),
    637 ; mm5/mm2 = dmask (working/master), ebx = lcode, esi = in, edi = out.
    638 
    639 
    640 	movd mm0,ebp	; mm0 = hold
    641 	mov  ebp,ebx	; ebp = bits
    642 ; 896 "inffast.S"
    643 	movd mm4,dword ptr [esp+0]
    644 	movq mm3,mm4
    645 	movd mm5,dword ptr [esp+4]
    646 	movq mm2,mm5
    647 	pxor mm1,mm1	; nothing pending to shift out yet
    648 	mov  ebx, [esp+8]	; ebx = lcode
    649 	jmp  L_do_loop_mmx
    650 
    651 ALIGN 4
    652 L_do_loop_mmx:
    653 	psrlq mm0,mm1	; apply the pending shift: drop bits consumed by the previous code
    654 
    655 	cmp  ebp,32
    656 	ja  L_get_length_code_mmx	; more than 32 bits buffered: no refill needed
    657 
    658 	movd mm6,ebp
    659 	movd mm7,dword ptr [esi]	; load 32 new input bits
    660 	add  esi,4
    661 	psllq mm7,mm6
    662 	add  ebp,32
    663 	por mm0,mm7	; hold |= new bits << old bit count
    664 
    665 L_get_length_code_mmx:
    666 	pand mm4,mm0
    667 	movd eax,mm4	; eax = hold & lmask
    668 	movq mm4,mm3	; restore the working mask from the master copy
    669 	mov  eax, [ebx+eax*4]	; eax = lcode[hold & lmask]
    670 
    671 L_dolen_mmx:
    672 	movzx  ecx,ah	; ecx = number of bits this code occupies
    673 	movd mm1,ecx	; shift is deferred to the next psrlq mm0,mm1
    674 	sub  ebp,ecx	; bits -= code length
    675 
    676 	test  al,al
    677 	jnz L_test_for_length_base_mmx	; op != 0: not a literal
    678 
    679 	shr  eax,16
    680 	stosb	; op == 0: emit the literal byte
    681 
    682 L_while_test_mmx:
    683 ; Continue only while out < end and in < last.
    684 
    685 	cmp  [esp+16],edi
    686 	jbe L_break_loop
    687 
    688 	cmp  [esp+20],esi
    689 	ja L_do_loop_mmx
    690 	jmp L_break_loop
    691 
    692 L_test_for_length_base_mmx:
    693 
    694 	mov  edx,eax
    695 	shr  edx,16	; edx = value field (length base when op bit 16 is set)
    696 
    697 	test  al,16
    698 	jz  L_test_for_second_level_length_mmx	; op bit 16 clear: not a length base
    699 	and  eax,15	; eax = number of extra length bits
    700 	jz L_decode_distance_mmx	; no extra bits: len = base
    701 
    702 	psrlq mm0,mm1	; drop the code bits so the extra bits are at the bottom of hold
    703 	movd mm1,eax	; the extra bits become the next pending shift
    704 	movd ecx,mm0
    705 	sub  ebp,eax	; bits -= extra
    706 	and  ecx, [inflate_fast_mask+eax*4]	; ecx = extra-bit value
    707 	add  edx,ecx	; edx = len = base + extra
    708 
    709 L_decode_distance_mmx:	; on entry: edx = len
    710 	psrlq mm0,mm1	; apply the pending shift
    711 
    712 	cmp  ebp,32
    713 	ja L_get_dist_code_mmx	; more than 32 bits buffered: no refill needed
    714 
    715 	movd mm6,ebp
    716 	movd mm7,dword ptr [esi]	; load 32 new input bits
    717 	add  esi,4
    718 	psllq mm7,mm6
    719 	add  ebp,32
    720 	por mm0,mm7
    721 
    722 L_get_dist_code_mmx:
    723 	mov  ebx, [esp+12]	; ebx = dcode (lcode is reloaded into ebx after the copy)
    724 	pand mm5,mm0
    725 	movd eax,mm5	; eax = hold & dmask
    726 	movq mm5,mm2	; restore the working mask from the master copy
    727 	mov  eax, [ebx+eax*4]	; eax = dcode[hold & dmask]
    728 
    729 L_dodist_mmx:
    730 
    731 	movzx  ecx,ah	; ecx = number of bits this code occupies
    732 	mov  ebx,eax
    733 	shr  ebx,16	; ebx = value field (distance base when op bit 16 is set)
    734 	sub  ebp,ecx	; bits -= code length
    735 	movd mm1,ecx	; shift deferred to the next psrlq mm0,mm1
    736 
    737 	test  al,16
    738 	jz L_test_for_second_level_dist_mmx	; op bit 16 clear: not a distance base
    739 	and  eax,15	; eax = number of extra distance bits
    740 	jz L_check_dist_one_mmx	; no extra bits: dist = base, try the dist == 1 fast path
    741 
    742 L_add_bits_to_dist_mmx:
    743 	psrlq mm0,mm1	; drop the code bits so the extra bits are at the bottom of hold
    744 	movd mm1,eax	; the extra bits become the next pending shift
    745 	movd ecx,mm0
    746 	sub  ebp,eax	; bits -= extra
    747 	and  ecx, [inflate_fast_mask+eax*4]	; ecx = extra-bit value
    748 	add  ebx,ecx	; ebx = dist = base + extra
    749 
    750 L_check_window_mmx:	; on entry: ebx = dist, edx = len
    751 	mov  [esp+44],esi	; save in; esi is reused as the copy source
    752 	mov  eax,edi
    753 	sub  eax, [esp+40]	; eax = out - beg = bytes available behind out
    754 
    755 	cmp  eax,ebx
    756 	jb L_clip_window_mmx	; dist reaches before beg: part of the match comes from the window
    757 
    758 	mov  ecx,edx	; ecx = len
    759 	mov  esi,edi
    760 	sub  esi,ebx	; esi = out - dist
    761 
    762 	sub  ecx,3	; first three bytes are copied by hand below (assumes len >= 3 -- TODO confirm)
    763 	mov  al, [esi]
    764 	mov  [edi],al
    765 	mov  al, [esi+1]
    766 	mov  dl, [esi+2]
    767 	add  esi,3
    768 	mov  [edi+1],al
    769 	mov  [edi+2],dl
    770 	add  edi,3
    771 	rep movsb	; copy the remaining len - 3 bytes
    772 
    773 	mov  esi, [esp+44]	; restore in
    774 	mov  ebx, [esp+8]	; restore ebx = lcode for the next length decode
    775 	jmp  L_while_test_mmx
    776 
    777 ALIGN 4
    778 L_check_dist_one_mmx:	; fast path for dist == 1: the match is a run of the previous output byte
    779 	cmp  ebx,1
    780 	jne  L_check_window_mmx
    781 	cmp  [esp+40],edi
    782 	je   L_check_window_mmx	; out == beg: no previous output byte, take the general path
    783 
    784 	dec  edi
    785 	mov  ecx,edx	; ecx = len
    786 	mov  al, [edi]	; al = previous output byte
    787 	sub  ecx,3	; three bytes are stored by hand below
    788 
    789 	mov  [edi+1],al
    790 	mov  [edi+2],al
    791 	mov  [edi+3],al
    792 	add  edi,4	; edi = original out + 3
    793 	rep stosb	; store the remaining len - 3 copies
    794 
    795 	mov  ebx, [esp+8]	; restore ebx = lcode
    796 	jmp  L_while_test_mmx
    797 
    798 ALIGN 4
    799 L_test_for_second_level_length_mmx:	; op bit 16 clear; edx = value field, ebx = lcode
    800 	test  al,64
    801 	jnz L_test_for_end_of_block
    802 
    803 	and  eax,15	; eax = number of index bits for the sub-table
    804 	psrlq mm0,mm1	; drop the first-level code bits
    805 	movd ecx,mm0
    806 	and  ecx, [inflate_fast_mask+eax*4]	; ecx = index bits (left in hold; L_dolen_mmx schedules their removal)
    807 	add  ecx,edx	; add the sub-table offset
    808 	mov  eax, [ebx+ecx*4]	; eax = second-level entry
    809 	jmp L_dolen_mmx
    810 
    811 ALIGN 4
    812 L_test_for_second_level_dist_mmx:	; op bit 16 clear; ebx = value field
    813 	test  al,64
    814 	jnz L_invalid_distance_code
    815 
    816 	and  eax,15	; eax = number of index bits for the sub-table
    817 	psrlq mm0,mm1	; drop the first-level code bits
    818 	movd ecx,mm0
    819 	and  ecx, [inflate_fast_mask+eax*4]	; ecx = index bits (left in hold; L_dodist_mmx schedules their removal)
    820 	mov  eax, [esp+12]	; eax = dcode
    821 	add  ecx,ebx	; add the sub-table offset
    822 	mov  eax, [eax+ecx*4]	; eax = second-level entry
    823 	jmp  L_dodist_mmx
    824 
    825 ALIGN 4
    826 L_clip_window_mmx:	; on entry: eax = out - beg, ebx = dist, edx = len, in already saved at [esp+44]
    827 
    828 	mov  ecx,eax
    829 	mov  eax, [esp+52]	; eax = wsize
    830 	neg  ecx
    831 	mov  esi, [esp+56]	; esi = window
    832 
    833 	cmp  eax,ebx
    834 	jb  L_invalid_distance_too_far	; dist > wsize
    835 
    836 	add  ecx,ebx	; ecx = dist - (out - beg) = bytes that must come from the window
    837 	cmp  dword ptr [esp+48],0
    838 	jne  L_wrap_around_window_mmx	; write != 0: window contents may wrap
    839 ; write == 0: the wanted bytes are the last ecx bytes of the window.
    840 	sub  eax,ecx
    841 	add  esi,eax	; esi = window + wsize - ecx
    842 
    843 	cmp  edx,ecx
    844 	jbe  L_do_copy1_mmx	; whole match lies inside the window
    845 
    846 	sub  edx,ecx	; edx = bytes left after the window part
    847 	rep movsb	; copy the window part
    848 	mov  esi,edi
    849 	sub  esi,ebx	; continue from out - dist in the output buffer
    850 	jmp  L_do_copy1_mmx	; (an unlabelled, unreachable duplicate of the 7 instructions above used to follow this jmp; removed)
    860 
    861 L_wrap_around_window_mmx:	; on entry: ecx = bytes needed from window, esi = window, ebx = dist, edx = len
    862 
    863 	mov  eax, [esp+48]	; eax = write
    864 	cmp  ecx,eax
    865 	jbe  L_contiguous_in_window_mmx	; needed bytes all lie below the write position
    866 
    867 	add  esi, [esp+52]
    868 	add  esi,eax
    869 	sub  esi,ecx	; esi = window + wsize + write - ecx
    870 	sub  ecx,eax	; ecx = bytes to take from the tail of the window
    871 
    872 
    873 	cmp  edx,ecx
    874 	jbe  L_do_copy1_mmx	; match ends within the window tail
    875 
    876 	sub  edx,ecx
    877 	rep movsb	; copy the tail part
    878 	mov  esi, [esp+56]	; then continue from the start of the window
    879 	mov  ecx, [esp+48]	; up to write bytes are available there
    880 	cmp  edx,ecx
    881 	jbe  L_do_copy1_mmx	; match ends within the window head
    882 
    883 	sub  edx,ecx
    884 	rep movsb	; copy the head part
    885 	mov  esi,edi
    886 	sub  esi,ebx	; remainder comes from out - dist in the output buffer
    887 	jmp  L_do_copy1_mmx
    888 
    889 L_contiguous_in_window_mmx:	; on entry: eax = write, ecx = bytes needed from window (<= write), esi = window
    890 
    891 	add  esi,eax
    892 	sub  esi,ecx	; esi = window + write - ecx
    893 
    894 
    895 	cmp  edx,ecx
    896 	jbe  L_do_copy1_mmx	; whole match lies inside the window
    897 
    898 	sub  edx,ecx
    899 	rep movsb	; copy the window part
    900 	mov  esi,edi
    901 	sub  esi,ebx	; remainder comes from out - dist in the output buffer
    902 
    903 L_do_copy1_mmx:	; common tail: copy the edx bytes still outstanding from esi
    904 
    905 
    906 	mov  ecx,edx
    907 	rep movsb
    908 
    909 	mov  esi, [esp+44]	; restore in
    910 	mov  ebx, [esp+8]	; restore ebx = lcode
    911 	jmp  L_while_test_mmx
    912 ; 1174 "inffast.S"
    913 L_invalid_distance_code:
    914 ; Exit stubs shared by the integer and MMX loops.  Each loads ecx with a
    915 ; message pointer (or 0 for none) and edx with the new state->mode value,
    916 ; then joins L_update_stream_state.
    917 
    918 
    919 	mov  ecx, invalid_distance_code_msg
    920 	mov  edx,INFLATE_MODE_BAD
    921 	jmp  L_update_stream_state
    922 
    923 L_test_for_end_of_block:
    924 ; Reached from the length decoders when op has bit 64 set: bit 32 marks a
    925 ; valid end-of-block code, anything else is an invalid literal/length code.
    926 
    927 
    928 
    929 	test  al,32
    930 	jz  L_invalid_literal_length_code
    931 
    932 	mov  ecx,0	; no message
    933 	mov  edx,INFLATE_MODE_TYPE
    934 	jmp  L_update_stream_state
    935 
    936 L_invalid_literal_length_code:
    937 
    938 
    939 
    940 
    941 
    942 	mov  ecx, invalid_literal_length_code_msg
    943 	mov  edx,INFLATE_MODE_BAD
    944 	jmp  L_update_stream_state
    945 
    946 L_invalid_distance_too_far:
    947 ; Reached from the clip-window code, where esi already holds the window
    948 ; pointer, so in must be reloaded from its save slot first.
    949 
    950 	mov  esi, [esp+44]
    951 	mov  ecx, invalid_distance_too_far_msg
    952 	mov  edx,INFLATE_MODE_BAD
    953 	jmp  L_update_stream_state	; NOTE(review): redundant -- the target label follows immediately
    954 
    955 L_update_stream_state:	; on entry: ecx = message pointer or 0, edx = new mode
    956 
    957 	mov  eax, [esp+88]	; eax = strm
    958 	test  ecx,ecx
    959 	jz  L_skip_msg
    960 	mov  [eax+24],ecx	; store the message pointer in the field at strm+24
    961 L_skip_msg:
    962 	mov  eax, [eax+28]	; eax = state pointer
    963 	mov  [eax+mode_state],edx	; state->mode = edx
    964 	jmp  L_break_loop
    965 
    966 ALIGN 4
    967 L_break_loop:	; common exit: write the decoder state back to strm / state
    968 ; 1243 "inffast.S"
    969 	cmp  dword ptr [inflate_fast_use_mmx],2
    970 	jne  L_update_next_in
    971 ; MMX path keeps the bit count in ebp (ebx held a table pointer or dist),
    972 ; so move it into ebx where the code below expects it.
    973 
    974 	mov  ebx,ebp
    975 
    976 L_update_next_in:
    977 ; 1266 "inffast.S"
    978 	mov  eax, [esp+88]	; eax = strm
    979 	mov  ecx,ebx
    980 	mov  edx, [eax+28]	; edx = state pointer
    981 	shr  ecx,3	; ecx = whole unused bytes still buffered in hold
    982 	sub  esi,ecx	; hand those bytes back to the input
    983 	shl  ecx,3
    984 	sub  ebx,ecx	; bits = bits mod 8
    985 	mov  [eax+12],edi	; store out into the field at strm+12
    986 	mov  [edx+bits_state],ebx
    987 	mov  ecx,ebx	; cl = remaining bit count, used for the hold mask below
    988 
    989 	lea  ebx, [esp+28]
    990 	cmp  [esp+20],ebx
    991 	jne  L_buf_not_used	; last != scratch buffer: input was read in place
    992 ; Scratch buffer was used: translate esi back into the caller's buffer and recompute last from the original strm fields.
    993 	sub  esi,ebx	; esi = bytes consumed from the scratch buffer
    994 	mov  ebx, [eax+0]
    995 	mov  [esp+20],ebx
    996 	add  esi,ebx	; esi = original input pointer + bytes consumed
    997 	mov  ebx, [eax+4]
    998 	sub  ebx,11
    999 	add  [esp+20],ebx	; last = original input pointer + original input count - 11
   1000 
   1001 L_buf_not_used:
   1002 	mov  [eax+0],esi	; store in into the field at strm+0
   1003 
   1004 	mov  ebx,1
   1005 	shl  ebx,cl
   1006 	dec  ebx	; ebx = (1 << bits) - 1
   1007 
   1008 
   1009 
   1010 
   1011 
   1012 	cmp  dword ptr [inflate_fast_use_mmx],2
   1013 	jne  L_update_hold
   1014 ; MMX path: apply the pending shift, move hold back into ebp and leave MMX state.
   1015 
   1016 
   1017 	psrlq mm0,mm1
   1018 	movd ebp,mm0
   1019 
   1020 	emms
   1021 
   1022 L_update_hold:
   1023 
   1024 
   1025 
   1026 	and  ebp,ebx	; keep only the low "bits" bits of hold
   1027 	mov  [edx+hold_state],ebp
   1028 ; Recompute the remaining input count from last and in:
   1029 ; (last - in) + 11, written so neither subtraction goes negative.
   1030 
   1031 
   1032 	mov  ebx, [esp+20]
   1033 	cmp  ebx,esi
   1034 	jbe  L_last_is_smaller
   1035 
   1036 	sub  ebx,esi
   1037 	add  ebx,11
   1038 	mov  [eax+4],ebx	; field at strm+4 = 11 + (last - in)
   1039 	jmp  L_fixup_out
   1040 L_last_is_smaller:
   1041 	sub  esi,ebx
   1042 	neg  esi
   1043 	add  esi,11
   1044 	mov  [eax+4],esi	; field at strm+4 = 11 - (in - last)
   1045 
   1046 
   1047 
   1048 
   1049 L_fixup_out:	; recompute the remaining output count from end and out; eax = strm, edi = out
   1050 
   1051 	mov  ebx, [esp+16]	; ebx = end
   1052 	cmp  ebx,edi
   1053 	jbe  L_end_is_smaller
   1054 
   1055 	sub  ebx,edi
   1056 	add  ebx,257
   1057 	mov  [eax+16],ebx	; field at strm+16 = 257 + (end - out)
   1058 	jmp  L_done
   1059 L_end_is_smaller:
   1060 	sub  edi,ebx
   1061 	neg  edi
   1062 	add  edi,257
   1063 	mov  [eax+16],edi	; field at strm+16 = 257 - (out - end)
   1064 
   1065 
   1066 
   1067 
   1068 
   1069 L_done:	; release the local frame and restore everything pushed in the prologue, in reverse order
   1070 	add  esp,64
   1071 	popfd
   1072 	pop  ebx
   1073 	pop  ebp
   1074 	pop  esi
   1075 	pop  edi
   1076 	ret	; arguments stay on the stack for the caller to remove
   1077 _inflate_fast endp
   1078 
   1079 _TEXT	ends
   1080 end
   1081