Home | History | Annotate | Download | only in masmx86
      1 ;/* inffas32.asm is a hand tuned assembler version of inffast.c -- fast decoding
      2 ; *
      3 ; * inffas32.asm is derived from inffas86.c, with translation of assembly code
      4 ; *
      5 ; * Copyright (C) 1995-2003 Mark Adler
      6 ; * For conditions of distribution and use, see copyright notice in zlib.h
      7 ; *
      8 ; * Copyright (C) 2003 Chris Anderson <christop (a] charm.net>
      9 ; * Please use the copyright conditions above.
     10 ; *
     11 ; * Mar-13-2003 -- Most of this is derived from inffast.S which is derived from
     12 ; * the gcc -S output of zlib-1.2.0/inffast.c.  Zlib-1.2.0 is in beta release at
     13 ; * the moment.  I have successfully compiled and tested this code with gcc2.96,
     14 ; * gcc3.2, icc5.0, msvc6.0.  It is very close to the speed of inffast.S
     15 ; * compiled with gcc -DNO_MMX, but inffast.S is still faster on the P3 with MMX
     16 ; * enabled.  I will attempt to merge the MMX code into this version.  Newer
     17 ; * versions of this and inffast.S can be found at
     18 ; * http://www.eetbeetee.com/zlib/ and http://www.charm.net/~christop/zlib/
     19 ; *
     20 ; * 2005 : modification by Gilles Vollant
     21 ; */
     22 ; For Visual C++ 4.x and higher and ML 6.x and higher
     23 ;   ml.exe is in directory \MASM611C of Win95 DDK
     24 ;   ml.exe is also distributed in http://www.masm32.com/masmdl.htm
     25 ;    and in VC++2003 toolkit at http://msdn.microsoft.com/visualc/vctoolkit2003/
     26 ;
     27 ;
     28 ;   compile with command line option
     29 ;   ml  /coff /Zi /c /Flinffas32.lst inffas32.asm
     30 
     31 ;   if you define NO_GUNZIP (see inflate.h), compile with
     32 ;   ml  /coff /Zi /c /Flinffas32.lst /DNO_GUNZIP inffas32.asm
     33 
     34 
     35 ; zlib1222sup is 0 for zlib 1.2.2.1 and lower
     36 ; zlib1222sup is 8 for zlib 1.2.2.2 and higher (with the addition of dmax and head
     37 ;        in inflate_state in inflate.h)
     38 zlib1222sup      equ    8	; added to every *_state offset defined below except mode_state
     39 ; Values stored into state->mode by L_update_stream_state: INFLATE_MODE_TYPE at end of block,
     40 ; INFLATE_MODE_BAD on a decode error.  NOTE(review): presumably must match the inflate_mode enum in inflate.h -- confirm.
     41 IFDEF GUNZIP
     42   INFLATE_MODE_TYPE    equ 11
     43   INFLATE_MODE_BAD     equ 26
     44 ELSE
     45   IFNDEF NO_GUNZIP	; neither GUNZIP nor NO_GUNZIP defined: same values as the GUNZIP case
     46     INFLATE_MODE_TYPE    equ 11
     47     INFLATE_MODE_BAD     equ 26
     48   ELSE			; NO_GUNZIP defined
     49     INFLATE_MODE_TYPE    equ 3
     50     INFLATE_MODE_BAD     equ 17
     51   ENDIF
     52 ENDIF
     53 
     54 
     55 ; 75 "inffast.S"
     56 ;FILE "inffast.S"
     57 
     58 ;;;GLOBAL _inflate_fast
     59 
     60 ;;;SECTION .text
     61 
     62 
     63 
     64 	.586p			; Pentium-class instruction set (cpuid is used below)
     65 	.mmx			; enable MMX mnemonics (used by the L_*_mmx code paths)
     66 
     67 	name	inflate_fast_x86
     68 	.MODEL	FLAT
     69 ; NOTE(review): _DATA is not closed with ENDS before _TEXT is opened further down -- confirm this is intended.
     70 _DATA			segment
     71 inflate_fast_use_mmx:	; path selector read by L_check_mmx: below 2 = not probed yet, 2 = MMX loop, above 2 = integer loop
     72 	dd	1		; initial value 1: the first call runs the CPUID probe, which stores 2 or 3
     73 
     74 
     75 _TEXT			segment
     76 PUBLIC _inflate_fast
     77 ; Exported entry point.  It only jumps over the constant data embedded below to the real prologue.
     78 ALIGN 4
     79 _inflate_fast:
     80 	jmp inflate_fast_entry
     81 
     82 
     83 ; Unlabeled, NUL-terminated credit string; nothing in this file references it.
     84 ALIGN 4
     85 	db	'Fast decoding Code from Chris Anderson'
     86 	db	0
     87 ; NUL-terminated error messages; L_update_stream_state stores the selected one at [strm+24].
     88 ALIGN 4
     89 invalid_literal_length_code_msg:
     90 	db	'invalid literal/length code'
     91 	db	0
     92 
     93 ALIGN 4
     94 invalid_distance_code_msg:
     95 	db	'invalid distance code'
     96 	db	0
     97 
     98 ALIGN 4
     99 invalid_distance_too_far_msg:
    100 	db	'invalid distance too far back'
    101 	db	0
    102 
    103 
    104 ALIGN 4			; dword table: entry n holds 2^n - 1 (the low n bits set), n = 0..32
    105 inflate_fast_mask:	; indexed as [inflate_fast_mask+eax*4] by the MMX paths to mask off extra bits
    106 dd	0
    107 dd	1
    108 dd	3
    109 dd	7
    110 dd	15
    111 dd	31
    112 dd	63
    113 dd	127
    114 dd	255
    115 dd	511
    116 dd	1023
    117 dd	2047
    118 dd	4095
    119 dd	8191
    120 dd	16383
    121 dd	32767
    122 dd	65535
    123 dd	131071
    124 dd	262143
    125 dd	524287
    126 dd	1048575
    127 dd	2097151
    128 dd	4194303
    129 dd	8388607
    130 dd	16777215
    131 dd	33554431
    132 dd	67108863
    133 dd	134217727
    134 dd	268435455
    135 dd	536870911
    136 dd	1073741823
    137 dd	2147483647
    138 dd	4294967295
    139 
    140 
    141 mode_state	 equ	0	;/* state->mode	*/  all *_state values are byte offsets from the pointer loaded from [strm+28]
    142 wsize_state	 equ	(32+zlib1222sup)	;/* state->wsize */
    143 write_state	 equ	(36+4+zlib1222sup)	;/* state->write */
    144 window_state	 equ	(40+4+zlib1222sup)	;/* state->window */
    145 hold_state	 equ	(44+4+zlib1222sup)	;/* state->hold	*/
    146 bits_state	 equ	(48+4+zlib1222sup)	;/* state->bits	*/
    147 lencode_state	 equ	(64+4+zlib1222sup)	;/* state->lencode */
    148 distcode_state	 equ	(68+4+zlib1222sup)	;/* state->distcode */
    149 lenbits_state	 equ	(72+4+zlib1222sup)	;/* state->lenbits */
    150 distbits_state	 equ	(76+4+zlib1222sup)	;/* state->distbits */
    151 
    152 
    153 ;;SECTION .text
    154 ; 205 "inffast.S"
    155 ;GLOBAL	inflate_fast_use_mmx
    156 
    157 ;SECTION .data
    158 
    159 
    160 ; GLOBAL inflate_fast_use_mmx:object
    161 ;.size inflate_fast_use_mmx, 4
    162 ; 226 "inffast.S"
    163 ;SECTION .text
    164 
    165 ALIGN 4
    166 inflate_fast_entry:	; real prologue of _inflate_fast; reads two dword stack arguments
    167 	push  edi
    168 	push  esi
    169 	push  ebp
    170 	push  ebx
    171 	pushfd			; flags are restored by popfd at L_done (undoes cld and the EFLAGS probe in L_check_mmx)
    172 	sub  esp,64		; 64 bytes of locals; with the 5 pushes the return address is at [esp+84]
    173 	cld			; string instructions below must auto-increment esi/edi
    174 ; Locals: [esp+0] length mask   [esp+4] distance mask  [esp+8] lencode  [esp+12] distcode
    175 ;         [esp+16] out limit    [esp+20] in limit      [esp+24] match length  [esp+28..39] 12-byte scratch input
    176 ;         [esp+40] output base  [esp+44] in / saved esi [esp+48] write  [esp+52] wsize  [esp+56] window  [esp+60] out
    177 ; NOTE(review): the field names given for [strm+N] below assume zlib's z_stream layout -- confirm against zlib.h.
    178 	mov  esi, [esp+88]	; esi = first argument (strm)
    179 	mov  edi, [esi+28]	; edi = [strm+28], base pointer for the *_state offsets
    180 
    181 
    182 
    183 
    184 
    185 
    186 
    187 	mov  edx, [esi+4]	; [strm+4]  (avail_in)
    188 	mov  eax, [esi+0]	; [strm+0]  (next_in)
    189 
    190 	add  edx,eax
    191 	sub  edx,11		; edx = next_in + avail_in - 11
    192 
    193 	mov  [esp+44],eax	; in
    194 	mov  [esp+20],edx	; in limit
    195 
    196 	mov  ebp, [esp+92]	; ebp = second argument
    197 	mov  ecx, [esi+16]	; [strm+16] (avail_out)
    198 	mov  ebx, [esi+12]	; [strm+12] (next_out)
    199 
    200 	sub  ebp,ecx
    201 	neg  ebp
    202 	add  ebp,ebx		; ebp = next_out - (second argument - avail_out)
    203 
    204 	sub  ecx,257
    205 	add  ecx,ebx		; ecx = next_out + avail_out - 257
    206 
    207 	mov  [esp+60],ebx	; out
    208 	mov  [esp+40],ebp	; output base
    209 	mov  [esp+16],ecx	; out limit
    210 ; 285 "inffast.S"
    211 	mov  eax, [edi+lencode_state]
    212 	mov  ecx, [edi+distcode_state]
    213 
    214 	mov  [esp+8],eax
    215 	mov  [esp+12],ecx
    216 
    217 	mov  eax,1
    218 	mov  ecx, [edi+lenbits_state]
    219 	shl  eax,cl
    220 	dec  eax
    221 	mov  [esp+0],eax	; length mask = 2^lenbits - 1
    222 
    223 	mov  eax,1
    224 	mov  ecx, [edi+distbits_state]
    225 	shl  eax,cl
    226 	dec  eax
    227 	mov  [esp+4],eax	; distance mask = 2^distbits - 1
    228 
    229 	mov  eax, [edi+wsize_state]
    230 	mov  ecx, [edi+write_state]
    231 	mov  edx, [edi+window_state]
    232 
    233 	mov  [esp+52],eax
    234 	mov  [esp+48],ecx
    235 	mov  [esp+56],edx
    236 
    237 	mov  ebp, [edi+hold_state]	; ebp = bit buffer
    238 	mov  ebx, [edi+bits_state]	; ebx = number of bits in the bit buffer
    239 ; 321 "inffast.S"
    240 	mov  esi, [esp+44]
    241 	mov  ecx, [esp+20]
    242 	cmp  ecx,esi
    243 	ja   L_align_long	; in limit above in: read directly from the caller's buffer
    244 ; Short input: copy what is left into the 12-byte scratch area, zero-fill the rest, and decode from there.
    245 	add  ecx,11
    246 	sub  ecx,esi		; ecx = avail_in
    247 	mov  eax,12
    248 	sub  eax,ecx		; eax = 12 - avail_in, the number of padding bytes
    249 	lea  edi, [esp+28]
    250 	rep movsb		; copy the remaining input bytes
    251 	mov  ecx,eax
    252 	xor  eax,eax
    253 	rep stosb		; pad with zero bytes
    254 	lea  esi, [esp+28]	; in = scratch area
    255 	mov  [esp+20],esi	; in limit = scratch area; L_update_next_in detects this case by that address
    256 	jmp  L_is_aligned
    257 
    258 
    259 L_align_long:		; pull single bytes into the bit buffer until esi is 4-byte aligned
    260 	test  esi,3
    261 	jz   L_is_aligned
    262 	xor  eax,eax
    263 	mov  al, [esi]
    264 	inc  esi
    265 	mov  ecx,ebx
    266 	add  ebx,8
    267 	shl  eax,cl
    268 	or  ebp,eax		; hold |= byte shl bits; bits += 8
    269 	jmp L_align_long
    270 
    271 L_is_aligned:
    272 	mov  edi, [esp+60]	; edi = out (edi was used as a scratch pointer above)
    273 ; 366 "inffast.S"
    274 L_check_mmx:		; dispatch on inflate_fast_use_mmx, probing the CPU first if it is still below 2
    275 	cmp  dword ptr [inflate_fast_use_mmx],2
    276 	je   L_init_mmx		; 2: MMX loop
    277 	ja   L_do_loop		; above 2: integer loop
    278 ; Not probed yet.  eax/ebx/ecx/edx are preserved around the probe (cpuid overwrites all four).
    279 	push  eax
    280 	push  ebx
    281 	push  ecx
    282 	push  edx
    283 	pushfd
    284 	mov  eax, [esp]		; eax = original EFLAGS
    285 	xor  dword ptr [esp],0200000h	; toggle bit 21 in the saved copy
    286 ; Bit 21 is the EFLAGS ID flag: if the toggle survives popfd/pushfd, CPUID is available.
    287 
    288 
    289 
    290 	popfd
    291 	pushfd
    292 	pop  edx
    293 	xor  edx,eax
    294 	jz   L_dont_use_mmx	; flags unchanged: bit could not be toggled
    295 	xor  eax,eax
    296 	cpuid			; leaf 0: vendor string in ebx, edx, ecx
    297 	cmp  ebx,0756e6547h	; 'Genu'
    298 	jne  L_dont_use_mmx
    299 	cmp  ecx,06c65746eh	; 'ntel'
    300 	jne  L_dont_use_mmx
    301 	cmp  edx,049656e69h	; 'ineI'
    302 	jne  L_dont_use_mmx
    303 	mov  eax,1
    304 	cpuid			; leaf 1: signature in eax, feature flags in edx
    305 	shr  eax,8
    306 	and  eax,15		; eax = bits 8..11 of the signature (family)
    307 	cmp  eax,6
    308 	jne  L_dont_use_mmx	; only family 6 selects the MMX path
    309 	test  edx,0800000h	; feature bit 23 (MMX)
    310 	jnz  L_use_mmx
    311 	jmp  L_dont_use_mmx
    312 L_use_mmx:
    313 	mov  dword ptr [inflate_fast_use_mmx],2
    314 	jmp  L_check_mmx_pop
    315 L_dont_use_mmx:
    316 	mov  dword ptr [inflate_fast_use_mmx],3
    317 L_check_mmx_pop:
    318 	pop  edx
    319 	pop  ecx
    320 	pop  ebx
    321 	pop  eax
    322 	jmp  L_check_mmx	; re-dispatch on the value just stored
    323 ; 426 "inffast.S"
    324 ALIGN 4
    325 L_do_loop:		; integer decode loop: esi = in, edi = out, ebp = bit buffer, bl = bits buffered
    326 ; 437 "inffast.S"
    327 	cmp  bl,15
    328 	ja   L_get_length_code	; more than 15 bits buffered: no refill
    329 
    330 	xor  eax,eax
    331 	lodsw			; ax = next 16 input bits, esi += 2
    332 	mov  cl,bl
    333 	add  bl,16
    334 	shl  eax,cl
    335 	or  ebp,eax		; append them above the bits already buffered
    336 
    337 L_get_length_code:
    338 	mov  edx, [esp+0]
    339 	mov  ecx, [esp+8]
    340 	and  edx,ebp		; index = bit buffer AND length mask
    341 	mov  eax, [ecx+edx*4]	; 4-byte table entry: al = op, ah = bit count, high word = value
    342 
    343 L_dolen:		; in: eax = literal/length table entry (also re-entered from the second-level lookup)
    344 
    345 
    346 
    347 
    348 
    349 
    350 	mov  cl,ah
    351 	sub  bl,ah
    352 	shr  ebp,cl		; consume the entry's ah bits
    353 
    354 
    355 
    356 
    357 
    358 
    359 	test  al,al
    360 	jnz   L_test_for_length_base	; op != 0: not a literal
    361 
    362 	shr  eax,16
    363 	stosb			; op == 0: emit the literal byte from the entry's high word
    364 
    365 L_while_test:		; loop condition, shared by every integer-path copy routine
    366 
    367 
    368 	cmp  [esp+16],edi
    369 	jbe  L_break_loop	; out limit reached
    370 
    371 	cmp  [esp+20],esi
    372 	ja   L_do_loop		; still below the in limit: decode the next symbol
    373 	jmp  L_break_loop
    374 
    375 L_test_for_length_base:	; in: eax = table entry with op != 0
    376 ; 502 "inffast.S"
    377 	mov  edx,eax
    378 	shr  edx,16		; edx = entry value (length base)
    379 	mov  cl,al		; cl = op
    380 
    381 	test  al,16
    382 	jz   L_test_for_second_level_length	; op bit 16 clear: not a length base
    383 	and  cl,15		; cl = number of extra length bits
    384 	jz   L_save_len		; none: the base is the length
    385 	cmp  bl,cl
    386 	jae  L_add_bits_to_len	; enough bits already buffered
    387 
    388 	mov  ch,cl		; keep the extra-bit count while cl is used as the shift count
    389 	xor  eax,eax
    390 	lodsw
    391 	mov  cl,bl
    392 	add  bl,16
    393 	shl  eax,cl
    394 	or  ebp,eax		; refill with 16 more input bits
    395 	mov  cl,ch
    396 
    397 L_add_bits_to_len:
    398 	mov  eax,1
    399 	shl  eax,cl
    400 	dec  eax		; eax = 2^cl - 1
    401 	sub  bl,cl
    402 	and  eax,ebp		; eax = the extra bits
    403 	shr  ebp,cl		; consume them
    404 	add  edx,eax		; length = base + extra
    405 
    406 L_save_len:
    407 	mov  [esp+24],edx	; match length is kept on the stack in the integer path
    408 
    409 
    410 L_decode_distance:	; same refill-and-lookup sequence as L_do_loop, using the distance table and mask
    411 ; 549 "inffast.S"
    412 	cmp  bl,15
    413 	ja   L_get_distance_code
    414 
    415 	xor  eax,eax
    416 	lodsw
    417 	mov  cl,bl
    418 	add  bl,16
    419 	shl  eax,cl
    420 	or  ebp,eax
    421 
    422 L_get_distance_code:
    423 	mov  edx, [esp+4]
    424 	mov  ecx, [esp+12]
    425 	and  edx,ebp		; index = bit buffer AND distance mask
    426 	mov  eax, [ecx+edx*4]
    427 
    428 
    429 L_dodist:		; in: eax = distance table entry (also re-entered from the second-level lookup)
    430 	mov  edx,eax
    431 	shr  edx,16		; edx = entry value (distance base)
    432 	mov  cl,ah
    433 	sub  bl,ah
    434 	shr  ebp,cl		; consume the entry's ah bits
    435 ; 584 "inffast.S"
    436 	mov  cl,al		; cl = op
    437 
    438 	test  al,16
    439 	jz  L_test_for_second_level_dist	; op bit 16 clear: not a distance base
    440 	and  cl,15		; cl = number of extra distance bits
    441 	jz  L_check_dist_one	; none: try the distance == 1 fast path
    442 	cmp  bl,cl
    443 	jae  L_add_bits_to_dist
    444 
    445 	mov  ch,cl		; keep the extra-bit count across the refill
    446 	xor  eax,eax
    447 	lodsw
    448 	mov  cl,bl
    449 	add  bl,16
    450 	shl  eax,cl
    451 	or  ebp,eax
    452 	mov  cl,ch
    453 
    454 L_add_bits_to_dist:
    455 	mov  eax,1
    456 	shl  eax,cl
    457 	dec  eax		; eax = 2^cl - 1
    458 	sub  bl,cl
    459 	and  eax,ebp
    460 	shr  ebp,cl
    461 	add  edx,eax		; distance = base + extra
    462 	jmp  L_check_window	; target is the label that follows immediately
    463 
    464 L_check_window:		; in: edx = distance, [esp+24] = length, edi = out
    465 ; 625 "inffast.S"
    466 	mov  [esp+44],esi	; save the input pointer; esi becomes the copy source
    467 	mov  eax,edi
    468 	sub  eax, [esp+40]	; eax = out - output base
    469 
    470 	cmp  eax,edx
    471 	jb   L_clip_window	; distance reaches before the output base: source is (partly) in the window
    472 
    473 	mov  ecx, [esp+24]
    474 	mov  esi,edi
    475 	sub  esi,edx		; esi = out - distance
    476 
    477 	sub  ecx,3		; first three bytes are copied explicitly, the remaining length-3 by rep movsb
    478 	mov  al, [esi]
    479 	mov  [edi],al
    480 	mov  al, [esi+1]
    481 	mov  dl, [esi+2]	; overwrites the low byte of the distance, which is no longer needed
    482 	add  esi,3
    483 	mov  [edi+1],al
    484 	mov  [edi+2],dl
    485 	add  edi,3
    486 	rep movsb
    487 
    488 	mov  esi, [esp+44]	; restore the input pointer
    489 	jmp  L_while_test
    490 
    491 ALIGN 4
    492 L_check_dist_one:	; reached when the distance code has no extra bits; in: edx = distance
    493 	cmp  edx,1
    494 	jne  L_check_window
    495 	cmp  [esp+40],edi
    496 	je  L_check_window	; out is still at the output base: no previous output byte to repeat
    497 ; Distance 1: repeat the previous output byte [esp+24] times.
    498 	dec  edi
    499 	mov  ecx, [esp+24]
    500 	mov  al, [edi]		; al = previous output byte
    501 	sub  ecx,3
    502 
    503 	mov  [edi+1],al		; three bytes stored explicitly ...
    504 	mov  [edi+2],al
    505 	mov  [edi+3],al
    506 	add  edi,4
    507 	rep stosb		; ... then the remaining length-3
    508 
    509 	jmp  L_while_test
    510 
    511 ALIGN 4
    512 L_test_for_second_level_length:	; in: al = cl = op (bit 16 clear), edx = entry value, ebp = bit buffer
    513 
    514 
    515 
    516 
    517 	test  al,64
    518 	jnz   L_test_for_end_of_block	; op bit 64 set: end of block or invalid code
    519 ; Otherwise the entry links to another table entry: index = value + (low cl bits of the bit buffer).
    520 	mov  eax,1
    521 	shl  eax,cl
    522 	dec  eax
    523 	and  eax,ebp
    524 	add  eax,edx
    525 	mov  edx, [esp+8]
    526 	mov  eax, [edx+eax*4]	; fetch the linked entry from the length table
    527 	jmp  L_dolen
    528 
    529 ALIGN 4
    530 L_test_for_second_level_dist:	; in: al = cl = op (bit 16 clear), edx = entry value, ebp = bit buffer
    531 
    532 
    533 
    534 
    535 	test  al,64
    536 	jnz   L_invalid_distance_code	; op bit 64 set: invalid distance code
    537 ; Otherwise the entry links to another table entry: index = value + (low cl bits of the bit buffer).
    538 	mov  eax,1
    539 	shl  eax,cl
    540 	dec  eax
    541 	and  eax,ebp
    542 	add  eax,edx
    543 	mov  edx, [esp+12]
    544 	mov  eax, [edx+eax*4]	; fetch the linked entry from the distance table
    545 	jmp  L_dodist
    546 
    547 ALIGN 4
    548 L_clip_window:		; in: eax = out - output base, edx = distance, edi = out, [esp+24] = length
    549 ; 721 "inffast.S"
    550 	mov  ecx,eax
    551 	mov  eax, [esp+52]	; eax = wsize
    552 	neg  ecx
    553 	mov  esi, [esp+56]	; esi = window
    554 ; The input pointer was saved at [esp+44] by L_check_window; esi is the copy source from here on.
    555 	cmp  eax,edx
    556 	jb   L_invalid_distance_too_far	; distance larger than wsize
    557 
    558 	add  ecx,edx		; ecx = distance - (out - output base): bytes that must come from the window
    559 	cmp  dword ptr [esp+48],0
    560 	jne  L_wrap_around_window	; write != 0
    561 ; write == 0: the needed bytes are the last ecx bytes of the window.
    562 	sub  eax,ecx
    563 	add  esi,eax		; esi = window + wsize - ecx
    564 ; 749 "inffast.S"
    565 	mov  eax, [esp+24]	; eax = length
    566 	cmp  eax,ecx
    567 	jbe  L_do_copy1		; whole match lies in the window: L_do_copy1 copies eax bytes
    568 
    569 	sub  eax,ecx		; eax = bytes left after the window part
    570 	rep movsb		; copy the window part
    571 	mov  esi,edi
    572 	sub  esi,edx		; rest comes from the output itself, at out - distance
    573 	jmp  L_do_copy1
    583 
    584 L_wrap_around_window:	; in: ecx = bytes needed from the window, esi = window, edx = distance, write != 0
    585 ; 793 "inffast.S"
    586 	mov  eax, [esp+48]	; eax = write
    587 	cmp  ecx,eax
    588 	jbe  L_contiguous_in_window	; needed bytes all lie below the write position
    589 
    590 	add  esi, [esp+52]
    591 	add  esi,eax
    592 	sub  esi,ecx		; esi = window + wsize + write - ecx
    593 	sub  ecx,eax		; ecx = bytes taken from the end of the window
    594 
    595 
    596 	mov  eax, [esp+24]	; eax = length
    597 	cmp  eax,ecx
    598 	jbe  L_do_copy1
    599 
    600 	sub  eax,ecx
    601 	rep movsb		; copy the part at the end of the window
    602 	mov  esi, [esp+56]	; continue from the start of the window
    603 	mov  ecx, [esp+48]	; at most write bytes are available there
    604 	cmp  eax,ecx
    605 	jbe  L_do_copy1
    606 
    607 	sub  eax,ecx
    608 	rep movsb
    609 	mov  esi,edi
    610 	sub  esi,edx		; rest comes from the output itself, at out - distance
    611 	jmp  L_do_copy1
    612 
    613 L_contiguous_in_window:	; in: eax = write, ecx = bytes needed from the window (not above write)
    614 ; 836 "inffast.S"
    615 	add  esi,eax
    616 	sub  esi,ecx		; esi = window + write - ecx
    617 
    618 
    619 	mov  eax, [esp+24]	; eax = length
    620 	cmp  eax,ecx
    621 	jbe  L_do_copy1
    622 
    623 	sub  eax,ecx
    624 	rep movsb
    625 	mov  esi,edi
    626 	sub  esi,edx		; rest comes from the output itself, at out - distance
    627 
    628 L_do_copy1:		; in: eax = bytes still to copy, esi = source, edi = out
    629 ; 862 "inffast.S"
    630 	mov  ecx,eax
    631 	rep movsb
    632 
    633 	mov  esi, [esp+44]	; restore the input pointer saved by L_check_window
    634 	jmp  L_while_test
    635 ; 878 "inffast.S"
    636 ALIGN 4
    637 L_init_mmx:		; set up register roles for the MMX decode loop
    638 	emms
    639 ; MMX path roles: mm0 = bit buffer, ebp = bits buffered, mm1 = shift still to be applied to mm0,
    640 ;   mm3/mm4 = length mask (constant / working copy), mm2/mm5 = distance mask (constant / working copy),
    641 ;   ebx = length table (reloaded from [esp+8] whenever ebx was used for the distance), esi = in, edi = out.
    642 
    643 
    644 	movd mm0,ebp		; bit buffer moves from ebp to mm0
    645 	mov  ebp,ebx		; bit count moves from ebx to ebp
    646 ; 896 "inffast.S"
    647 	movd mm4,dword ptr [esp+0]
    648 	movq mm3,mm4
    649 	movd mm5,dword ptr [esp+4]
    650 	movq mm2,mm5
    651 	pxor mm1,mm1		; no shift pending yet
    652 	mov  ebx, [esp+8]
    653 	jmp  L_do_loop_mmx
    654 
    655 ALIGN 4
    656 L_do_loop_mmx:		; MMX decode loop
    657 	psrlq mm0,mm1		; apply the shift left pending by the previous step
    658 
    659 	cmp  ebp,32
    660 	ja  L_get_length_code_mmx	; more than 32 bits buffered: no refill
    661 
    662 	movd mm6,ebp
    663 	movd mm7,dword ptr [esi]	; next 32 input bits
    664 	add  esi,4
    665 	psllq mm7,mm6
    666 	add  ebp,32
    667 	por mm0,mm7		; append them above the bits already buffered
    668 
    669 L_get_length_code_mmx:
    670 	pand mm4,mm0
    671 	movd eax,mm4		; eax = bit buffer AND length mask
    672 	movq mm4,mm3		; restore the working mask copy
    673 	mov  eax, [ebx+eax*4]	; table entry: al = op, ah = bit count, high word = value
    674 
    675 L_dolen_mmx:		; in: eax = literal/length table entry
    676 	movzx  ecx,ah
    677 	movd mm1,ecx		; the entry's bits are consumed lazily via mm1
    678 	sub  ebp,ecx
    679 
    680 	test  al,al
    681 	jnz L_test_for_length_base_mmx	; op != 0: not a literal
    682 
    683 	shr  eax,16
    684 	stosb			; op == 0: emit the literal byte from the entry's high word
    685 
    686 L_while_test_mmx:	; loop condition, shared by every MMX-path copy routine
    687 
    688 
    689 	cmp  [esp+16],edi
    690 	jbe L_break_loop	; out limit reached
    691 
    692 	cmp  [esp+20],esi
    693 	ja L_do_loop_mmx	; still below the in limit
    694 	jmp L_break_loop
    695 
    696 L_test_for_length_base_mmx:	; in: eax = table entry with op != 0, mm1 = its bit count (not yet shifted out)
    697 
    698 	mov  edx,eax
    699 	shr  edx,16		; edx = entry value (length base); the length stays in edx on this path
    700 
    701 	test  al,16
    702 	jz  L_test_for_second_level_length_mmx
    703 	and  eax,15		; eax = number of extra length bits
    704 	jz L_decode_distance_mmx	; none: the base is the length
    705 
    706 	psrlq mm0,mm1		; drop the code bits first
    707 	movd mm1,eax		; the extra bits become the next pending shift
    708 	movd ecx,mm0
    709 	sub  ebp,eax
    710 	and  ecx, [inflate_fast_mask+eax*4]	; ecx = the extra bits
    711 	add  edx,ecx		; length = base + extra
    712 
    713 L_decode_distance_mmx:	; in: edx = length, mm1 = pending shift
    714 	psrlq mm0,mm1
    715 
    716 	cmp  ebp,32
    717 	ja L_get_dist_code_mmx
    718 
    719 	movd mm6,ebp
    720 	movd mm7,dword ptr [esi]	; refill with 32 more input bits
    721 	add  esi,4
    722 	psllq mm7,mm6
    723 	add  ebp,32
    724 	por mm0,mm7
    725 
    726 L_get_dist_code_mmx:
    727 	mov  ebx, [esp+12]	; ebx = distance table (the length table pointer is reloaded later)
    728 	pand mm5,mm0
    729 	movd eax,mm5		; eax = bit buffer AND distance mask
    730 	movq mm5,mm2		; restore the working mask copy
    731 	mov  eax, [ebx+eax*4]
    732 
    733 L_dodist_mmx:		; in: eax = distance table entry
    734 
    735 	movzx  ecx,ah
    736 	mov  ebx,eax
    737 	shr  ebx,16		; ebx = entry value (distance base)
    738 	sub  ebp,ecx
    739 	movd mm1,ecx		; the entry's bits are consumed lazily via mm1
    740 
    741 	test  al,16
    742 	jz L_test_for_second_level_dist_mmx
    743 	and  eax,15		; eax = number of extra distance bits
    744 	jz L_check_dist_one_mmx
    745 
    746 L_add_bits_to_dist_mmx:
    747 	psrlq mm0,mm1		; drop the code bits first
    748 	movd mm1,eax		; the extra bits become the next pending shift
    749 	movd ecx,mm0
    750 	sub  ebp,eax
    751 	and  ecx, [inflate_fast_mask+eax*4]	; ecx = the extra bits
    752 	add  ebx,ecx		; distance = base + extra
    753 
    754 L_check_window_mmx:	; in: ebx = distance, edx = length, edi = out
    755 	mov  [esp+44],esi	; save the input pointer; esi becomes the copy source
    756 	mov  eax,edi
    757 	sub  eax, [esp+40]	; eax = out - output base
    758 
    759 	cmp  eax,ebx
    760 	jb L_clip_window_mmx	; distance reaches before the output base
    761 
    762 	mov  ecx,edx
    763 	mov  esi,edi
    764 	sub  esi,ebx		; esi = out - distance
    765 
    766 	sub  ecx,3		; first three bytes are copied explicitly, the remaining length-3 by rep movsb
    767 	mov  al, [esi]
    768 	mov  [edi],al
    769 	mov  al, [esi+1]
    770 	mov  dl, [esi+2]	; overwrites the low byte of the length, already copied to ecx
    771 	add  esi,3
    772 	mov  [edi+1],al
    773 	mov  [edi+2],dl
    774 	add  edi,3
    775 	rep movsb
    776 
    777 	mov  esi, [esp+44]	; restore the input pointer
    778 	mov  ebx, [esp+8]	; restore the length table pointer
    779 	jmp  L_while_test_mmx
    780 
    781 ALIGN 4
    782 L_check_dist_one_mmx:	; reached when the distance code has no extra bits; in: ebx = distance, edx = length
    783 	cmp  ebx,1
    784 	jne  L_check_window_mmx
    785 	cmp  [esp+40],edi
    786 	je   L_check_window_mmx	; out is still at the output base: no previous output byte to repeat
    787 ; Distance 1: repeat the previous output byte edx times.
    788 	dec  edi
    789 	mov  ecx,edx
    790 	mov  al, [edi]		; al = previous output byte
    791 	sub  ecx,3
    792 
    793 	mov  [edi+1],al		; three bytes stored explicitly ...
    794 	mov  [edi+2],al
    795 	mov  [edi+3],al
    796 	add  edi,4
    797 	rep stosb		; ... then the remaining length-3
    798 
    799 	mov  ebx, [esp+8]	; restore the length table pointer
    800 	jmp  L_while_test_mmx
    801 
    802 ALIGN 4
    803 L_test_for_second_level_length_mmx:	; in: al = op (bit 16 clear), edx = entry value, ebx = length table
    804 	test  al,64
    805 	jnz L_test_for_end_of_block	; op bit 64 set: end of block or invalid code
    806 ; Otherwise the entry links to another table entry: index = value + (low op-AND-15 bits of the bit buffer).
    807 	and  eax,15
    808 	psrlq mm0,mm1		; apply the pending shift before reading the index bits
    809 	movd ecx,mm0
    810 	and  ecx, [inflate_fast_mask+eax*4]
    811 	add  ecx,edx
    812 	mov  eax, [ebx+ecx*4]	; fetch the linked entry; L_dolen_mmx sets mm1 from it
    813 	jmp L_dolen_mmx
    814 
    815 ALIGN 4
    816 L_test_for_second_level_dist_mmx:	; in: al = op (bit 16 clear), ebx = entry value
    817 	test  al,64
    818 	jnz L_invalid_distance_code	; op bit 64 set: invalid distance code
    819 ; Otherwise the entry links to another table entry: index = value + (low op-AND-15 bits of the bit buffer).
    820 	and  eax,15
    821 	psrlq mm0,mm1		; apply the pending shift before reading the index bits
    822 	movd ecx,mm0
    823 	and  ecx, [inflate_fast_mask+eax*4]
    824 	mov  eax, [esp+12]	; eax = distance table
    825 	add  ecx,ebx
    826 	mov  eax, [eax+ecx*4]	; fetch the linked entry; L_dodist_mmx sets mm1 from it
    827 	jmp  L_dodist_mmx
    828 
    829 ALIGN 4
    830 L_clip_window_mmx:	; in: eax = out - output base, ebx = distance, edx = length, edi = out
    831 ; The input pointer was saved at [esp+44] by L_check_window_mmx; esi is the copy source from here on.
    832 	mov  ecx,eax
    833 	mov  eax, [esp+52]	; eax = wsize
    834 	neg  ecx
    835 	mov  esi, [esp+56]	; esi = window
    836 
    837 	cmp  eax,ebx
    838 	jb  L_invalid_distance_too_far	; distance larger than wsize
    839 
    840 	add  ecx,ebx		; ecx = distance - (out - output base): bytes that must come from the window
    841 	cmp  dword ptr [esp+48],0
    842 	jne  L_wrap_around_window_mmx	; write != 0
    843 ; write == 0: the needed bytes are the last ecx bytes of the window.
    844 	sub  eax,ecx
    845 	add  esi,eax		; esi = window + wsize - ecx
    846 
    847 	cmp  edx,ecx
    848 	jbe  L_do_copy1_mmx	; whole match lies in the window: L_do_copy1_mmx copies edx bytes
    849 
    850 	sub  edx,ecx		; edx = bytes left after the window part
    851 	rep movsb		; copy the window part
    852 	mov  esi,edi
    853 	sub  esi,ebx		; rest comes from the output itself, at out - distance
    854 	jmp  L_do_copy1_mmx
    864 
    865 L_wrap_around_window_mmx:	; in: ecx = bytes needed from the window, esi = window, ebx = distance, edx = length
    866 
    867 	mov  eax, [esp+48]	; eax = write
    868 	cmp  ecx,eax
    869 	jbe  L_contiguous_in_window_mmx	; needed bytes all lie below the write position
    870 
    871 	add  esi, [esp+52]
    872 	add  esi,eax
    873 	sub  esi,ecx		; esi = window + wsize + write - ecx
    874 	sub  ecx,eax		; ecx = bytes taken from the end of the window
    875 
    876 
    877 	cmp  edx,ecx
    878 	jbe  L_do_copy1_mmx
    879 
    880 	sub  edx,ecx
    881 	rep movsb		; copy the part at the end of the window
    882 	mov  esi, [esp+56]	; continue from the start of the window
    883 	mov  ecx, [esp+48]	; at most write bytes are available there
    884 	cmp  edx,ecx
    885 	jbe  L_do_copy1_mmx
    886 
    887 	sub  edx,ecx
    888 	rep movsb
    889 	mov  esi,edi
    890 	sub  esi,ebx		; rest comes from the output itself, at out - distance
    891 	jmp  L_do_copy1_mmx
    892 
    893 L_contiguous_in_window_mmx:	; in: eax = write, ecx = bytes needed from the window (not above write)
    894 
    895 	add  esi,eax
    896 	sub  esi,ecx		; esi = window + write - ecx
    897 
    898 
    899 	cmp  edx,ecx
    900 	jbe  L_do_copy1_mmx
    901 
    902 	sub  edx,ecx
    903 	rep movsb
    904 	mov  esi,edi
    905 	sub  esi,ebx		; rest comes from the output itself, at out - distance
    906 
    907 L_do_copy1_mmx:		; in: edx = bytes still to copy, esi = source, edi = out
    908 
    909 
    910 	mov  ecx,edx
    911 	rep movsb
    912 
    913 	mov  esi, [esp+44]	; restore the input pointer saved by L_check_window_mmx
    914 	mov  ebx, [esp+8]	; restore the length table pointer
    915 	jmp  L_while_test_mmx
    916 ; 1174 "inffast.S"
    917 L_invalid_distance_code:	; exits shared by both loops: each sets ecx = message (or 0) and edx = new mode
    918 
    919 
    920 
    921 
    922 
    923 	mov  ecx, invalid_distance_code_msg
    924 	mov  edx,INFLATE_MODE_BAD
    925 	jmp  L_update_stream_state
    926 
    927 L_test_for_end_of_block:	; in: al = op of a length-table entry with bit 64 set
    928 
    929 
    930 
    931 
    932 
    933 	test  al,32
    934 	jz  L_invalid_literal_length_code	; bit 32 clear: not an end-of-block code
    935 
    936 	mov  ecx,0		; end of block: no message
    937 	mov  edx,INFLATE_MODE_TYPE
    938 	jmp  L_update_stream_state
    939 
    940 L_invalid_literal_length_code:
    941 
    942 
    943 
    944 
    945 
    946 	mov  ecx, invalid_literal_length_code_msg
    947 	mov  edx,INFLATE_MODE_BAD
    948 	jmp  L_update_stream_state
    949 
    950 L_invalid_distance_too_far:	; reached from L_clip_window / L_clip_window_mmx, where esi holds the window pointer
    951 
    952 
    953 
    954 	mov  esi, [esp+44]	; restore the input pointer saved before the window check
    955 	mov  ecx, invalid_distance_too_far_msg
    956 	mov  edx,INFLATE_MODE_BAD
    957 	jmp  L_update_stream_state
    958 
    959 L_update_stream_state:	; in: ecx = message or 0, edx = mode value
    960 
    961 	mov  eax, [esp+88]	; eax = strm (first argument)
    962 	test  ecx,ecx
    963 	jz  L_skip_msg		; ecx == 0: leave [strm+24] untouched
    964 	mov  [eax+24],ecx	; [strm+24] = message
    965 L_skip_msg:
    966 	mov  eax, [eax+28]
    967 	mov  [eax+mode_state],edx	; state->mode = edx
    968 	jmp  L_break_loop
    969 
    970 ALIGN 4
    971 L_break_loop:		; common exit: write the decoder state back to strm and state
    972 ; 1243 "inffast.S"
    973 	cmp  dword ptr [inflate_fast_use_mmx],2
    974 	jne  L_update_next_in
    975 
    976 ; MMX path keeps the bit count in ebp; move it to ebx, where the code below expects it.
    977 
    978 	mov  ebx,ebp
    979 
    980 L_update_next_in:	; in: ebx = bits buffered, esi = in, edi = out
    981 ; 1266 "inffast.S"
    982 	mov  eax, [esp+88]	; eax = strm
    983 	mov  ecx,ebx
    984 	mov  edx, [eax+28]	; edx = state
    985 	shr  ecx,3		; ecx = whole bytes still unused in the bit buffer
    986 	sub  esi,ecx		; give those bytes back to the input
    987 	shl  ecx,3
    988 	sub  ebx,ecx		; ebx = leftover bits (0..7)
    989 	mov  [eax+12],edi	; [strm+12] = out
    990 	mov  [edx+bits_state],ebx
    991 	mov  ecx,ebx
    992 
    993 	lea  ebx, [esp+28]
    994 	cmp  [esp+20],ebx
    995 	jne  L_buf_not_used	; in limit is not the scratch area: esi already points into the caller's input
    996 ; Scratch area was used: translate esi back into the caller's buffer and recompute the in limit.
    997 	sub  esi,ebx		; esi = offset consumed within the scratch area
    998 	mov  ebx, [eax+0]
    999 	mov  [esp+20],ebx
   1000 	add  esi,ebx		; esi = [strm+0] + offset
   1001 	mov  ebx, [eax+4]
   1002 	sub  ebx,11
   1003 	add  [esp+20],ebx	; in limit = [strm+0] + [strm+4] - 11
   1004 
   1005 L_buf_not_used:
   1006 	mov  [eax+0],esi	; [strm+0] = in
   1007 
   1008 	mov  ebx,1
   1009 	shl  ebx,cl
   1010 	dec  ebx		; ebx = 2^bits - 1
   1011 
   1012 
   1013 
   1014 
   1015 
   1016 	cmp  dword ptr [inflate_fast_use_mmx],2
   1017 	jne  L_update_hold
   1018 
   1019 ; MMX path: apply the pending shift, move the low 32 bits of the bit buffer to ebp, leave MMX state.
   1020 
   1021 	psrlq mm0,mm1
   1022 	movd ebp,mm0
   1023 
   1024 	emms
   1025 
   1026 L_update_hold:
   1027 
   1028 
   1029 
   1030 	and  ebp,ebx		; keep only the leftover bits
   1031 	mov  [edx+hold_state],ebp
   1032 
   1033 
   1034 ; [strm+4] = (in limit - in) + 11; both branches below produce this same 32-bit value.
   1035 
   1036 	mov  ebx, [esp+20]
   1037 	cmp  ebx,esi
   1038 	jbe  L_last_is_smaller
   1039 
   1040 	sub  ebx,esi
   1041 	add  ebx,11
   1042 	mov  [eax+4],ebx
   1043 	jmp  L_fixup_out
   1044 L_last_is_smaller:
   1045 	sub  esi,ebx
   1046 	neg  esi
   1047 	add  esi,11
   1048 	mov  [eax+4],esi
   1049 
   1050 
   1051 
   1052 
   1053 L_fixup_out:		; [strm+16] = (out limit - out) + 257; both branches produce the same value
   1054 
   1055 	mov  ebx, [esp+16]
   1056 	cmp  ebx,edi
   1057 	jbe  L_end_is_smaller
   1058 
   1059 	sub  ebx,edi
   1060 	add  ebx,257
   1061 	mov  [eax+16],ebx
   1062 	jmp  L_done
   1063 L_end_is_smaller:
   1064 	sub  edi,ebx
   1065 	neg  edi
   1066 	add  edi,257
   1067 	mov  [eax+16],edi
   1068 
   1069 
   1070 
   1071 
   1072 
   1073 L_done:			; epilogue: mirrors the prologue at inflate_fast_entry
   1074 	add  esp,64		; release the locals
   1075 	popfd			; restore the caller's flags
   1076 	pop  ebx
   1077 	pop  ebp
   1078 	pop  esi
   1079 	pop  edi
   1080 	ret			; plain ret: the stack arguments are left for the caller to remove
   1081 
   1082 _TEXT	ends
   1083 end
   1084