Home | History | Annotate | Download | only in inflate86
      1 /*
      2  * inffast.S is a hand tuned assembler version of:
      3  *
      4  * inffast.c -- fast decoding
      5  * Copyright (C) 1995-2003 Mark Adler
      6  * For conditions of distribution and use, see copyright notice in zlib.h
      7  *
      8  * Copyright (C) 2003 Chris Anderson <christop (at) charm.net>
      9  * Please use the copyright conditions above.
     10  *
     11  * This version (Jan-23-2003) of inflate_fast was coded and tested under
     12  * GNU/Linux on a pentium 3, using the gcc-3.2 compiler distribution.  On that
     13  * machine, I found that gzip style archives decompressed about 20% faster than
     14  * the gcc-3.2 -O3 -fomit-frame-pointer compiled version.  Your results will
     15  * depend on how large of a buffer is used for z_stream.next_in & next_out
     16  * (8K-32K worked best for my 256K cpu cache) and how much overhead there is in
     17  * stream processing I/O and crc32/adler32.  In my case, this routine used
     18  * 70% of the cpu time and crc32 used 20%.
     19  *
     20  * I am confident that this version will work in the general case, but I have
     21  * not tested a wide variety of datasets or a wide variety of platforms.
     22  *
     23  * Jan-24-2003 -- Added -DUSE_MMX define for slightly faster inflating.
     24  * It should be a runtime flag instead of compile time flag...
     25  *
     26  * Jan-26-2003 -- Added runtime check for MMX support with cpuid instruction.
     27  * With -DUSE_MMX, only MMX code is compiled.  With -DNO_MMX, only non-MMX code
     28  * is compiled.  Without either option, runtime detection is enabled.  Runtime
     29  * detection should work on all modern cpus and the recommended algorithm (flip
     30  * ID bit on eflags and then use the cpuid instruction) is used in many
     31  * multimedia applications.  Tested under win2k with gcc-2.95 and gas-2.12
     32  * distributed with cygwin3.  Compiling with gcc-2.95 -c inffast.S -o
     33  * inffast.obj generates a COFF object which can then be linked with MSVC++
     34  * compiled code.  Tested under FreeBSD 4.7 with gcc-2.95.
     35  *
     36  * Jan-28-2003 -- Tested Athlon XP... MMX mode is slower than no MMX (and
     37  * slower than compiler generated code).  Adjusted cpuid check to use the MMX
     38  * code only for Pentiums < P4 until I have more data on the P4.  Speed
     39  * improvement is only about 15% on the Athlon when compared with code generated
     40  * with MSVC++.  Not sure yet, but I think the P4 will also be slower using the
     41  * MMX mode because many of its x86 ALU instructions execute in .5 cycles and
     42  * have less latency than MMX ops.  Added code to buffer the last 11 bytes of
     43  * the input stream since the MMX code grabs bits in chunks of 32, which
     44  * differs from the inffast.c algorithm.  I don't think there would have been
     45  * read overruns where a page boundary was crossed (a segfault), but there
     46  * could have been overruns when next_in ends on unaligned memory (uninitialized
     47  * memory read).
     48  *
     49  * Mar-13-2003 -- P4 MMX is slightly slower than P4 NO_MMX.  I created a C
     50  * version of the non-MMX code so that it doesn't depend on zstrm and zstate
     51  * structure offsets which are hard coded in this file.  This was last tested
     52  * with zlib-1.2.0 which is currently in beta testing, newer versions of this
     53  * and inffas86.c can be found at http://www.eetbeetee.com/zlib/ and
     54  * http://www.charm.net/~christop/zlib/
     55  */
     56 
     57 
     58 /*
     59  * if you have underscore linking problems (_inflate_fast undefined), try
     60  * using -DGAS_COFF
     61  */
     62 #if ! defined( GAS_COFF ) && ! defined( GAS_ELF )
     63 
     64 #if defined( WIN32 ) || defined( __CYGWIN__ )
     65 #define GAS_COFF /* windows object format */
     66 #else
     67 #define GAS_ELF
     68 #endif
     69 
     70 #endif /* ! GAS_COFF && ! GAS_ELF */
     71 
     72 
     73 #if defined( GAS_COFF )
     74 
     75 /* coff externals have underscores */
     76 #define inflate_fast _inflate_fast
     77 #define inflate_fast_use_mmx _inflate_fast_use_mmx
     78 
     79 #endif /* GAS_COFF */
     80 
     81 
     82 .file "inffast.S"
     83 
     84 .globl inflate_fast
     85 
     86 .text
     87 .align 4,0
     88 .L_invalid_literal_length_code_msg:
     89 .string "invalid literal/length code"
     90 
     91 .align 4,0
     92 .L_invalid_distance_code_msg:
     93 .string "invalid distance code"
     94 
     95 .align 4,0
     96 .L_invalid_distance_too_far_msg:
     97 .string "invalid distance too far back"
     98 
     99 #if ! defined( NO_MMX )
    100 .align 4,0
    101 .L_mask: /* mask[N] = ( 1 << N ) - 1 */
    102 .long 0
    103 .long 1
    104 .long 3
    105 .long 7
    106 .long 15
    107 .long 31
    108 .long 63
    109 .long 127
    110 .long 255
    111 .long 511
    112 .long 1023
    113 .long 2047
    114 .long 4095
    115 .long 8191
    116 .long 16383
    117 .long 32767
    118 .long 65535
    119 .long 131071
    120 .long 262143
    121 .long 524287
    122 .long 1048575
    123 .long 2097151
    124 .long 4194303
    125 .long 8388607
    126 .long 16777215
    127 .long 33554431
    128 .long 67108863
    129 .long 134217727
    130 .long 268435455
    131 .long 536870911
    132 .long 1073741823
    133 .long 2147483647
    134 .long 4294967295
    135 #endif /* NO_MMX */
    136 
    137 .text
    138 
    139 /*
    140  * struct z_stream offsets, in zlib.h
         *
         * Byte offsets are hard coded (see the Mar-13-2003 note in the file
         * header, which says this was last tested with zlib-1.2.0).  The spacing
         * of next_in/avail_in implies 4-byte pointers and ints.
         * NOTE(review): re-check against zlib.h whenever the z_stream layout or
         * the target's pointer size changes.
    141  */
    142 #define next_in_strm   0   /* strm->next_in */
    143 #define avail_in_strm  4   /* strm->avail_in */
    144 #define next_out_strm  12  /* strm->next_out */
    145 #define avail_out_strm 16  /* strm->avail_out */
    146 #define msg_strm       24  /* strm->msg */
    147 #define state_strm     28  /* strm->state */
    148 
    149 /*
    150  * struct inflate_state offsets, in inflate.h
         *
         * Hard coded in the same way as the z_stream offsets above.
         * NOTE(review): re-check against inflate.h whenever struct inflate_state
         * gains, loses or reorders fields.
    151  */
    152 #define mode_state     0   /* state->mode */
    153 #define wsize_state    32  /* state->wsize */
    154 #define write_state    40  /* state->write */
    155 #define window_state   44  /* state->window */
    156 #define hold_state     48  /* state->hold */
    157 #define bits_state     52  /* state->bits */
    158 #define lencode_state  68  /* state->lencode */
    159 #define distcode_state 72  /* state->distcode */
    160 #define lenbits_state  76  /* state->lenbits */
    161 #define distbits_state 80  /* state->distbits */
    162 
    163 /*
    164  * inflate_fast's activation record
         *
         * All offsets are relative to %esp once the prologue has run.  Above
         * the local_var_size bytes of locals sit the five 32-bit values the
         * prologue pushes (%edi, %esi, %ebp, %ebx, eflags = 20 bytes) and the
         * 4-byte return address, so the first argument is at
         * local_var_size + 24.  Changing local_var_size or the set of pushed
         * registers means strm_sp and start_sp must be recomputed.
    165  */
    166 #define local_var_size 64 /* how much local space for vars */
    167 #define strm_sp        88 /* first arg: z_stream * (local_var_size + 24) */
    168 #define start_sp       92 /* second arg: unsigned int (local_var_size + 28) */
    169 
    170 /*
    171  * offsets for local vars on stack
    172  */
    173 #define out            60  /* unsigned char* */
    174 #define window         56  /* unsigned char* */
    175 #define wsize          52  /* unsigned int */
    176 #define write          48  /* unsigned int */
    177 #define in             44  /* unsigned char* */
    178 #define beg            40  /* unsigned char* */
    179 #define buf            28  /* char[ 12 ] */
    180 #define len            24  /* unsigned int */
    181 #define last           20  /* unsigned char* */
    182 #define end            16  /* unsigned char* */
    183 #define dcode          12  /* code* */
    184 #define lcode           8  /* code* */
    185 #define dmask           4  /* unsigned int */
    186 #define lmask           0  /* unsigned int */
    187 
    188 /*
    189  * typedef enum inflate_mode consts, in inflate.h
    190  */
    191 #define INFLATE_MODE_TYPE 11  /* state->mode flags enum-ed in inflate.h */
    192 #define INFLATE_MODE_BAD  26
    193 
    194 
    195 #if ! defined( USE_MMX ) && ! defined( NO_MMX )
    196 
    197 #define RUN_TIME_MMX
    198 
    199 #define CHECK_MMX    1
    200 #define DO_USE_MMX   2
    201 #define DONT_USE_MMX 3
    202 
    203 .globl inflate_fast_use_mmx
    204 
    205 .data
    206 
    207 .align 4,0
    208 inflate_fast_use_mmx: /* integer flag for run time control 1=check,2=mmx,3=no */
    209 .long CHECK_MMX
    210 
    211 #if defined( GAS_ELF )
    212 /* elf info */
    213 .type   inflate_fast_use_mmx,@object
    214 .size   inflate_fast_use_mmx,4
    215 #endif
    216 
    217 #endif /* RUN_TIME_MMX */
    218 
    219 #if defined( GAS_COFF )
    220 /* coff info: scl 2 = extern, type 32 = function */
    221 .def inflate_fast; .scl 2; .type 32; .endef
    222 #endif
    223 
    224 .text
    225 
    226 .align 32,0x90
    227 inflate_fast:
                /* Prologue: save %edi, %esi, %ebp, %ebx and eflags, then reserve
                 * local_var_size bytes for the locals addressed off %esp.  The
                 * strm_sp/start_sp argument offsets count these five 4-byte pushes
                 * plus the return address, so the push list must not change
                 * without updating them. */
    228         pushl   %edi
    229         pushl   %esi
    230         pushl   %ebp
    231         pushl   %ebx
    232         pushf   /* save eflags (strm_sp, start_sp assume this is 32 bits) */
    233         subl    $local_var_size, %esp
    234         cld     /* string ops used below (movsb/stosb/lodsw) must advance upward */
    235 
    236 #define strm_r  %esi
    237 #define state_r %edi
    238 
    239         movl    strm_sp(%esp), strm_r
    240         movl    state_strm(strm_r), state_r
    241 
    242         /* in = strm->next_in;
    243          * out = strm->next_out;
    244          * last = in + strm->avail_in - 11;
    245          * beg = out - (start - strm->avail_out);
    246          * end = out + (strm->avail_out - 257);
    247          */
    248         movl    avail_in_strm(strm_r), %edx
    249         movl    next_in_strm(strm_r), %eax
    250 
    251         addl    %eax, %edx      /* avail_in += next_in */
    252         subl    $11, %edx       /* avail_in -= 11 */
    253 
    254         movl    %eax, in(%esp)
    255         movl    %edx, last(%esp)
    256 
    257         movl    start_sp(%esp), %ebp
    258         movl    avail_out_strm(strm_r), %ecx
    259         movl    next_out_strm(strm_r), %ebx
    260 
    261         subl    %ecx, %ebp      /* start -= avail_out */
    262         negl    %ebp            /* start = -start */
    263         addl    %ebx, %ebp      /* start += next_out */
    264 
    265         subl    $257, %ecx      /* avail_out -= 257 */
    266         addl    %ebx, %ecx      /* avail_out += out */
    267 
    268         movl    %ebx, out(%esp)
    269         movl    %ebp, beg(%esp)
    270         movl    %ecx, end(%esp)
    271 
    272         /* wsize = state->wsize;
    273          * write = state->write;
    274          * window = state->window;
    275          * hold = state->hold;
    276          * bits = state->bits;
    277          * lcode = state->lencode;
    278          * dcode = state->distcode;
    279          * lmask = ( 1 << state->lenbits ) - 1;
    280          * dmask = ( 1 << state->distbits ) - 1;
    281          */
    282 
    283         movl    lencode_state(state_r), %eax
    284         movl    distcode_state(state_r), %ecx
    285 
    286         movl    %eax, lcode(%esp)
    287         movl    %ecx, dcode(%esp)
    288 
    289         movl    $1, %eax
    290         movl    lenbits_state(state_r), %ecx
    291         shll    %cl, %eax
    292         decl    %eax
    293         movl    %eax, lmask(%esp)
    294 
    295         movl    $1, %eax
    296         movl    distbits_state(state_r), %ecx
    297         shll    %cl, %eax
    298         decl    %eax
    299         movl    %eax, dmask(%esp)
    300 
    301         movl    wsize_state(state_r), %eax
    302         movl    write_state(state_r), %ecx
    303         movl    window_state(state_r), %edx
    304 
    305         movl    %eax, wsize(%esp)
    306         movl    %ecx, write(%esp)
    307         movl    %edx, window(%esp)
    308 
    309         movl    hold_state(state_r), %ebp
    310         movl    bits_state(state_r), %ebx
    311 
    312 #undef strm_r
    313 #undef state_r
    314 
    315 #define in_r       %esi
    316 #define from_r     %esi
    317 #define out_r      %edi
    318 
    319         movl    in(%esp), in_r
    320         movl    last(%esp), %ecx
    321         cmpl    in_r, %ecx
    322         ja      .L_align_long           /* if in < last */
    323 
    324         addl    $11, %ecx               /* ecx = &in[ avail_in ] */
    325         subl    in_r, %ecx              /* ecx = avail_in */
    326         movl    $12, %eax
    327         subl    %ecx, %eax              /* eax = 12 - avail_in */
    328         leal    buf(%esp), %edi
    329         rep     movsb                   /* memcpy( buf, in, avail_in ) */
    330         movl    %eax, %ecx
    331         xorl    %eax, %eax
    332         rep     stosb         /* memset( &buf[ avail_in ], 0, 12 - avail_in ) */
    333         leal    buf(%esp), in_r         /* in = buf */
    334         movl    in_r, last(%esp)        /* last = in, do just one iteration */
    335         jmp     .L_is_aligned
    336 
    337         /* align in_r on long boundary */
    338 .L_align_long:
    339         testl   $3, in_r
    340         jz      .L_is_aligned
    341         xorl    %eax, %eax
    342         movb    (in_r), %al
    343         incl    in_r
    344         movl    %ebx, %ecx
    345         addl    $8, %ebx
    346         shll    %cl, %eax
    347         orl     %eax, %ebp
    348         jmp     .L_align_long
    349 
    350 .L_is_aligned:
    351         movl    out(%esp), out_r
    352 
    353 #if defined( NO_MMX )
    354         jmp     .L_do_loop
    355 #endif
    356 
    357 #if defined( USE_MMX )
    358         jmp     .L_init_mmx
    359 #endif
    360 
    361 /*** Runtime MMX check ***/
    362 
    363 #if defined( RUN_TIME_MMX )
        /*
         * Run-time CPU probe.  inflate_fast_use_mmx caches the decision:
         *   DO_USE_MMX (2)         -> jump to .L_init_mmx (defined later in the file)
         *   DONT_USE_MMX (3)       -> jump to the non-MMX loop at .L_do_loop
         *   below 2 (CHECK_MMX, 1) -> fall through and probe with cpuid
         * After probing, the result is stored in inflate_fast_use_mmx and control
         * jumps back to .L_check_mmx so the compare below dispatches on it.
         */
    364 .L_check_mmx:
    365         cmpl    $DO_USE_MMX, inflate_fast_use_mmx
    366         je      .L_init_mmx
    367         ja      .L_do_loop /* > 2: DONT_USE_MMX */
    368 
                /* cpuid overwrites %eax-%edx; %ebx holds the live bit count */
    369         pushl   %eax
    370         pushl   %ebx
    371         pushl   %ecx
    372         pushl   %edx
    373         pushf
    374         movl    (%esp), %eax      /* copy eflags to eax */
    375         xorl    $0x200000, (%esp) /* try toggling ID bit of eflags (bit 21)
    376                                    * to see if cpu supports cpuid...
    377                                    * ID bit method not supported by NexGen but
    378                                    * bios may load a cpuid instruction and
    379                                    * cpuid may be disabled on Cyrix 5-6x86 */
                /* NOTE(review): nothing in this block puts the original ID bit
                 * back; presumably the eflags image saved by the prologue is
                 * restored on exit -- confirm in the epilogue. */
    380         popf
    381         pushf
    382         popl    %edx              /* copy new eflags to edx */
    383         xorl    %eax, %edx        /* test if ID bit is flipped */
    384         jz      .L_dont_use_mmx   /* not flipped if zero */
    385         xorl    %eax, %eax
    386         cpuid
                /* constants are little-endian ASCII: "Genu", "ntel", "ineI" */
    387         cmpl    $0x756e6547, %ebx /* check for GenuineIntel in ebx,ecx,edx */
    388         jne     .L_dont_use_mmx
    389         cmpl    $0x6c65746e, %ecx /* "ntel" */
    390         jne     .L_dont_use_mmx
    391         cmpl    $0x49656e69, %edx /* "ineI" */
    392         jne     .L_dont_use_mmx
    393         movl    $1, %eax
    394         cpuid                     /* get cpu features */
    395         shrl    $8, %eax
    396         andl    $15, %eax         /* keep bits 11..8 of eax: the family field */
    397         cmpl    $6, %eax          /* check for Pentium family, is 0xf for P4 */
    398         jne     .L_dont_use_mmx
    399         testl   $0x800000, %edx   /* test if MMX feature is set (bit 23) */
    400         jnz     .L_use_mmx
    401         jmp     .L_dont_use_mmx
    402 .L_use_mmx:
    403         movl    $DO_USE_MMX, inflate_fast_use_mmx
    404         jmp     .L_check_mmx_pop
    405 .L_dont_use_mmx:
    406         movl    $DONT_USE_MMX, inflate_fast_use_mmx
    407 .L_check_mmx_pop:
                /* restore the registers saved before the probe, in reverse order */
    408         popl    %edx
    409         popl    %ecx
    410         popl    %ebx
    411         popl    %eax
    412         jmp     .L_check_mmx      /* re-dispatch on the cached result */
    413 #endif
    414 
    415 
    416 /*** Non-MMX code ***/
    417 
    418 #if defined ( NO_MMX ) || defined( RUN_TIME_MMX )
    419 
    420 #define hold_r     %ebp
    421 #define bits_r     %bl
    422 #define bitslong_r %ebx
    423 
    424 .align 32,0x90
    425 .L_while_test:
    426         /* while (in < last && out < end)
    427          */
    428         cmpl    out_r, end(%esp)
    429         jbe     .L_break_loop           /* if (out >= end) */
    430 
    431         cmpl    in_r, last(%esp)
    432         jbe     .L_break_loop
    433 
    434 .L_do_loop:
    435         /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
    436          *
    437          * do {
    438          *   if (bits < 15) {
    439          *     hold |= *((unsigned short *)in)++ << bits;
    440          *     bits += 16
    441          *   }
    442          *   this = lcode[hold & lmask]
    443          */
    444         cmpb    $15, bits_r
    445         ja      .L_get_length_code      /* if (15 < bits) */
    446 
    447         xorl    %eax, %eax
    448         lodsw                           /* al = *(ushort *)in++ */
    449         movb    bits_r, %cl             /* cl = bits, needs it for shifting */
    450         addb    $16, bits_r             /* bits += 16 */
    451         shll    %cl, %eax
    452         orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
    453 
    454 .L_get_length_code:
    455         movl    lmask(%esp), %edx       /* edx = lmask */
    456         movl    lcode(%esp), %ecx       /* ecx = lcode */
    457         andl    hold_r, %edx            /* edx &= hold */
    458         movl    (%ecx,%edx,4), %eax     /* eax = lcode[hold & lmask] */
    459 
    460 .L_dolen:
    461         /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out
    462          *
    463          * dolen:
    464          *    bits -= this.bits;
    465          *    hold >>= this.bits
    466          */
    467         movb    %ah, %cl                /* cl = this.bits */
    468         subb    %ah, bits_r             /* bits -= this.bits */
    469         shrl    %cl, hold_r             /* hold >>= this.bits */
    470 
    471         /* check if op is a literal
    472          * if (op == 0) {
    473          *    PUP(out) = this.val;
    474          *  }
    475          */
    476         testb   %al, %al
    477         jnz     .L_test_for_length_base /* if (op != 0) 45.7% */
    478 
    479         shrl    $16, %eax               /* output this.val char */
    480         stosb
    481         jmp     .L_while_test
    482 
    483 .L_test_for_length_base:
    484         /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = len
    485          *
    486          * else if (op & 16) {
    487          *   len = this.val
    488          *   op &= 15
    489          *   if (op) {
    490          *     if (op > bits) {
    491          *       hold |= *((unsigned short *)in)++ << bits;
    492          *       bits += 16
    493          *     }
    494          *     len += hold & mask[op];
    495          *     bits -= op;
    496          *     hold >>= op;
    497          *   }
    498          */
    499 #define len_r %edx
    500         movl    %eax, len_r             /* len = this */
    501         shrl    $16, len_r              /* len = this.val */
    502         movb    %al, %cl
    503 
    504         testb   $16, %al
    505         jz      .L_test_for_second_level_length /* if ((op & 16) == 0) 8% */
    506         andb    $15, %cl                /* op &= 15 */
    507         jz      .L_save_len             /* if (!op) */
    508         cmpb    %cl, bits_r
    509         jae     .L_add_bits_to_len      /* if (op <= bits) */
    510 
    511         movb    %cl, %ch                /* stash op in ch, freeing cl */
    512         xorl    %eax, %eax
    513         lodsw                           /* al = *(ushort *)in++ */
    514         movb    bits_r, %cl             /* cl = bits, needs it for shifting */
    515         addb    $16, bits_r             /* bits += 16 */
    516         shll    %cl, %eax
    517         orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
    518         movb    %ch, %cl                /* move op back to ecx */
    519 
    520 .L_add_bits_to_len:
    521         movl    $1, %eax
    522         shll    %cl, %eax
    523         decl    %eax
    524         subb    %cl, bits_r
    525         andl    hold_r, %eax            /* eax &= hold */
    526         shrl    %cl, hold_r
    527         addl    %eax, len_r             /* len += hold & mask[op] */
    528 
    529 .L_save_len:
    530         movl    len_r, len(%esp)        /* save len */
    531 #undef  len_r
    532 
    533 .L_decode_distance:
    534         /* regs: %esi = in, %ebp = hold, %bl = bits, %edi = out, %edx = dist
    535          *
    536          *   if (bits < 15) {
    537          *     hold |= *((unsigned short *)in)++ << bits;
    538          *     bits += 16
    539          *   }
    540          *   this = dcode[hold & dmask];
    541          * dodist:
    542          *   bits -= this.bits;
    543          *   hold >>= this.bits;
    544          *   op = this.op;
    545          */
    546 
    547         cmpb    $15, bits_r
    548         ja      .L_get_distance_code    /* if (15 < bits) */
    549 
    550         xorl    %eax, %eax
    551         lodsw                           /* al = *(ushort *)in++ */
    552         movb    bits_r, %cl             /* cl = bits, needs it for shifting */
    553         addb    $16, bits_r             /* bits += 16 */
    554         shll    %cl, %eax
    555         orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
    556 
    557 .L_get_distance_code:
    558         movl    dmask(%esp), %edx       /* edx = dmask */
    559         movl    dcode(%esp), %ecx       /* ecx = dcode */
    560         andl    hold_r, %edx            /* edx &= hold */
    561         movl    (%ecx,%edx,4), %eax     /* eax = dcode[hold & dmask] */
    562 
    563 #define dist_r %edx
    564 .L_dodist:
    565         movl    %eax, dist_r            /* dist = this */
    566         shrl    $16, dist_r             /* dist = this.val */
    567         movb    %ah, %cl
    568         subb    %ah, bits_r             /* bits -= this.bits */
    569         shrl    %cl, hold_r             /* hold >>= this.bits */
    570 
    571         /* if (op & 16) {
    572          *   dist = this.val
    573          *   op &= 15
    574          *   if (op > bits) {
    575          *     hold |= *((unsigned short *)in)++ << bits;
    576          *     bits += 16
    577          *   }
    578          *   dist += hold & mask[op];
    579          *   bits -= op;
    580          *   hold >>= op;
    581          */
    582         movb    %al, %cl                /* cl = this.op */
    583 
    584         testb   $16, %al                /* if ((op & 16) == 0) */
    585         jz      .L_test_for_second_level_dist
    586         andb    $15, %cl                /* op &= 15 */
    587         jz      .L_check_dist_one
    588         cmpb    %cl, bits_r
    589         jae     .L_add_bits_to_dist     /* if (op <= bits) 97.6% */
    590 
    591         movb    %cl, %ch                /* stash op in ch, freeing cl */
    592         xorl    %eax, %eax
    593         lodsw                           /* al = *(ushort *)in++ */
    594         movb    bits_r, %cl             /* cl = bits, needs it for shifting */
    595         addb    $16, bits_r             /* bits += 16 */
    596         shll    %cl, %eax
    597         orl     %eax, hold_r            /* hold |= *((ushort *)in)++ << bits */
    598         movb    %ch, %cl                /* move op back to ecx */
    599 
    600 .L_add_bits_to_dist:
    601         movl    $1, %eax
    602         shll    %cl, %eax
    603         decl    %eax                    /* (1 << op) - 1 */
    604         subb    %cl, bits_r
    605         andl    hold_r, %eax            /* eax &= hold */
    606         shrl    %cl, hold_r
    607         addl    %eax, dist_r            /* dist += hold & ((1 << op) - 1) */
    608         jmp     .L_check_window
    609 
    610 .L_check_window:
    611         /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
    612          *       %ecx = nbytes
    613          *
    614          * nbytes = out - beg;
    615          * if (dist <= nbytes) {
    616          *   from = out - dist;
    617          *   do {
    618          *     PUP(out) = PUP(from);
    619          *   } while (--len > 0) {
    620          * }
    621          */
    622 
    623         movl    in_r, in(%esp)          /* save in so from can use it's reg */
    624         movl    out_r, %eax
    625         subl    beg(%esp), %eax         /* nbytes = out - beg */
    626 
    627         cmpl    dist_r, %eax
    628         jb      .L_clip_window          /* if (dist > nbytes) 4.2% */
    629 
    630         movl    len(%esp), %ecx
    631         movl    out_r, from_r
    632         subl    dist_r, from_r          /* from = out - dist */
    633 
    634         subl    $3, %ecx
    635         movb    (from_r), %al
    636         movb    %al, (out_r)
    637         movb    1(from_r), %al
    638         movb    2(from_r), %dl
    639         addl    $3, from_r
    640         movb    %al, 1(out_r)
    641         movb    %dl, 2(out_r)
    642         addl    $3, out_r
    643         rep     movsb
    644 
    645         movl    in(%esp), in_r          /* move in back to %esi, toss from */
    646         jmp     .L_while_test
    647 
    648 .align 16,0x90
    649 .L_check_dist_one:
    650         cmpl    $1, dist_r
    651         jne     .L_check_window
    652         cmpl    out_r, beg(%esp)
    653         je      .L_check_window
    654 
    655         decl    out_r
    656         movl    len(%esp), %ecx
    657         movb    (out_r), %al
    658         subl    $3, %ecx
    659 
    660         movb    %al, 1(out_r)
    661         movb    %al, 2(out_r)
    662         movb    %al, 3(out_r)
    663         addl    $4, out_r
    664         rep     stosb
    665 
    666         jmp     .L_while_test
    667 
    668 .align 16,0x90
    669 .L_test_for_second_level_length:
    670         /* else if ((op & 64) == 0) {
    671          *   this = lcode[this.val + (hold & mask[op])];
    672          * }
    673          */
    674         testb   $64, %al
    675         jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
    676 
    677         movl    $1, %eax
    678         shll    %cl, %eax
    679         decl    %eax
    680         andl    hold_r, %eax            /* eax &= hold */
    681         addl    %edx, %eax              /* eax += this.val */
    682         movl    lcode(%esp), %edx       /* edx = lcode */
    683         movl    (%edx,%eax,4), %eax     /* eax = lcode[val + (hold&mask[op])] */
    684         jmp     .L_dolen
    685 
    686 .align 16,0x90
    687 .L_test_for_second_level_dist:
    688         /* else if ((op & 64) == 0) {
    689          *   this = dcode[this.val + (hold & mask[op])];
    690          * }
    691          */
    692         testb   $64, %al
    693         jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
    694 
    695         movl    $1, %eax
    696         shll    %cl, %eax
    697         decl    %eax
    698         andl    hold_r, %eax            /* eax &= hold */
    699         addl    %edx, %eax              /* eax += this.val */
    700         movl    dcode(%esp), %edx       /* edx = dcode */
    701         movl    (%edx,%eax,4), %eax     /* eax = dcode[val + (hold&mask[op])] */
    702         jmp     .L_dodist
    703 
    704 .align 16,0x90
    705 .L_clip_window:
    706         /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
    707          *       %ecx = nbytes (arrives in %eax as out - beg)
    708          *
                 * Reached from .L_check_window when dist > out - beg, so the copy
                 * has to start inside the sliding window.  in was already stored
                 * to in(%esp) by .L_check_window, which frees %esi for from.
                 * Every path out of this section is a jump; nothing falls through
                 * into .L_wrap_around_window.
                 *
    709          * else {
    710          *   if (dist > wsize) {
    711          *     invalid distance
    712          *   }
    713          *   from = window;
    714          *   nbytes = dist - nbytes;
    715          *   if (write == 0) {
    716          *     from += wsize - nbytes;
    717          */
    718 #define nbytes_r %ecx
    719         movl    %eax, nbytes_r
    720         movl    wsize(%esp), %eax       /* prepare for dist compare */
    721         negl    nbytes_r                /* nbytes = -nbytes */
    722         movl    window(%esp), from_r    /* from = window */
    723 
    724         cmpl    dist_r, %eax
    725         jb      .L_invalid_distance_too_far /* if (dist > wsize) */
    726 
    727         addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
    728         cmpl    $0, write(%esp)
    729         jne     .L_wrap_around_window   /* if (write != 0) */
    730 
    731         subl    nbytes_r, %eax
    732         addl    %eax, from_r            /* from += wsize - nbytes */
    733 
    734         /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
    735          *       %ecx = nbytes, %eax = len
    736          *
    737          *     if (nbytes < len) {
    738          *       len -= nbytes;
    739          *       do {
    740          *         PUP(out) = PUP(from);
    741          *       } while (--nbytes);
    742          *       from = out - dist;
    743          *     }
    744          *   }
    745          */
    746 #define len_r %eax
    747         movl    len(%esp), len_r
    748         cmpl    nbytes_r, len_r
    749         jbe     .L_do_copy1             /* if (nbytes >= len) */
    750 
    751         subl    nbytes_r, len_r         /* len -= nbytes */
    752         rep     movsb                   /* copy the nbytes window bytes */
    753         movl    out_r, from_r
    754         subl    dist_r, from_r          /* from = out - dist */
    755         jmp     .L_do_copy1
    765 
    766 .L_wrap_around_window:
                /* Copy whose source lies in the window.  If write < nbytes the
                 * source is split in two pieces: first the piece reached by
                 * from += wsize + write - nbytes, then the piece starting at
                 * window.  Whatever is left of len afterwards is taken from
                 * out - dist by .L_do_copy1.  If write >= nbytes the source is
                 * one run and .L_contiguous_in_window handles it.
                 */
    767         /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
    768          *       %ecx = nbytes, %eax = write, %eax = len
    769          *
    770          *   else if (write < nbytes) {
    771          *     from += wsize + write - nbytes;
    772          *     nbytes -= write;
    773          *     if (nbytes < len) {
    774          *       len -= nbytes;
    775          *       do {
    776          *         PUP(out) = PUP(from);
    777          *       } while (--nbytes);
    778          *       from = window;
    779          *       nbytes = write;
    780          *       if (nbytes < len) {
    781          *         len -= nbytes;
    782          *         do {
    783          *           PUP(out) = PUP(from);
    784          *         } while(--nbytes);
    785          *         from = out - dist;
    786          *       }
    787          *     }
    788          *   }
    789          */
    790 #define write_r %eax
    791         movl    write(%esp), write_r    /* write_r = write */
    792         cmpl    write_r, nbytes_r
    793         jbe     .L_contiguous_in_window /* if (write >= nbytes) */
    794 
    795         addl    wsize(%esp), from_r
    796         addl    write_r, from_r
    797         subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
    798         subl    write_r, nbytes_r       /* nbytes -= write */
    799 #undef write_r
    800 
    801         movl    len(%esp), len_r        /* %eax is reused: it now holds len, not write */
    802         cmpl    nbytes_r, len_r
    803         jbe     .L_do_copy1             /* if (nbytes >= len) */
    804 
    805         subl    nbytes_r, len_r         /* len -= nbytes */
    806         rep     movsb                   /* copy nbytes (%ecx) bytes from from to out */
    807         movl    window(%esp), from_r    /* from = window */
    808         movl    write(%esp), nbytes_r   /* nbytes = write */
    809         cmpl    nbytes_r, len_r
    810         jbe     .L_do_copy1             /* if (nbytes >= len) */
    811 
    812         subl    nbytes_r, len_r         /* len -= nbytes */
    813         rep     movsb                   /* copy the write bytes at the start of window */
    814         movl    out_r, from_r
    815         subl    dist_r, from_r          /* from = out - dist */
    816         jmp     .L_do_copy1
    817 
    818 .L_contiguous_in_window:
                /* The source bytes form one run: advance from by write - nbytes
                 * and copy at most nbytes from there.  Any remainder of len is
                 * copied from out - dist.  Falls through into .L_do_copy1.
                 */
    819         /* regs: %esi = from, %ebp = hold, %bl = bits, %edi = out, %edx = dist
    820          *       %ecx = nbytes, %eax = write, %eax = len
    821          *
    822          *   else {
    823          *     from += write - nbytes;
    824          *     if (nbytes < len) {
    825          *       len -= nbytes;
    826          *       do {
    827          *         PUP(out) = PUP(from);
    828          *       } while (--nbytes);
    829          *       from = out - dist;
    830          *     }
    831          *   }
    832          */
    833 #define write_r %eax
                /* %eax still holds write, loaded by .L_wrap_around_window before
                 * it branched here; it is not reloaded.
                 */
    834         addl    write_r, from_r
    835         subl    nbytes_r, from_r        /* from += write - nbytes */
    836 #undef write_r
    837 
    838         movl    len(%esp), len_r        /* %eax is reused: it now holds len */
    839         cmpl    nbytes_r, len_r
    840         jbe     .L_do_copy1             /* if (nbytes >= len) */
    841 
    842         subl    nbytes_r, len_r         /* len -= nbytes */
    843         rep     movsb                   /* copy nbytes (%ecx) bytes from from to out */
    844         movl    out_r, from_r
    845         subl    dist_r, from_r          /* from = out - dist */
    846 
    847 .L_do_copy1:
                /* Final copy of the remaining len bytes from from to out, then
                 * restore in (which shares %esi with from) and loop.
                 */
    848         /* regs: %esi = from, %esi = in, %ebp = hold, %bl = bits, %edi = out
    849          *       %eax = len
    850          *
    851          *     while (len > 0) {
    852          *       PUP(out) = PUP(from);
    853          *       len--;
    854          *     }
    855          *   }
    856          * } while (in < last && out < end);
    857          */
    858 #undef nbytes_r
    859 #define in_r %esi
    860         movl    len_r, %ecx             /* rep count = len */
    861         rep     movsb                   /* copy len bytes; does nothing when len == 0 */
    862 
    863         movl    in(%esp), in_r          /* move in back to %esi, toss from */
    864         jmp     .L_while_test
    865 
    866 #undef len_r
    867 #undef dist_r
    868 
    869 #endif /* NO_MMX || RUN_TIME_MMX */
    870 
    871 
    872 /*** MMX code ***/
    873 
    874 #if defined( USE_MMX ) || defined( RUN_TIME_MMX )
    875 
    876 .align 32,0x90
    877 .L_init_mmx:
                /* Set up the MMX variant of the decode loop: move hold into an MMX
                 * register, move the bit count into %ebp, cache the length and
                 * distance masks in MMX registers, and load lcode into %ebx.
                 */
    878         emms
    879 
    880 #undef  bits_r
    881 #undef  bitslong_r
    882 #define bitslong_r %ebp
    883 #define hold_mm    %mm0
    884         movd    %ebp, hold_mm           /* hold_mm = hold (was in %ebp) */
    885         movl    %ebx, bitslong_r        /* bits moves from %ebx to %ebp */
    886 
    887 #define used_mm   %mm1
    888 #define dmask2_mm %mm2
    889 #define lmask2_mm %mm3
    890 #define lmask_mm  %mm4
    891 #define dmask_mm  %mm5
    892 #define tmp_mm    %mm6
    893 
    894         movd    lmask(%esp), lmask_mm
    895         movq    lmask_mm, lmask2_mm     /* keep a copy; pand overwrites lmask_mm */
    896         movd    dmask(%esp), dmask_mm
    897         movq    dmask_mm, dmask2_mm     /* keep a copy; pand overwrites dmask_mm */
    898         pxor    used_mm, used_mm        /* used = 0, so the first psrlq shifts by 0 */
    899         movl    lcode(%esp), %ebx       /* ebx = lcode */
    900         jmp     .L_do_loop_mmx
    901 
    902 .align 32,0x90
    903 .L_while_test_mmx:
    904         /* while (in < last && out < end)
    905          */
    906         cmpl    out_r, end(%esp)
    907         jbe     .L_break_loop           /* if (out >= end) */
    908 
    909         cmpl    in_r, last(%esp)
    910         jbe     .L_break_loop           /* if (in >= last) */
    911 
    912 .L_do_loop_mmx:
                /* Drop the bits consumed by the previous code, refill hold_mm
                 * with 4 more input bytes when bits <= 32, then look up the
                 * next literal/length code.
                 */
    913         psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
    914 
    915         cmpl    $32, bitslong_r
    916         ja      .L_get_length_code_mmx  /* if (32 < bits) */
    917 
    918         movd    bitslong_r, tmp_mm      /* shift count = bits */
    919         movd    (in_r), %mm7            /* mm7 = next 4 input bytes */
    920         addl    $4, in_r
    921         psllq   tmp_mm, %mm7            /* place them above the bits already held */
    922         addl    $32, bitslong_r         /* bits += 32 */
    923         por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
    924 
    925 .L_get_length_code_mmx:
    926         pand    hold_mm, lmask_mm       /* lmask_mm = hold & lmask */
    927         movd    lmask_mm, %eax
    928         movq    lmask2_mm, lmask_mm     /* restore lmask_mm from its copy */
    929         movl    (%ebx,%eax,4), %eax     /* eax = lcode[hold & lmask] */
    930 
    931 .L_dolen_mmx:
                /* %eax holds a literal/length table entry: %al = op,
                 * %ah = bits, upper 16 bits = val.  The code's bits are only
                 * recorded in used_mm here; hold_mm is shifted by that amount
                 * at the next psrlq.
                 */
    932         movzbl  %ah, %ecx               /* ecx = this.bits */
    933         movd    %ecx, used_mm           /* remember how many bits to drop from hold_mm */
    934         subl    %ecx, bitslong_r        /* bits -= this.bits */
    935 
    936         testb   %al, %al
    937         jnz     .L_test_for_length_base_mmx /* if (op != 0) 45.7% */
    938 
    939         shrl    $16, %eax               /* output this.val char */
    940         stosb                           /* *out++ = %al */
    941         jmp     .L_while_test_mmx
    942 
    943 .L_test_for_length_base_mmx:
    944 #define len_r  %edx
    945         movl    %eax, len_r             /* len = this */
    946         shrl    $16, len_r              /* len = this.val */
    947 
    948         testb   $16, %al
    949         jz      .L_test_for_second_level_length_mmx /* if ((op & 16) == 0) 8% */
    950         andl    $15, %eax               /* op &= 15 */
    951         jz      .L_decode_distance_mmx  /* if (!op) */
    952 
                /* op extra bits follow the length code: drop the code's bits,
                 * then add the low op bits of hold to len.
                 */
    953         psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
    954         movd    %eax, used_mm           /* next shift amount = op */
    955         movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
    956         subl    %eax, bitslong_r        /* bits -= op */
    957         andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
    958         addl    %ecx, len_r             /* len += hold & mask[op] */
    959 
    960 .L_decode_distance_mmx:
                /* Drop the bits used so far, refill hold_mm with 4 more input
                 * bytes when bits <= 32, then look up the distance code.
                 */
    961         psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
    962 
    963         cmpl    $32, bitslong_r
    964         ja      .L_get_dist_code_mmx    /* if (32 < bits) */
    965 
    966         movd    bitslong_r, tmp_mm      /* shift count = bits */
    967         movd    (in_r), %mm7            /* mm7 = next 4 input bytes */
    968         addl    $4, in_r
    969         psllq   tmp_mm, %mm7            /* place them above the bits already held */
    970         addl    $32, bitslong_r         /* bits += 32 */
    971         por     %mm7, hold_mm           /* hold_mm |= *((uint *)in)++ << bits */
    972 
    973 .L_get_dist_code_mmx:
    974         movl    dcode(%esp), %ebx       /* ebx = dcode */
    975         pand    hold_mm, dmask_mm       /* dmask_mm = hold & dmask */
    976         movd    dmask_mm, %eax
    977         movq    dmask2_mm, dmask_mm     /* restore dmask_mm from its copy */
    978         movl    (%ebx,%eax,4), %eax     /* eax = dcode[hold & dmask] */
    979 
    980 .L_dodist_mmx:
                /* %eax holds a distance table entry: %al = op, %ah = bits,
                 * upper 16 bits = val.  dist_r takes over %ebx, which held the
                 * table pointer until now.
                 */
    981 #define dist_r %ebx
    982         movzbl  %ah, %ecx               /* ecx = this.bits */
    983         movl    %eax, dist_r
    984         shrl    $16, dist_r             /* dist  = this.val */
    985         subl    %ecx, bitslong_r        /* bits -= this.bits */
    986         movd    %ecx, used_mm           /* remember how many bits to drop from hold_mm */
    987 
    988         testb   $16, %al                /* if ((op & 16) == 0) */
    989         jz      .L_test_for_second_level_dist_mmx
    990         andl    $15, %eax               /* op &= 15 */
    991         jz      .L_check_dist_one_mmx   /* no extra bits: try the dist == 1 fast path */
    992 
    993 .L_add_bits_to_dist_mmx:
    994         psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
    995         movd    %eax, used_mm           /* save bit length of current op */
    996         movd    hold_mm, %ecx           /* get the next bits on input stream */
    997         subl    %eax, bitslong_r        /* bits -= op bits */
    998         andl    .L_mask(,%eax,4), %ecx  /* ecx   = hold & mask[op] */
    999         addl    %ecx, dist_r            /* dist += hold & mask[op] */
   1000 
   1001 .L_check_window_mmx:
                /* If dist reaches back no further than the output produced so
                 * far (out - beg), copy len bytes from out - dist directly;
                 * otherwise go to .L_clip_window_mmx.  The first three bytes are
                 * moved individually and rep movsb copies the remaining len - 3.
                 */
   1002         movl    in_r, in(%esp)          /* save in so from can use its reg */
   1003         movl    out_r, %eax
   1004         subl    beg(%esp), %eax         /* nbytes = out - beg */
   1005 
   1006         cmpl    dist_r, %eax
   1007         jb      .L_clip_window_mmx      /* if (dist > nbytes) 4.2% */
   1008 
   1009         movl    len_r, %ecx             /* ecx = len; %edx is free after this */
   1010         movl    out_r, from_r
   1011         subl    dist_r, from_r          /* from = out - dist */
   1012 
   1013         subl    $3, %ecx                /* three bytes are copied by hand below */
   1014         movb    (from_r), %al
   1015         movb    %al, (out_r)            /* out[0] is stored before from[1], from[2] are read */
   1016         movb    1(from_r), %al
   1017         movb    2(from_r), %dl          /* %dl is the low byte of len_r (%edx) */
   1018         addl    $3, from_r
   1019         movb    %al, 1(out_r)
   1020         movb    %dl, 2(out_r)
   1021         addl    $3, out_r
   1022         rep     movsb                   /* copy the remaining len - 3 bytes */
   1023 
   1024         movl    in(%esp), in_r          /* move in back to %esi, toss from */
   1025         movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
   1026         jmp     .L_while_test_mmx
   1027 
   1028 .align 16,0x90
   1029 .L_check_dist_one_mmx:
                /* Fast path for dist == 1 when out != beg: the match repeats
                 * the byte just before out, so fill with stores instead of a
                 * copy.  Three stores are done by hand; rep stosb writes the
                 * remaining len - 3.  in_r is not touched on this path, so it
                 * needs no save/restore.
                 */
   1030         cmpl    $1, dist_r
   1031         jne     .L_check_window_mmx     /* if (dist != 1) use the general path */
   1032         cmpl    out_r, beg(%esp)
   1033         je      .L_check_window_mmx     /* if (out == beg) use the general path */
   1034 
   1035         decl    out_r                   /* point at the previous output byte */
   1036         movl    len_r, %ecx
   1037         movb    (out_r), %al            /* al = byte to repeat */
   1038         subl    $3, %ecx                /* three bytes are stored by hand below */
   1039 
   1040         movb    %al, 1(out_r)
   1041         movb    %al, 2(out_r)
   1042         movb    %al, 3(out_r)
   1043         addl    $4, out_r               /* undo the decl and skip the three stored bytes */
   1044         rep     stosb                   /* store the remaining len - 3 copies of %al */
   1045 
   1046         movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
   1047         jmp     .L_while_test_mmx
   1048 
   1049 .align 16,0x90
   1050 .L_test_for_second_level_length_mmx:
                /* op has bit 16 clear.  If bit 64 is set this is end-of-block or
                 * an invalid code; otherwise the entry links to a second-level
                 * table: index lcode with this.val plus the low op bits of hold
                 * and decode the entry found there.
                 */
   1051         testb   $64, %al
   1052         jnz     .L_test_for_end_of_block  /* if ((op & 64) != 0) */
   1053 
   1054         andl    $15, %eax               /* eax = op & 15 */
   1055         psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
   1056         movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
   1057         andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
   1058         addl    len_r, %ecx             /* ecx += this.val (held in len_r) */
   1059         movl    (%ebx,%ecx,4), %eax     /* eax = lcode[this.val + (hold & mask[op])] */
   1060         jmp     .L_dolen_mmx
   1061 
   1062 .align 16,0x90
   1063 .L_test_for_second_level_dist_mmx:
                /* op has bit 16 clear.  If bit 64 is set the distance code is
                 * invalid; otherwise the entry links to a second-level table:
                 * index dcode with this.val plus the low op bits of hold and
                 * decode the entry found there.
                 */
   1064         testb   $64, %al
   1065         jnz     .L_invalid_distance_code  /* if ((op & 64) != 0) */
   1066 
   1067         andl    $15, %eax               /* eax = op & 15 */
   1068         psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
   1069         movd    hold_mm, %ecx           /* ecx = low 32 bits of hold */
   1070         andl    .L_mask(,%eax,4), %ecx  /* ecx = hold & mask[op] */
   1071         movl    dcode(%esp), %eax       /* eax = dcode */
   1072         addl    dist_r, %ecx            /* ecx += this.val (held in dist_r) */
   1073         movl    (%eax,%ecx,4), %eax     /* eax = dcode[this.val + (hold & mask[op])] */
   1074         jmp     .L_dodist_mmx
   1075 
   1076 .align 16,0x90
   1077 .L_clip_window_mmx:
                /* dist reaches back past the output produced in this call, so
                 * part of the source comes from the window.
                 *
                 * on entry: %eax = out - beg (set by .L_check_window_mmx),
                 *           dist_r = dist, len_r = len
                 *
                 *   if (dist > wsize) invalid distance too far back
                 *   from = window;
                 *   nbytes = dist - nbytes;
                 *   if (write == 0) {
                 *     from += wsize - nbytes;
                 *     if (nbytes < len) {
                 *       len -= nbytes;
                 *       copy nbytes bytes from from to out
                 *       from = out - dist;
                 *     }
                 *   }
                 *   else: .L_wrap_around_window_mmx
                 */
   1078 #define nbytes_r %ecx
   1079         movl    %eax, nbytes_r
   1080         movl    wsize(%esp), %eax       /* prepare for dist compare */
   1081         negl    nbytes_r                /* nbytes = -nbytes */
   1082         movl    window(%esp), from_r    /* from = window */
   1083 
   1084         cmpl    dist_r, %eax
   1085         jb      .L_invalid_distance_too_far /* if (dist > wsize) */
   1086 
   1087         addl    dist_r, nbytes_r        /* nbytes = dist - nbytes */
   1088         cmpl    $0, write(%esp)
   1089         jne     .L_wrap_around_window_mmx /* if (write != 0) */
   1090 
   1091         subl    nbytes_r, %eax
   1092         addl    %eax, from_r            /* from += wsize - nbytes */
   1093 
   1094         cmpl    nbytes_r, len_r
   1095         jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
   1096 
   1097         subl    nbytes_r, len_r         /* len -= nbytes */
   1098         rep     movsb                   /* copy nbytes (%ecx) bytes from from to out */
   1099         movl    out_r, from_r
   1100         subl    dist_r, from_r          /* from = out - dist */
   1101         jmp     .L_do_copy1_mmx
   1111 
   1112 .L_wrap_around_window_mmx:
                /* write != 0.  If write < nbytes the source is split in two
                 * pieces: first the piece reached by
                 * from += wsize + write - nbytes, then the piece starting at
                 * window.  Whatever is left of len afterwards is taken from
                 * out - dist by .L_do_copy1_mmx.  If write >= nbytes the source
                 * is one run and .L_contiguous_in_window_mmx handles it.
                 */
   1113 #define write_r %eax
   1114         movl    write(%esp), write_r    /* write_r = write */
   1115         cmpl    write_r, nbytes_r
   1116         jbe     .L_contiguous_in_window_mmx /* if (write >= nbytes) */
   1117 
   1118         addl    wsize(%esp), from_r
   1119         addl    write_r, from_r
   1120         subl    nbytes_r, from_r        /* from += wsize + write - nbytes */
   1121         subl    write_r, nbytes_r       /* nbytes -= write */
   1122 #undef write_r
   1123 
   1124         cmpl    nbytes_r, len_r
   1125         jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
   1126 
   1127         subl    nbytes_r, len_r         /* len -= nbytes */
   1128         rep     movsb                   /* copy nbytes (%ecx) bytes from from to out */
   1129         movl    window(%esp), from_r    /* from = window */
   1130         movl    write(%esp), nbytes_r   /* nbytes = write */
   1131         cmpl    nbytes_r, len_r
   1132         jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
   1133 
   1134         subl    nbytes_r, len_r         /* len -= nbytes */
   1135         rep     movsb                   /* copy the write bytes at the start of window */
   1136         movl    out_r, from_r
   1137         subl    dist_r, from_r          /* from = out - dist */
   1138         jmp     .L_do_copy1_mmx
   1139 
   1140 .L_contiguous_in_window_mmx:
                /* The source bytes form one run: advance from by write - nbytes
                 * and copy at most nbytes from there.  Any remainder of len is
                 * copied from out - dist.  Falls through into .L_do_copy1_mmx.
                 * %eax still holds write, loaded by .L_wrap_around_window_mmx
                 * before it branched here.
                 */
   1141 #define write_r %eax
   1142         addl    write_r, from_r
   1143         subl    nbytes_r, from_r        /* from += write - nbytes */
   1144 #undef write_r
   1145 
   1146         cmpl    nbytes_r, len_r
   1147         jbe     .L_do_copy1_mmx         /* if (nbytes >= len) */
   1148 
   1149         subl    nbytes_r, len_r         /* len -= nbytes */
   1150         rep     movsb                   /* copy nbytes (%ecx) bytes from from to out */
   1151         movl    out_r, from_r
   1152         subl    dist_r, from_r          /* from = out - dist */
   1153 
   1154 .L_do_copy1_mmx:
                /* Final copy of the remaining len bytes from from to out, then
                 * restore the two registers that were borrowed (in in %esi,
                 * lcode in %ebx) and loop.
                 */
   1155 #undef nbytes_r
   1156 #define in_r %esi
   1157         movl    len_r, %ecx             /* rep count = len */
   1158         rep     movsb                   /* copy len bytes; does nothing when len == 0 */
   1159 
   1160         movl    in(%esp), in_r          /* move in back to %esi, toss from */
   1161         movl    lcode(%esp), %ebx       /* move lcode back to %ebx, toss dist */
   1162         jmp     .L_while_test_mmx
   1163 
   1164 #undef hold_r
   1165 #undef bitslong_r
   1166 
   1167 #endif /* USE_MMX || RUN_TIME_MMX */
   1168 
   1169 
   1170 /*** USE_MMX, NO_MMX, and RUN_TIME_MMX from here on ***/
   1171 
   1172 .L_invalid_distance_code:
   1173         /* else {
   1174          *   strm->msg = "invalid distance code";
   1175          *   state->mode = BAD;
   1176          * }
   1177          */
   1178         movl    $.L_invalid_distance_code_msg, %ecx /* ecx = msg for .L_update_stream_state */
   1179         movl    $INFLATE_MODE_BAD, %edx /* edx = new state->mode */
   1180         jmp     .L_update_stream_state
   1181 
   1182 .L_test_for_end_of_block:
   1183         /* else if (op & 32) {
   1184          *   state->mode = TYPE;
   1185          *   break;
   1186          * }
   1187          */
   1188         testb   $32, %al
   1189         jz      .L_invalid_literal_length_code  /* if ((op & 32) == 0) */
   1190 
                /* End of block: .L_update_stream_state skips the strm->msg
                 * store when %ecx is zero, so only state->mode is changed.
                 */
   1191         xorl    %ecx, %ecx              /* msg = NULL */
   1192         movl    $INFLATE_MODE_TYPE, %edx /* edx = new state->mode */
   1193         jmp     .L_update_stream_state
   1194 
   1195 .L_invalid_literal_length_code:
   1196         /* else {
   1197          *   strm->msg = "invalid literal/length code";
   1198          *   state->mode = BAD;
   1199          * }
   1200          */
   1201         movl    $.L_invalid_literal_length_code_msg, %ecx /* ecx = msg for .L_update_stream_state */
   1202         movl    $INFLATE_MODE_BAD, %edx /* edx = new state->mode */
   1203         jmp     .L_update_stream_state
   1204 
   1205 .L_invalid_distance_too_far:
   1206         /* strm->msg = "invalid distance too far back";
   1207          * state->mode = BAD;
   1208          */
   1209         movl    in(%esp), in_r          /* from_r has in's reg, put in back */
   1210         movl    $.L_invalid_distance_too_far_msg, %ecx /* ecx = msg for .L_update_stream_state */
   1211         movl    $INFLATE_MODE_BAD, %edx /* edx = new state->mode */
   1212         jmp     .L_update_stream_state
   1213 
   1214 .L_update_stream_state:
   1215         /* set strm->msg = %ecx, strm->state->mode = %edx */
                /* in:  %ecx = message pointer, or 0 to leave strm->msg unchanged
                 *      %edx = value to store in state->mode
                 * clobbers %eax
                 */
   1216         movl    strm_sp(%esp), %eax     /* eax = strm */
   1217         testl   %ecx, %ecx              /* if (msg != NULL) */
   1218         jz      .L_skip_msg
   1219         movl    %ecx, msg_strm(%eax)    /* strm->msg = msg */
   1220 .L_skip_msg:
   1221         movl    state_strm(%eax), %eax  /* state = strm->state */
   1222         movl    %edx, mode_state(%eax)  /* state->mode = edx (BAD | TYPE) */
   1223         jmp     .L_break_loop
   1224 
   1225 .align 32,0x90
   1226 .L_break_loop:
   1227 
   1228 /*
   1229  * Regs:
   1230  *
   1231  * bits = %ebp when mmx, and in %ebx when non-mmx
   1232  * hold = %hold_mm when mmx, and in %ebp when non-mmx
   1233  * in   = %esi
   1234  * out  = %edi
   1235  */
   1236 
   1237 #if defined( USE_MMX ) || defined( RUN_TIME_MMX )
   1238 
   1239 #if defined( RUN_TIME_MMX )
   1240 
   1241         cmpl    $DO_USE_MMX, inflate_fast_use_mmx
   1242         jne     .L_update_next_in       /* non-mmx path: bits is already in %ebx */
   1243 
   1244 #endif /* RUN_TIME_MMX */
   1245 
   1246         movl    %ebp, %ebx              /* mmx path: move bits into %ebx for the common code */
   1247 
   1248 .L_update_next_in:
   1249 
   1250 #endif
   1251 
   1252 #define strm_r  %eax
   1253 #define state_r %edx
   1254 
   1255         /* len = bits >> 3;
   1256          * in -= len;
   1257          * bits -= len << 3;
   1258          * hold &= (1U << bits) - 1;
   1259          * state->hold = hold;
   1260          * state->bits = bits;
   1261          * strm->next_in = in;
   1262          * strm->next_out = out;
   1263          */
   1264         movl    strm_sp(%esp), strm_r   /* strm_r = strm */
   1265         movl    %ebx, %ecx              /* ecx = bits */
   1266         movl    state_strm(strm_r), state_r /* state_r = strm->state */
   1267         shrl    $3, %ecx                /* len = bits >> 3 (whole unused bytes) */
   1268         subl    %ecx, in_r              /* in -= len */
   1269         shll    $3, %ecx
   1270         subl    %ecx, %ebx              /* bits -= len << 3 */
   1271         movl    out_r, next_out_strm(strm_r) /* strm->next_out = out */
   1272         movl    %ebx, bits_state(state_r) /* state->bits = bits */
   1273         movl    %ebx, %ecx              /* keep bits in %cl for the hold mask shift */
   1274 
                /* If last points at the local buf, translate in from an offset
                 * inside buf to the same offset from strm->next_in and recompute
                 * last from strm->next_in and strm->avail_in.
                 */
   1275         leal    buf(%esp), %ebx         /* ebx = address of buf */
   1276         cmpl    %ebx, last(%esp)
   1277         jne     .L_buf_not_used         /* if buf != last */
   1278 
   1279         subl    %ebx, in_r              /* in -= buf */
   1280         movl    next_in_strm(strm_r), %ebx
   1281         movl    %ebx, last(%esp)        /* last = strm->next_in */
   1282         addl    %ebx, in_r              /* in += strm->next_in */
   1283         movl    avail_in_strm(strm_r), %ebx
   1284         subl    $11, %ebx               /* ebx = avail_in - 11 */
   1285         addl    %ebx, last(%esp)    /* last = &strm->next_in[ avail_in - 11 ] */
   1286 
   1287 .L_buf_not_used:
                /* Store next_in, then mask hold down to the remaining bits
                 * (bit count in %cl) and store it in state->hold.  On the mmx
                 * path hold is first moved from hold_mm back into %ebp.
                 */
   1288         movl    in_r, next_in_strm(strm_r) /* strm->next_in = in */
   1289 
   1290         movl    $1, %ebx
   1291         shll    %cl, %ebx
   1292         decl    %ebx                    /* ebx = (1U << bits) - 1 */
   1293 
   1294 #if defined( USE_MMX ) || defined( RUN_TIME_MMX )
   1295 
   1296 #if defined( RUN_TIME_MMX )
   1297 
   1298         cmpl    $DO_USE_MMX, inflate_fast_use_mmx
   1299         jne     .L_update_hold          /* non-mmx path: hold is already in %ebp */
   1300 
   1301 #endif /* RUN_TIME_MMX */
   1302 
   1303         psrlq   used_mm, hold_mm        /* hold_mm >>= last bit length */
   1304         movd    hold_mm, %ebp           /* ebp = low 32 bits of hold */
   1305 
   1306         emms                            /* leave MMX state */
   1307 
   1308 .L_update_hold:
   1309 
   1310 #endif /* USE_MMX || RUN_TIME_MMX */
   1311 
   1312         andl    %ebx, %ebp              /* hold &= (1U << bits) - 1 */
   1313         movl    %ebp, hold_state(state_r) /* state->hold = hold */
   1314 
   1315 #define last_r %ebx
   1316 
   1317         /* strm->avail_in = in < last ? 11 + (last - in) : 11 - (in - last) */
   1318         movl    last(%esp), last_r      /* last_r = last */
   1319         cmpl    in_r, last_r
   1320         jbe     .L_last_is_smaller     /* if (in >= last) */
   1321 
   1322         subl    in_r, last_r           /* last -= in */
   1323         addl    $11, last_r            /* last += 11 */
   1324         movl    last_r, avail_in_strm(strm_r) /* strm->avail_in = 11 + (last - in) */
   1325         jmp     .L_fixup_out
   1326 .L_last_is_smaller:
   1327         subl    last_r, in_r           /* in -= last */
   1328         negl    in_r                   /* in = -in */
   1329         addl    $11, in_r              /* in += 11 */
   1330         movl    in_r, avail_in_strm(strm_r) /* strm->avail_in = 11 - (in - last) */
   1331 
   1332 #undef last_r
   1333 #define end_r %ebx
   1334 
   1335 .L_fixup_out:
   1336         /* strm->avail_out = out < end ? 257 + (end - out) : 257 - (out - end)*/
   1337         movl    end(%esp), end_r       /* end_r = end */
   1338         cmpl    out_r, end_r
   1339         jbe     .L_end_is_smaller      /* if (out >= end) */
   1340 
   1341         subl    out_r, end_r           /* end -= out */
   1342         addl    $257, end_r            /* end += 257 */
   1343         movl    end_r, avail_out_strm(strm_r) /* strm->avail_out = 257 + (end - out) */
   1344         jmp     .L_done
   1345 .L_end_is_smaller:
   1346         subl    end_r, out_r           /* out -= end */
   1347         negl    out_r                  /* out = -out */
   1348         addl    $257, out_r            /* out += 257 */
   1349         movl    out_r, avail_out_strm(strm_r) /* strm->avail_out = 257 - (out - end) */
   1350 
   1351 #undef end_r
   1352 #undef strm_r
   1353 #undef state_r
   1354 
   1355 .L_done:
                /* Function exit: drop the local variable area, then restore
                 * flags and registers.  NOTE(review): the pops presumably mirror
                 * pushes in the prologue, which is outside this view -- confirm
                 * the order there.
                 */
   1356         addl    $local_var_size, %esp   /* release the local variable area */
   1357         popf                            /* restore flags */
   1358         popl    %ebx
   1359         popl    %ebp
   1360         popl    %esi
   1361         popl    %edi
   1362         ret
   1363 
   1364 #if defined( GAS_ELF )
   1365 /* elf info */
   1366 .type inflate_fast,@function
   1367 .size inflate_fast,.-inflate_fast
   1368 #endif
   1369