Home | History | Annotate | Download | only in zlib
      1 #!/usr/bin/env python
      2 # Demo program for zlib; it compresses or decompresses files, but *doesn't*
      3 # delete the original.  This doesn't support all of gzip's options.
      4 #
      5 # The 'gzip' module in the standard library provides a more complete
      6 # implementation of gzip-format files.
      7 
import os
import struct
import sys
import zlib
      9 
     10 FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT = 1, 2, 4, 8, 16
     11 
     12 def write32(output, value):
     13     output.write(chr(value & 255)) ; value=value // 256
     14     output.write(chr(value & 255)) ; value=value // 256
     15     output.write(chr(value & 255)) ; value=value // 256
     16     output.write(chr(value & 255))
     17 
     18 def read32(input):
     19     v = ord(input.read(1))
     20     v += (ord(input.read(1)) << 8 )
     21     v += (ord(input.read(1)) << 16)
     22     v += (ord(input.read(1)) << 24)
     23     return v
     24 
     25 def compress (filename, input, output):
     26     output.write('\037\213\010')        # Write the header, ...
     27     output.write(chr(FNAME))            # ... flag byte ...
     28 
     29     statval = os.stat(filename)           # ... modification time ...
     30     mtime = statval[8]
     31     write32(output, mtime)
     32     output.write('\002')                # ... slowest compression alg. ...
     33     output.write('\377')                # ... OS (=unknown) ...
     34     output.write(filename+'\000')       # ... original filename ...
     35 
     36     crcval = zlib.crc32("")
     37     compobj = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS,
     38                              zlib.DEF_MEM_LEVEL, 0)
     39     while True:
     40         data = input.read(1024)
     41         if data == "":
     42             break
     43         crcval = zlib.crc32(data, crcval)
     44         output.write(compobj.compress(data))
     45     output.write(compobj.flush())
     46     write32(output, crcval)             # ... the CRC ...
     47     write32(output, statval[6])         # and the file size.
     48 
     49 def decompress (input, output):
     50     magic = input.read(2)
     51     if magic != '\037\213':
     52         print 'Not a gzipped file'
     53         sys.exit(0)
     54     if ord(input.read(1)) != 8:
     55         print 'Unknown compression method'
     56         sys.exit(0)
     57     flag = ord(input.read(1))
     58     input.read(4+1+1)                   # Discard modification time,
     59                                         # extra flags, and OS byte.
     60     if flag & FEXTRA:
     61         # Read & discard the extra field, if present
     62         xlen = ord(input.read(1))
     63         xlen += 256*ord(input.read(1))
     64         input.read(xlen)
     65     if flag & FNAME:
     66         # Read and discard a null-terminated string containing the filename
     67         while True:
     68             s = input.read(1)
     69             if s == '\0': break
     70     if flag & FCOMMENT:
     71         # Read and discard a null-terminated string containing a comment
     72         while True:
     73             s=input.read(1)
     74             if s=='\0': break
     75     if flag & FHCRC:
     76         input.read(2)                   # Read & discard the 16-bit header CRC
     77 
     78     decompobj = zlib.decompressobj(-zlib.MAX_WBITS)
     79     crcval = zlib.crc32("")
     80     length = 0
     81     while True:
     82         data=input.read(1024)
     83         if data == "":
     84             break
     85         decompdata = decompobj.decompress(data)
     86         output.write(decompdata)
     87         length += len(decompdata)
     88         crcval = zlib.crc32(decompdata, crcval)
     89 
     90     decompdata = decompobj.flush()
     91     output.write(decompdata)
     92     length += len(decompdata)
     93     crcval = zlib.crc32(decompdata, crcval)
     94 
     95     # We've read to the end of the file, so we have to rewind in order
     96     # to reread the 8 bytes containing the CRC and the file size.  The
     97     # decompressor is smart and knows when to stop, so feeding it
     98     # extra data is harmless.
     99     input.seek(-8, 2)
    100     crc32 = read32(input)
    101     isize = read32(input)
    102     if crc32 != crcval:
    103         print 'CRC check failed.'
    104     if isize != length:
    105         print 'Incorrect length of data produced'
    106 
    107 def main():
    108     if len(sys.argv)!=2:
    109         print 'Usage: minigzip.py <filename>'
    110         print '  The file will be compressed or decompressed.'
    111         sys.exit(0)
    112 
    113     filename = sys.argv[1]
    114     if filename.endswith('.gz'):
    115         compressing = False
    116         outputname = filename[:-3]
    117     else:
    118         compressing = True
    119         outputname = filename + '.gz'
    120 
    121     input = open(filename, 'rb')
    122     output = open(outputname, 'wb')
    123 
    124     if compressing:
    125         compress(filename, input, output)
    126     else:
    127         decompress(input, output)
    128 
    129     input.close()
    130     output.close()
    131 
    132 if __name__ == '__main__':
    133     main()
    134