Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Read-only access to Zip archives, with minimal heap allocation.
     19  *
     20  * This is similar to the more-complete ZipFile class, but no attempt
     21  * has been made to make them interchangeable.  This class operates under
     22  * a very different set of assumptions and constraints.
     23  *
 * One such assumption is that if you're getting file descriptors for
 * use with this class as a child of a fork() operation, you must be on
 * a platform that implements pread() to guarantee correct operation.
 * This is because pread() can atomically read at a file offset without
 * needing a lock around an lseek() + read() pair.
     29  */
     30 #ifndef __LIBS_ZIPFILERO_H
     31 #define __LIBS_ZIPFILERO_H
     32 
     33 #include <utils/Compat.h>
     34 #include <utils/Errors.h>
     35 #include <utils/FileMap.h>
     36 #include <utils/threads.h>
     37 
     38 #include <stdio.h>
     39 #include <stdlib.h>
     40 #include <unistd.h>
     41 #include <time.h>
     42 
     43 namespace android {
     44 
/*
 * Opaque handle identifying a single entry in a ZipFileRO archive.
 *
 * This is a trivial typedef whose only purpose is to ensure that a
 * ZipEntryRO is not treated as a simple integer.  NULL is used to indicate
 * an invalid value (e.g. "entry not found").  Callers should treat the value
 * as a token to pass back to ZipFileRO and never dereference it.
 */
typedef void* ZipEntryRO;
     50 
     51 /*
     52  * Open a Zip archive for reading.
     53  *
     54  * We want "open" and "find entry by name" to be fast operations, and we
     55  * want to use as little memory as possible.  We memory-map the file,
     56  * and load a hash table with pointers to the filenames (which aren't
     57  * null-terminated).  The other fields are at a fixed offset from the
     58  * filename, so we don't need to extract those (but we do need to byte-read
     59  * and endian-swap them every time we want them).
     60  *
     61  * To speed comparisons when doing a lookup by name, we could make the mapping
     62  * "private" (copy-on-write) and null-terminate the filenames after verifying
     63  * the record structure.  However, this requires a private mapping of
     64  * every page that the Central Directory touches.  Easier to tuck a copy
     65  * of the string length into the hash table entry.
     66  *
     67  * NOTE: If this is used on file descriptors inherited from a fork() operation,
     68  * you must be on a platform that implements pread() to guarantee correctness
     69  * on the shared file descriptors.
     70  */
     71 class ZipFileRO {
     72 public:
     73     ZipFileRO()
     74         : mFd(-1), mFileName(NULL), mFileLength(-1),
     75           mDirectoryMap(NULL),
     76           mNumEntries(-1), mDirectoryOffset(-1),
     77           mHashTableSize(-1), mHashTable(NULL)
     78         {}
     79 
     80     ~ZipFileRO();
     81 
     82     /*
     83      * Open an archive.
     84      */
     85     status_t open(const char* zipFileName);
     86 
     87     /*
     88      * Find an entry, by name.  Returns the entry identifier, or NULL if
     89      * not found.
     90      *
     91      * If two entries have the same name, one will be chosen at semi-random.
     92      */
     93     ZipEntryRO findEntryByName(const char* fileName) const;
     94 
     95     /*
     96      * Return the #of entries in the Zip archive.
     97      */
     98     int getNumEntries(void) const {
     99         return mNumEntries;
    100     }
    101 
    102     /*
    103      * Return the Nth entry.  Zip file entries are not stored in sorted
    104      * order, and updated entries may appear at the end, so anyone walking
    105      * the archive needs to avoid making ordering assumptions.  We take
    106      * that further by returning the Nth non-empty entry in the hash table
    107      * rather than the Nth entry in the archive.
    108      *
    109      * Valid values are [0..numEntries).
    110      *
    111      * [This is currently O(n).  If it needs to be fast we can allocate an
    112      * additional data structure or provide an iterator interface.]
    113      */
    114     ZipEntryRO findEntryByIndex(int idx) const;
    115 
    116     /*
    117      * Copy the filename into the supplied buffer.  Returns 0 on success,
    118      * -1 if "entry" is invalid, or the filename length if it didn't fit.  The
    119      * length, and the returned string, include the null-termination.
    120      */
    121     int getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) const;
    122 
    123     /*
    124      * Get the vital stats for an entry.  Pass in NULL pointers for anything
    125      * you don't need.
    126      *
    127      * "*pOffset" holds the Zip file offset of the entry's data.
    128      *
    129      * Returns "false" if "entry" is bogus or if the data in the Zip file
    130      * appears to be bad.
    131      */
    132     bool getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen,
    133         size_t* pCompLen, off64_t* pOffset, long* pModWhen, long* pCrc32) const;
    134 
    135     /*
    136      * Create a new FileMap object that maps a subset of the archive.  For
    137      * an uncompressed entry this effectively provides a pointer to the
    138      * actual data, for a compressed entry this provides the input buffer
    139      * for inflate().
    140      */
    141     FileMap* createEntryFileMap(ZipEntryRO entry) const;
    142 
    143     /*
    144      * Uncompress the data into a buffer.  Depending on the compression
    145      * format, this is either an "inflate" operation or a memcpy.
    146      *
    147      * Use "uncompLen" from getEntryInfo() to determine the required
    148      * buffer size.
    149      *
    150      * Returns "true" on success.
    151      */
    152     bool uncompressEntry(ZipEntryRO entry, void* buffer) const;
    153 
    154     /*
    155      * Uncompress the data to an open file descriptor.
    156      */
    157     bool uncompressEntry(ZipEntryRO entry, int fd) const;
    158 
    159     /* Zip compression methods we support */
    160     enum {
    161         kCompressStored     = 0,        // no compression
    162         kCompressDeflated   = 8,        // standard deflate
    163     };
    164 
    165     /*
    166      * Utility function: uncompress deflated data, buffer to buffer.
    167      */
    168     static bool inflateBuffer(void* outBuf, const void* inBuf,
    169         size_t uncompLen, size_t compLen);
    170 
    171     /*
    172      * Utility function: uncompress deflated data, buffer to fd.
    173      */
    174     static bool inflateBuffer(int fd, const void* inBuf,
    175         size_t uncompLen, size_t compLen);
    176 
    177     /*
    178      * Utility function to convert ZIP's time format to a timespec struct.
    179      */
    180     static inline void zipTimeToTimespec(long when, struct tm* timespec) {
    181         const long date = when >> 16;
    182         timespec->tm_year = ((date >> 9) & 0x7F) + 80; // Zip is years since 1980
    183         timespec->tm_mon = (date >> 5) & 0x0F;
    184         timespec->tm_mday = date & 0x1F;
    185 
    186         timespec->tm_hour = (when >> 11) & 0x1F;
    187         timespec->tm_min = (when >> 5) & 0x3F;
    188         timespec->tm_sec = (when & 0x1F) << 1;
    189     }
    190 
    191     /*
    192      * Some basic functions for raw data manipulation.  "LE" means
    193      * Little Endian.
    194      */
    195     static inline unsigned short get2LE(const unsigned char* buf) {
    196         return buf[0] | (buf[1] << 8);
    197     }
    198     static inline unsigned long get4LE(const unsigned char* buf) {
    199         return buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
    200     }
    201 
    202 private:
    203     /* these are private and not defined */
    204     ZipFileRO(const ZipFileRO& src);
    205     ZipFileRO& operator=(const ZipFileRO& src);
    206 
    207     /* locate and parse the central directory */
    208     bool mapCentralDirectory(void);
    209 
    210     /* parse the archive, prepping internal structures */
    211     bool parseZipArchive(void);
    212 
    213     /* add a new entry to the hash table */
    214     void addToHash(const char* str, int strLen, unsigned int hash);
    215 
    216     /* compute string hash code */
    217     static unsigned int computeHash(const char* str, int len);
    218 
    219     /* convert a ZipEntryRO back to a hash table index */
    220     int entryToIndex(const ZipEntryRO entry) const;
    221 
    222     /*
    223      * One entry in the hash table.
    224      */
    225     typedef struct HashEntry {
    226         const char*     name;
    227         unsigned short  nameLen;
    228         //unsigned int    hash;
    229     } HashEntry;
    230 
    231     /* open Zip archive */
    232     int         mFd;
    233 
    234     /* Lock for handling the file descriptor (seeks, etc) */
    235     mutable Mutex mFdLock;
    236 
    237     /* zip file name */
    238     char*       mFileName;
    239 
    240     /* length of file */
    241     size_t      mFileLength;
    242 
    243     /* mapped file */
    244     FileMap*    mDirectoryMap;
    245 
    246     /* number of entries in the Zip archive */
    247     int         mNumEntries;
    248 
    249     /* CD directory offset in the Zip archive */
    250     off64_t     mDirectoryOffset;
    251 
    252     /*
    253      * We know how many entries are in the Zip archive, so we have a
    254      * fixed-size hash table.  We probe for an empty slot.
    255      */
    256     int         mHashTableSize;
    257     HashEntry*  mHashTable;
    258 };
    259 
    260 }; // namespace android
    261 
    262 #endif /*__LIBS_ZIPFILERO_H*/
    263