Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2007 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 /*
     18  * Read-only access to Zip archives, with minimal heap allocation.
     19  *
     20  * This is similar to the more-complete ZipFile class, but no attempt
     21  * has been made to make them interchangeable.  This class operates under
     22  * a very different set of assumptions and constraints.
     23  *
     24  * One such assumption is that if you're getting file descriptors for
     25  * use with this class as a child of a fork() operation, you must be on
     26  * a platform that implements pread() to guarantee correct operation.
     27  * This is because pread() can atomically read at a file offset without
     28  * worrying about a lock around an lseek() + read() pair.
     29  */
     30 #ifndef __LIBS_ZIPFILERO_H
     31 #define __LIBS_ZIPFILERO_H
     32 
     33 #include <utils/Errors.h>
     34 #include <utils/FileMap.h>
     35 #include <utils/threads.h>
     36 
     37 #include <stdio.h>
     38 #include <stdlib.h>
     39 #include <unistd.h>
     40 
     41 namespace android {
     42 
     43 /*
     44  * Trivial typedef to ensure that ZipEntryRO is not treated as a simple
     45  * integer.  We use NULL to indicate an invalid value.
     46  */
     47 typedef void* ZipEntryRO;  /* opaque per-entry handle, as returned by ZipFileRO::findEntryByName() / findEntryByIndex() */
     48 
     49 /*
     50  * Open a Zip archive for reading.
     51  *
     52  * We want "open" and "find entry by name" to be fast operations, and we
     53  * want to use as little memory as possible.  We memory-map the file,
     54  * and load a hash table with pointers to the filenames (which aren't
     55  * null-terminated).  The other fields are at a fixed offset from the
     56  * filename, so we don't need to extract those (but we do need to byte-read
     57  * and endian-swap them every time we want them).
     58  *
     59  * To speed comparisons when doing a lookup by name, we could make the mapping
     60  * "private" (copy-on-write) and null-terminate the filenames after verifying
     61  * the record structure.  However, this requires a private mapping of
     62  * every page that the Central Directory touches.  Easier to tuck a copy
     63  * of the string length into the hash table entry.
     64  *
     65  * NOTE: If this is used on file descriptors inherited from a fork() operation,
     66  * you must be on a platform that implements pread() to guarantee correctness
     67  * on the shared file descriptors.
     68  */
     69 class ZipFileRO {
     70 public:
     71     ZipFileRO()
     72         : mFd(-1), mFileName(NULL), mFileLength(-1),
     73           mDirectoryMap(NULL),
     74           mNumEntries(-1), mDirectoryOffset(-1),
     75           mHashTableSize(-1), mHashTable(NULL)
     76         {}
     77 
     78     ~ZipFileRO();
     79 
     80     /*
     81      * Open an archive.
     82      */
     83     status_t open(const char* zipFileName);
     84 
     85     /*
     86      * Find an entry, by name.  Returns the entry identifier, or NULL if
     87      * not found.
     88      *
     89      * If two entries have the same name, one will be chosen at semi-random.
     90      */
     91     ZipEntryRO findEntryByName(const char* fileName) const;
     92 
     93     /*
     94      * Return the #of entries in the Zip archive.
     95      */
     96     int getNumEntries(void) const {
     97         return mNumEntries;
     98     }
     99 
    100     /*
    101      * Return the Nth entry.  Zip file entries are not stored in sorted
    102      * order, and updated entries may appear at the end, so anyone walking
    103      * the archive needs to avoid making ordering assumptions.  We take
    104      * that further by returning the Nth non-empty entry in the hash table
    105      * rather than the Nth entry in the archive.
    106      *
    107      * Valid values are [0..numEntries).
    108      *
    109      * [This is currently O(n).  If it needs to be fast we can allocate an
    110      * additional data structure or provide an iterator interface.]
    111      */
    112     ZipEntryRO findEntryByIndex(int idx) const;
    113 
    114     /*
    115      * Copy the filename into the supplied buffer.  Returns 0 on success,
    116      * -1 if "entry" is invalid, or the filename length if it didn't fit.  The
    117      * length, and the returned string, include the null-termination.
    118      */
    119     int getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) const;
    120 
    121     /*
    122      * Get the vital stats for an entry.  Pass in NULL pointers for anything
    123      * you don't need.
    124      *
    125      * "*pOffset" holds the Zip file offset of the entry's data.
    126      *
    127      * Returns "false" if "entry" is bogus or if the data in the Zip file
    128      * appears to be bad.
    129      */
    130     bool getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen,
    131         size_t* pCompLen, off_t* pOffset, long* pModWhen, long* pCrc32) const;
    132 
    133     /*
    134      * Create a new FileMap object that maps a subset of the archive.  For
    135      * an uncompressed entry this effectively provides a pointer to the
    136      * actual data, for a compressed entry this provides the input buffer
    137      * for inflate().
    138      */
    139     FileMap* createEntryFileMap(ZipEntryRO entry) const;
    140 
    141     /*
    142      * Uncompress the data into a buffer.  Depending on the compression
    143      * format, this is either an "inflate" operation or a memcpy.
    144      *
    145      * Use "uncompLen" from getEntryInfo() to determine the required
    146      * buffer size.
    147      *
    148      * Returns "true" on success.
    149      */
    150     bool uncompressEntry(ZipEntryRO entry, void* buffer) const;
    151 
    152     /*
    153      * Uncompress the data to an open file descriptor.
    154      */
    155     bool uncompressEntry(ZipEntryRO entry, int fd) const;
    156 
    157     /* Zip compression methods we support */
    158     enum {
    159         kCompressStored     = 0,        // no compression
    160         kCompressDeflated   = 8,        // standard deflate
    161     };
    162 
    163     /*
    164      * Utility function: uncompress deflated data, buffer to buffer.
    165      */
    166     static bool inflateBuffer(void* outBuf, const void* inBuf,
    167         size_t uncompLen, size_t compLen);
    168 
    169     /*
    170      * Utility function: uncompress deflated data, buffer to fd.
    171      */
    172     static bool inflateBuffer(int fd, const void* inBuf,
    173         size_t uncompLen, size_t compLen);
    174 
    175     /*
    176      * Some basic functions for raw data manipulation.  "LE" means
    177      * Little Endian.
    178      */
    179     static inline unsigned short get2LE(const unsigned char* buf) {
    180         return buf[0] | (buf[1] << 8);
    181     }
    182     static inline unsigned long get4LE(const unsigned char* buf) {
    183         return buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
    184     }
    185 
    186 private:
    187     /* these are private and not defined */
    188     ZipFileRO(const ZipFileRO& src);
    189     ZipFileRO& operator=(const ZipFileRO& src);
    190 
    191     /* locate and parse the central directory */
    192     bool mapCentralDirectory(void);
    193 
    194     /* parse the archive, prepping internal structures */
    195     bool parseZipArchive(void);
    196 
    197     /* add a new entry to the hash table */
    198     void addToHash(const char* str, int strLen, unsigned int hash);
    199 
    200     /* compute string hash code */
    201     static unsigned int computeHash(const char* str, int len);
    202 
    203     /* convert a ZipEntryRO back to a hash table index */
    204     int entryToIndex(const ZipEntryRO entry) const;
    205 
    206     /*
    207      * One entry in the hash table.
    208      */
    209     typedef struct HashEntry {
    210         const char*     name;
    211         unsigned short  nameLen;
    212         //unsigned int    hash;
    213     } HashEntry;
    214 
    215     /* open Zip archive */
    216     int         mFd;
    217 
    218     /* Lock for handling the file descriptor (seeks, etc) */
    219     mutable Mutex mFdLock;
    220 
    221     /* zip file name */
    222     char*       mFileName;
    223 
    224     /* length of file */
    225     size_t      mFileLength;
    226 
    227     /* mapped file */
    228     FileMap*    mDirectoryMap;
    229 
    230     /* number of entries in the Zip archive */
    231     int         mNumEntries;
    232 
    233     /* CD directory offset in the Zip archive */
    234     off_t       mDirectoryOffset;
    235 
    236     /*
    237      * We know how many entries are in the Zip archive, so we have a
    238      * fixed-size hash table.  We probe for an empty slot.
    239      */
    240     int         mHashTableSize;
    241     HashEntry*  mHashTable;
    242 };
    243 
    244 }; // namespace android
    245 
    246 #endif /*__LIBS_ZIPFILERO_H*/
    247