Home | History | Annotate | Download | only in google
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
      5 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
      6 
      7 #include <string>
      8 
      9 #include "base/basictypes.h"
     10 #include "base/callback.h"
     11 #include "base/files/file.h"
     12 #include "base/files/file_path.h"
     13 #include "base/files/file_util.h"
     14 #include "base/memory/scoped_ptr.h"
     15 #include "base/memory/weak_ptr.h"
     16 #include "base/time/time.h"
     17 
     18 #if defined(USE_SYSTEM_MINIZIP)
     19 #include <minizip/unzip.h>
     20 #else
     21 #include "third_party/zlib/contrib/minizip/unzip.h"
     22 #endif
     23 
     24 namespace zip {
     25 
     26 // This class is used for reading zip files. A typical use case of this
     27 // class is to scan entries in a zip file and extract them. The code will
     28 // look like:
     29 //
     30 //   ZipReader reader;
     31 //   reader.Open(zip_file_path);
     32 //   while (reader.HasMore()) {
     33 //     reader.OpenCurrentEntryInZip();
     34 //     reader.ExtractCurrentEntryIntoDirectory(output_directory_path);
     35 //     reader.AdvanceToNextEntry();
     36 //   }
     37 //
     38 // For simplicity, error checking is omitted in the example code above. The
     39 // production code should check return values from all of these functions.
     40 //
     41 // This class can also be used for random access of contents in a zip file
     42 // using LocateAndOpenEntry().
     43 //
     44 class ZipReader {
     45  public:
     46   // A callback that is called when the operation is successful.
     47   typedef base::Closure SuccessCallback;
     48   // A callback that is called when the operation fails.
     49   typedef base::Closure FailureCallback;
     50   // A callback that is called periodically during the operation with the number
     51   // of bytes that have been processed so far.
     52   typedef base::Callback<void(int64)> ProgressCallback;
     53 
     54   // This class represents information of an entry (file or directory) in
     55   // a zip file.
     56   class EntryInfo {
     57    public:
     58     EntryInfo(const std::string& filename_in_zip,
     59               const unz_file_info& raw_file_info);
     60 
     61     // Returns the file path. The path is usually relative like
     62     // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
     63     const base::FilePath& file_path() const { return file_path_; }
     64 
     65     // Returns the size of the original file (i.e. after being uncompressed).
     66     // Returns 0 if the entry is a directory.
     67     // Note: this value should not be trusted, because it is stored as metadata
     68     // in the zip archive and can be different from the real uncompressed size.
     69     int64 original_size() const { return original_size_; }
     70 
     71     // Returns the last modified time. If the time stored in the zip file was
     72     // not valid, the unix epoch will be returned.
     73     //
     74     // The time stored in the zip archive uses the MS-DOS date and time format.
     75     // http://msdn.microsoft.com/en-us/library/ms724247(v=vs.85).aspx
     76     // As such the following limitations apply:
     77     // * only years from 1980 to 2107 can be represented.
     78     // * the time stamp has a 2 second resolution.
     79     // * there's no timezone information, so the time is interpreted as local.
     80     base::Time last_modified() const { return last_modified_; }
     81 
     82     // Returns true if the entry is a directory.
     83     bool is_directory() const { return is_directory_; }
     84 
     85     // Returns true if the entry is unsafe, like having ".." or invalid
     86     // UTF-8 characters in its file name, or the file path is absolute.
     87     bool is_unsafe() const { return is_unsafe_; }
     88 
     89    private:
     90     const base::FilePath file_path_;
     91     int64 original_size_;
     92     base::Time last_modified_;
     93     bool is_directory_;
     94     bool is_unsafe_;
     95     DISALLOW_COPY_AND_ASSIGN(EntryInfo);
     96   };
     97 
     98   ZipReader();
     99   ~ZipReader();
    100 
    101   // Opens the zip file specified by |zip_file_path|. Returns true on
    102   // success.
    103   bool Open(const base::FilePath& zip_file_path);
    104 
    105   // Opens the zip file referred to by the platform file |zip_fd|.
    106   // Returns true on success.
    107   bool OpenFromPlatformFile(base::PlatformFile zip_fd);
    108 
    109   // Opens the zip data stored in |data|. This class uses a weak reference to
    110   // the given string while extracting files, i.e. the caller should keep the
    111   // string until it finishes extracting files.
    112   bool OpenFromString(const std::string& data);
    113 
    114   // Closes the currently opened zip file. This function is called in the
    115   // destructor of the class, so you usually don't need to call this.
    116   void Close();
    117 
    118   // Returns true if there is at least one entry to read. This function is
    119   // used to scan entries with AdvanceToNextEntry(), like:
    120   //
    121   // while (reader.HasMore()) {
    122   //   // Do something with the current file here.
    123   //   reader.AdvanceToNextEntry();
    124   // }
    125   bool HasMore();
    126 
    127   // Advances to the next entry. Returns true on success.
    128   bool AdvanceToNextEntry();
    129 
    130   // Opens the current entry in the zip file. On success, returns true and
    131   // updates the current entry state (i.e. current_entry_info() is
    132   // updated). This function should be called before operations over the
    133   // current entry like ExtractCurrentEntryToFilePath().
    134   //
    135   // Note that there is no CloseCurrentEntryInZip(). The current entry
    136   // state is reset automatically as needed.
    137   bool OpenCurrentEntryInZip();
    138 
    139   // Locates an entry in the zip file and opens it. Returns true on
    140   // success. This function internally calls OpenCurrentEntryInZip() on
    141   // success. On failure, current_entry_info() becomes NULL.
    142   bool LocateAndOpenEntry(const base::FilePath& path_in_zip);
    143 
    144   // Extracts the current entry to the given output file path. If the
    145   // current file is a directory, just creates a directory
    146   // instead. Returns true on success. OpenCurrentEntryInZip() must be
    147   // called beforehand.
    148   //
    149   // This function preserves the timestamp of the original entry. If that
    150   // timestamp is not valid, the timestamp will be set to the current time.
    151   bool ExtractCurrentEntryToFilePath(const base::FilePath& output_file_path);
    152 
    153   // Asynchronously extracts the current entry to the given output file path.
    154   // If the current entry is a directory it just creates the directory
    155   // synchronously instead.  OpenCurrentEntryInZip() must be called beforehand.
    156   // success_callback will be called on success and failure_callback will be
    157   // called on failure.  progress_callback will be called at least once.
    158   // Callbacks will be posted to the current MessageLoop in-order.
    159   void ExtractCurrentEntryToFilePathAsync(
    160       const base::FilePath& output_file_path,
    161       const SuccessCallback& success_callback,
    162       const FailureCallback& failure_callback,
    163       const ProgressCallback& progress_callback);
    164 
    165   // Extracts the current entry to the given output directory path using
    166   // ExtractCurrentEntryToFilePath(). Sub directories are created as needed
    167   // based on the file path of the current entry. For example, if the file
    168   // path in zip is "foo/bar.txt", and the output directory is "output",
    169   // "output/foo/bar.txt" will be created.
    170   //
    171   // Returns true on success. OpenCurrentEntryInZip() must be called
    172   // beforehand.
    173   //
    174   // This function preserves the timestamp of the original entry. If that
    175   // timestamp is not valid, the timestamp will be set to the current time.
    176   bool ExtractCurrentEntryIntoDirectory(
    177       const base::FilePath& output_directory_path);
    178 
    179 #if defined(OS_POSIX)
    180   // Extracts the current entry by writing directly to a file descriptor.
    181   // Does not close the file descriptor. Returns true on success.
    182   bool ExtractCurrentEntryToFd(int fd);
    183 #endif
    184 
    185   // Extracts the current entry into memory. If the current entry is a directory
    186   // the |output| parameter is set to the empty string. If the current entry is
    187   // a file, the |output| parameter is filled with its contents. Returns true on
    188   // success. OpenCurrentEntryInZip() must be called beforehand.
    189   // Note: the |output| parameter can be filled with a big amount of data; avoid
    190   // passing it around by value, and pass it by reference or pointer instead.
    191   // Note: the value returned by EntryInfo::original_size() cannot be
    192   // trusted, so the real size of the uncompressed contents can be different.
    193   // Use max_read_bytes to limit the amount of memory used to carry the entry.
    194   // If the real size of the uncompressed data is bigger than max_read_bytes
    195   // then false is returned. |max_read_bytes| must be non-zero.
    196   bool ExtractCurrentEntryToString(
    197       size_t max_read_bytes,
    198       std::string* output) const;
    199 
    200   // Returns the current entry info. Returns NULL if the current entry is
    201   // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
    202   EntryInfo* current_entry_info() const {
    203     return current_entry_info_.get();
    204   }
    205 
    206   // Returns the number of entries in the zip file.
    207   // Open() must be called beforehand.
    208   int num_entries() const { return num_entries_; }
    209 
    210  private:
    211   // Common code used both in Open and OpenFromPlatformFile.
    212   bool OpenInternal();
    213 
    214   // Resets the internal state.
    215   void Reset();
    216 
    217   // Extracts a chunk of the file to the target.  Will post a task for the next
    218   // chunk and success/failure/progress callbacks as necessary.
    219   void ExtractChunk(base::File target_file,
    220                     const SuccessCallback& success_callback,
    221                     const FailureCallback& failure_callback,
    222                     const ProgressCallback& progress_callback,
    223                     const int64 offset);
    224 
    225   unzFile zip_file_;
    226   int num_entries_;
    227   bool reached_end_;
    228   scoped_ptr<EntryInfo> current_entry_info_;
    229 
    230   base::WeakPtrFactory<ZipReader> weak_ptr_factory_;
    231 
    232   DISALLOW_COPY_AND_ASSIGN(ZipReader);
    233 };
    234 
    235 }  // namespace zip
    236 
    237 #endif  // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
    238