Home | History | Annotate | Download | only in google
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
      6 #define THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
      7 
      8 #include <string>
      9 
     10 #include "base/basictypes.h"
     11 #include "base/file_util.h"
     12 #include "base/files/file_path.h"
     13 #include "base/memory/scoped_ptr.h"
     14 #include "base/platform_file.h"
     15 #include "base/time/time.h"
     16 
     17 #if defined(USE_SYSTEM_MINIZIP)
     18 #include <minizip/unzip.h>
     19 #else
     20 #include "third_party/zlib/contrib/minizip/unzip.h"
     21 #endif
     22 
     23 namespace zip {
     24 
     25 // This class is used for reading zip files. A typical use case of this
     26 // class is to scan entries in a zip file and extract them. The code will
     27 // look like:
     28 //
     29 //   ZipReader reader;
     30 //   reader.Open(zip_file_path);
     31 //   while (reader.HasMore()) {
     32 //     reader.OpenCurrentEntryInZip();
     33 //     reader.ExtractCurrentEntryIntoDirectory(output_directory_path);
     34 //     reader.AdvanceToNextEntry();
     35 //   }
     36 //
     37 // For simplicity, error checking is omitted in the example code above. The
     38 // production code should check return values from all of these functions.
     39 //
     40 // This class can also be used for random access of contents in a zip file
     41 // using LocateAndOpenEntry().
     42 //
     43 class ZipReader {
     44  public:
     45   // This class represents information about an entry (file or directory) in
     46   // a zip file.
     47   class EntryInfo {
     48    public:
     49     EntryInfo(const std::string& filename_in_zip,
     50               const unz_file_info& raw_file_info);
     51 
     52     // Returns the file path. The path is usually relative like
     53     // "foo/bar.txt", but if it's absolute, is_unsafe() returns true.
     54     const base::FilePath& file_path() const { return file_path_; }
     55 
     56     // Returns the size of the original file (i.e. after decompression).
     57     // Returns 0 if the entry is a directory.
     58     int64 original_size() const { return original_size_; }
     59 
     60     // Returns the last modified time.
     61     base::Time last_modified() const { return last_modified_; }
     62 
     63     // Returns true if the entry is a directory.
     64     bool is_directory() const { return is_directory_; }
     65 
     66     // Returns true if the entry is unsafe, like having ".." or invalid
     67     // UTF-8 characters in its file name, or the file path is absolute.
     68     bool is_unsafe() const { return is_unsafe_; }
     69 
     70    private:
     71     const base::FilePath file_path_;
     72     int64 original_size_;
     73     base::Time last_modified_;
     74     bool is_directory_;
     75     bool is_unsafe_;
     76     DISALLOW_COPY_AND_ASSIGN(EntryInfo);
     77   };
     78 
     79   ZipReader();
     80   ~ZipReader();
     81 
     82   // Opens the zip file specified by |zip_file_path|. Returns true on
     83   // success.
     84   bool Open(const base::FilePath& zip_file_path);
     85 
     86   // Opens the zip file referred to by the platform file |zip_fd|.
     87   // Returns true on success.
     88   bool OpenFromPlatformFile(base::PlatformFile zip_fd);
     89 
     90   // Opens the zip data stored in |data|. This class uses a weak reference to
     91   // the given string while extracting files, i.e. the caller should keep the
     92   // string until it finishes extracting files.
     93   bool OpenFromString(const std::string& data);
     94 
     95   // Closes the currently opened zip file. This function is called in the
     96   // destructor of the class, so you usually don't need to call this.
     97   void Close();
     98 
     99   // Returns true if there is at least one entry to read. This function is
    100   // used to scan entries with AdvanceToNextEntry(), like:
    101   //
    102   // while (reader.HasMore()) {
    103   //   // Do something with the current file here.
    104   //   reader.AdvanceToNextEntry();
    105   // }
    106   bool HasMore();
    107 
    108   // Advances to the next entry. Returns true on success.
    109   bool AdvanceToNextEntry();
    110 
    111   // Opens the current entry in the zip file. On success, returns true and
    112   // updates the current entry state (i.e. current_entry_info() is
    113   // updated). This function should be called before operations over the
    114   // current entry like ExtractCurrentEntryToFilePath().
    115   //
    116   // Note that there is no CloseCurrentEntryInZip(). The current entry
    117   // state is reset automatically as needed.
    118   bool OpenCurrentEntryInZip();
    119 
    120   // Locates an entry in the zip file and opens it. Returns true on
    121   // success. This function internally calls OpenCurrentEntryInZip() on
    122   // success. On failure, current_entry_info() becomes NULL.
    123   bool LocateAndOpenEntry(const base::FilePath& path_in_zip);
    124 
    125   // Extracts the current entry to the given output file path. If the
    126   // current file is a directory, just creates a directory
    127   // instead. Returns true on success. OpenCurrentEntryInZip() must be
    128   // called beforehand.
    129   //
    130   // This function does not preserve the timestamp of the original entry.
    131   bool ExtractCurrentEntryToFilePath(const base::FilePath& output_file_path);
    132 
    133   // Extracts the current entry to the given output directory path using
    134   // ExtractCurrentEntryToFilePath(). Sub directories are created as needed
    135   // based on the file path of the current entry. For example, if the file
    136   // path in zip is "foo/bar.txt", and the output directory is "output",
    137   // "output/foo/bar.txt" will be created.
    138   //
    139   // Returns true on success. OpenCurrentEntryInZip() must be called
    140   // beforehand.
    141   bool ExtractCurrentEntryIntoDirectory(
    142       const base::FilePath& output_directory_path);
    143 
    144 #if defined(OS_POSIX)
    145   // Extracts the current entry by writing directly to a file descriptor.
    146   // Does not close the file descriptor. Returns true on success.
    147   bool ExtractCurrentEntryToFd(int fd);
    148 #endif
    149 
    150   // Returns the current entry info. Returns NULL if the current entry is
    151   // not yet opened. OpenCurrentEntryInZip() must be called beforehand.
    152   EntryInfo* current_entry_info() const {
    153     return current_entry_info_.get();
    154   }
    155 
    156   // Returns the number of entries in the zip file.
    157   // Open() must be called beforehand.
    158   int num_entries() const { return num_entries_; }
    159 
    160  private:
    161   // Common code used both in Open and OpenFromPlatformFile (TODO: confirm).
    162   bool OpenInternal();
    163 
    164   // Resets the internal state.
    165   void Reset();
    166 
    167   unzFile zip_file_;  // minizip handle for the opened zip (see unzip.h).
    168   int num_entries_;  // Value returned by num_entries().
    169   bool reached_end_;  // NOTE(review): presumably backs HasMore() -- confirm.
    170   scoped_ptr<EntryInfo> current_entry_info_;  // See current_entry_info().
    171 
    172   DISALLOW_COPY_AND_ASSIGN(ZipReader);
    173 };
    174 
    175 }  // namespace zip
    176 
    177 #endif  // THIRD_PARTY_ZLIB_GOOGLE_ZIP_READER_H_
    178