Home | History | Annotate | Download | only in google
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "third_party/zlib/google/zip_reader.h"
      6 
      7 #include "base/bind.h"
      8 #include "base/files/file.h"
      9 #include "base/logging.h"
     10 #include "base/message_loop/message_loop.h"
     11 #include "base/strings/string_util.h"
     12 #include "base/strings/utf_string_conversions.h"
     13 #include "third_party/zlib/google/zip_internal.h"
     14 
     15 #if defined(USE_SYSTEM_MINIZIP)
     16 #include <minizip/unzip.h>
     17 #else
     18 #include "third_party/zlib/contrib/minizip/unzip.h"
     19 #if defined(OS_WIN)
     20 #include "third_party/zlib/contrib/minizip/iowin32.h"
     21 #endif  // defined(OS_WIN)
     22 #endif  // defined(USE_SYSTEM_MINIZIP)
     23 
     24 namespace zip {
     25 
     26 // TODO(satorux): The implementation assumes that file names in zip files
     27 // are encoded in UTF-8. This is true for zip files created by Zip()
     28 // function in zip.h, but not true for user-supplied random zip files.
     29 ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip,
     30                                 const unz_file_info& raw_file_info)
     31     : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)),
     32       is_directory_(false) {
     33   original_size_ = raw_file_info.uncompressed_size;
     34 
     35   // Directory entries in zip files end with "/".
     36   is_directory_ = EndsWith(file_name_in_zip, "/", false);
     37 
     38   // Check the file name here for directory traversal issues.
     39   is_unsafe_ = file_path_.ReferencesParent();
     40 
     41   // We also consider that the file name is unsafe, if it's invalid UTF-8.
     42   base::string16 file_name_utf16;
     43   if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(),
     44                          &file_name_utf16)) {
     45     is_unsafe_ = true;
     46   }
     47 
     48   // We also consider that the file name is unsafe, if it's absolute.
     49   // On Windows, IsAbsolute() returns false for paths starting with "/".
     50   if (file_path_.IsAbsolute() || StartsWithASCII(file_name_in_zip, "/", false))
     51     is_unsafe_ = true;
     52 
     53   // Construct the last modified time. The timezone info is not present in
     54   // zip files, so we construct the time as local time.
     55   base::Time::Exploded exploded_time = {};  // Zero-clear.
     56   exploded_time.year = raw_file_info.tmu_date.tm_year;
     57   // The month in zip file is 0-based, whereas ours is 1-based.
     58   exploded_time.month = raw_file_info.tmu_date.tm_mon + 1;
     59   exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday;
     60   exploded_time.hour = raw_file_info.tmu_date.tm_hour;
     61   exploded_time.minute = raw_file_info.tmu_date.tm_min;
     62   exploded_time.second = raw_file_info.tmu_date.tm_sec;
     63   exploded_time.millisecond = 0;
     64   if (exploded_time.HasValidValues()) {
     65     last_modified_ = base::Time::FromLocalExploded(exploded_time);
     66   } else {
     67     // Use Unix time epoch if the time stamp data is invalid.
     68     last_modified_ = base::Time::UnixEpoch();
     69   }
     70 }
     71 
     72 ZipReader::ZipReader()
     73     : weak_ptr_factory_(this) {
     74   Reset();
     75 }
     76 
     77 ZipReader::~ZipReader() {
     78   Close();
     79 }
     80 
     81 bool ZipReader::Open(const base::FilePath& zip_file_path) {
     82   DCHECK(!zip_file_);
     83 
     84   // Use of "Unsafe" function does not look good, but there is no way to do
     85   // this safely on Linux. See file_util.h for details.
     86   zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe());
     87   if (!zip_file_) {
     88     return false;
     89   }
     90 
     91   return OpenInternal();
     92 }
     93 
     94 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) {
     95   DCHECK(!zip_file_);
     96 
     97 #if defined(OS_POSIX)
     98   zip_file_ = internal::OpenFdForUnzipping(zip_fd);
     99 #elif defined(OS_WIN)
    100   zip_file_ = internal::OpenHandleForUnzipping(zip_fd);
    101 #endif
    102   if (!zip_file_) {
    103     return false;
    104   }
    105 
    106   return OpenInternal();
    107 }
    108 
    109 bool ZipReader::OpenFromString(const std::string& data) {
    110   zip_file_ = internal::PrepareMemoryForUnzipping(data);
    111   if (!zip_file_)
    112     return false;
    113   return OpenInternal();
    114 }
    115 
    116 void ZipReader::Close() {
    117   if (zip_file_) {
    118     unzClose(zip_file_);
    119   }
    120   Reset();
    121 }
    122 
    123 bool ZipReader::HasMore() {
    124   return !reached_end_;
    125 }
    126 
    127 bool ZipReader::AdvanceToNextEntry() {
    128   DCHECK(zip_file_);
    129 
    130   // Should not go further if we already reached the end.
    131   if (reached_end_)
    132     return false;
    133 
    134   unz_file_pos position = {};
    135   if (unzGetFilePos(zip_file_, &position) != UNZ_OK)
    136     return false;
    137   const int current_entry_index = position.num_of_file;
    138   // If we are currently at the last entry, then the next position is the
    139   // end of the zip file, so mark that we reached the end.
    140   if (current_entry_index + 1 == num_entries_) {
    141     reached_end_ = true;
    142   } else {
    143     DCHECK_LT(current_entry_index + 1, num_entries_);
    144     if (unzGoToNextFile(zip_file_) != UNZ_OK) {
    145       return false;
    146     }
    147   }
    148   current_entry_info_.reset();
    149   return true;
    150 }
    151 
    152 bool ZipReader::OpenCurrentEntryInZip() {
    153   DCHECK(zip_file_);
    154 
    155   unz_file_info raw_file_info = {};
    156   char raw_file_name_in_zip[internal::kZipMaxPath] = {};
    157   const int result = unzGetCurrentFileInfo(zip_file_,
    158                                            &raw_file_info,
    159                                            raw_file_name_in_zip,
    160                                            sizeof(raw_file_name_in_zip) - 1,
    161                                            NULL,  // extraField.
    162                                            0,  // extraFieldBufferSize.
    163                                            NULL,  // szComment.
    164                                            0);  // commentBufferSize.
    165   if (result != UNZ_OK)
    166     return false;
    167   if (raw_file_name_in_zip[0] == '\0')
    168     return false;
    169   current_entry_info_.reset(
    170       new EntryInfo(raw_file_name_in_zip, raw_file_info));
    171   return true;
    172 }
    173 
    174 bool ZipReader::LocateAndOpenEntry(const base::FilePath& path_in_zip) {
    175   DCHECK(zip_file_);
    176 
    177   current_entry_info_.reset();
    178   reached_end_ = false;
    179   const int kDefaultCaseSensivityOfOS = 0;
    180   const int result = unzLocateFile(zip_file_,
    181                                    path_in_zip.AsUTF8Unsafe().c_str(),
    182                                    kDefaultCaseSensivityOfOS);
    183   if (result != UNZ_OK)
    184     return false;
    185 
    186   // Then Open the entry.
    187   return OpenCurrentEntryInZip();
    188 }
    189 
    190 bool ZipReader::ExtractCurrentEntryToFilePath(
    191     const base::FilePath& output_file_path) {
    192   DCHECK(zip_file_);
    193 
    194   // If this is a directory, just create it and return.
    195   if (current_entry_info()->is_directory())
    196     return base::CreateDirectory(output_file_path);
    197 
    198   const int open_result = unzOpenCurrentFile(zip_file_);
    199   if (open_result != UNZ_OK)
    200     return false;
    201 
    202   // We can't rely on parent directory entries being specified in the
    203   // zip, so we make sure they are created.
    204   base::FilePath output_dir_path = output_file_path.DirName();
    205   if (!base::CreateDirectory(output_dir_path))
    206     return false;
    207 
    208   base::File file(output_file_path,
    209                   base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE);
    210   if (!file.IsValid())
    211     return false;
    212 
    213   bool success = true;  // This becomes false when something bad happens.
    214   while (true) {
    215     char buf[internal::kZipBufSize];
    216     const int num_bytes_read = unzReadCurrentFile(zip_file_, buf,
    217                                                   internal::kZipBufSize);
    218     if (num_bytes_read == 0) {
    219       // Reached the end of the file.
    220       break;
    221     } else if (num_bytes_read < 0) {
    222       // If num_bytes_read < 0, then it's a specific UNZ_* error code.
    223       success = false;
    224       break;
    225     } else if (num_bytes_read > 0) {
    226       // Some data is read. Write it to the output file.
    227       if (num_bytes_read != file.WriteAtCurrentPos(buf, num_bytes_read)) {
    228         success = false;
    229         break;
    230       }
    231     }
    232   }
    233 
    234   file.Close();
    235   unzCloseCurrentFile(zip_file_);
    236 
    237   if (current_entry_info()->last_modified() != base::Time::UnixEpoch())
    238     base::TouchFile(output_file_path,
    239                     base::Time::Now(),
    240                     current_entry_info()->last_modified());
    241 
    242   return success;
    243 }
    244 
    245 void ZipReader::ExtractCurrentEntryToFilePathAsync(
    246     const base::FilePath& output_file_path,
    247     const SuccessCallback& success_callback,
    248     const FailureCallback& failure_callback,
    249     const ProgressCallback& progress_callback) {
    250   DCHECK(zip_file_);
    251   DCHECK(current_entry_info_.get());
    252 
    253   // If this is a directory, just create it and return.
    254   if (current_entry_info()->is_directory()) {
    255     if (base::CreateDirectory(output_file_path)) {
    256       base::MessageLoopProxy::current()->PostTask(FROM_HERE, success_callback);
    257     } else {
    258       DVLOG(1) << "Unzip failed: unable to create directory.";
    259       base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback);
    260     }
    261     return;
    262   }
    263 
    264   if (unzOpenCurrentFile(zip_file_) != UNZ_OK) {
    265     DVLOG(1) << "Unzip failed: unable to open current zip entry.";
    266     base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback);
    267     return;
    268   }
    269 
    270   base::FilePath output_dir_path = output_file_path.DirName();
    271   if (!base::CreateDirectory(output_dir_path)) {
    272     DVLOG(1) << "Unzip failed: unable to create containing directory.";
    273     base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback);
    274     return;
    275   }
    276 
    277   const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE;
    278   base::File output_file(output_file_path, flags);
    279 
    280   if (!output_file.IsValid()) {
    281     DVLOG(1) << "Unzip failed: unable to create platform file at "
    282              << output_file_path.value();
    283     base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback);
    284     return;
    285   }
    286 
    287   base::MessageLoop::current()->PostTask(
    288       FROM_HERE,
    289       base::Bind(&ZipReader::ExtractChunk,
    290                  weak_ptr_factory_.GetWeakPtr(),
    291                  Passed(output_file.Pass()),
    292                  success_callback,
    293                  failure_callback,
    294                  progress_callback,
    295                  0 /* initial offset */));
    296 }
    297 
    298 bool ZipReader::ExtractCurrentEntryIntoDirectory(
    299     const base::FilePath& output_directory_path) {
    300   DCHECK(current_entry_info_.get());
    301 
    302   base::FilePath output_file_path = output_directory_path.Append(
    303       current_entry_info()->file_path());
    304   return ExtractCurrentEntryToFilePath(output_file_path);
    305 }
    306 
    307 #if defined(OS_POSIX)
    308 bool ZipReader::ExtractCurrentEntryToFd(const int fd) {
    309   DCHECK(zip_file_);
    310 
    311   // If this is a directory, there's nothing to extract to the file descriptor,
    312   // so return false.
    313   if (current_entry_info()->is_directory())
    314     return false;
    315 
    316   const int open_result = unzOpenCurrentFile(zip_file_);
    317   if (open_result != UNZ_OK)
    318     return false;
    319 
    320   bool success = true;  // This becomes false when something bad happens.
    321   while (true) {
    322     char buf[internal::kZipBufSize];
    323     const int num_bytes_read = unzReadCurrentFile(zip_file_, buf,
    324                                                   internal::kZipBufSize);
    325     if (num_bytes_read == 0) {
    326       // Reached the end of the file.
    327       break;
    328     } else if (num_bytes_read < 0) {
    329       // If num_bytes_read < 0, then it's a specific UNZ_* error code.
    330       success = false;
    331       break;
    332     } else if (num_bytes_read > 0) {
    333       // Some data is read. Write it to the output file descriptor.
    334       if (num_bytes_read !=
    335           base::WriteFileDescriptor(fd, buf, num_bytes_read)) {
    336         success = false;
    337         break;
    338       }
    339     }
    340   }
    341 
    342   unzCloseCurrentFile(zip_file_);
    343   return success;
    344 }
    345 #endif  // defined(OS_POSIX)
    346 
    347 bool ZipReader::ExtractCurrentEntryToString(
    348     size_t max_read_bytes,
    349     std::string* output) const {
    350   DCHECK(output);
    351   DCHECK(zip_file_);
    352   DCHECK(max_read_bytes != 0);
    353 
    354   if (current_entry_info()->is_directory()) {
    355     output->clear();
    356     return true;
    357   }
    358 
    359   const int open_result = unzOpenCurrentFile(zip_file_);
    360   if (open_result != UNZ_OK)
    361     return false;
    362 
    363   // The original_size() is the best hint for the real size, so it saves
    364   // doing reallocations for the common case when the uncompressed size is
    365   // correct. However, we need to assume that the uncompressed size could be
    366   // incorrect therefore this function needs to read as much data as possible.
    367   std::string contents;
    368   contents.reserve(std::min<size_t>(
    369       max_read_bytes, current_entry_info()->original_size()));
    370 
    371   bool success = true;  // This becomes false when something bad happens.
    372   char buf[internal::kZipBufSize];
    373   while (true) {
    374     const int num_bytes_read = unzReadCurrentFile(zip_file_, buf,
    375                                                   internal::kZipBufSize);
    376     if (num_bytes_read == 0) {
    377       // Reached the end of the file.
    378       break;
    379     } else if (num_bytes_read < 0) {
    380       // If num_bytes_read < 0, then it's a specific UNZ_* error code.
    381       success = false;
    382       break;
    383     } else if (num_bytes_read > 0) {
    384       if (contents.size() + num_bytes_read > max_read_bytes) {
    385         success = false;
    386         break;
    387       }
    388       contents.append(buf, num_bytes_read);
    389     }
    390   }
    391 
    392   unzCloseCurrentFile(zip_file_);
    393   if (success)
    394     output->swap(contents);
    395 
    396   return success;
    397 }
    398 
    399 bool ZipReader::OpenInternal() {
    400   DCHECK(zip_file_);
    401 
    402   unz_global_info zip_info = {};  // Zero-clear.
    403   if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) {
    404     return false;
    405   }
    406   num_entries_ = zip_info.number_entry;
    407   if (num_entries_ < 0)
    408     return false;
    409 
    410   // We are already at the end if the zip file is empty.
    411   reached_end_ = (num_entries_ == 0);
    412   return true;
    413 }
    414 
    415 void ZipReader::Reset() {
    416   zip_file_ = NULL;
    417   num_entries_ = 0;
    418   reached_end_ = false;
    419   current_entry_info_.reset();
    420 }
    421 
    422 void ZipReader::ExtractChunk(base::File output_file,
    423                              const SuccessCallback& success_callback,
    424                              const FailureCallback& failure_callback,
    425                              const ProgressCallback& progress_callback,
    426                              const int64 offset) {
    427   char buffer[internal::kZipBufSize];
    428 
    429   const int num_bytes_read = unzReadCurrentFile(zip_file_,
    430                                                 buffer,
    431                                                 internal::kZipBufSize);
    432 
    433   if (num_bytes_read == 0) {
    434     unzCloseCurrentFile(zip_file_);
    435     success_callback.Run();
    436   } else if (num_bytes_read < 0) {
    437     DVLOG(1) << "Unzip failed: error while reading zipfile "
    438              << "(" << num_bytes_read << ")";
    439     failure_callback.Run();
    440   } else {
    441     if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) {
    442       DVLOG(1) << "Unzip failed: unable to write all bytes to target.";
    443       failure_callback.Run();
    444       return;
    445     }
    446 
    447     int64 current_progress = offset + num_bytes_read;
    448 
    449     progress_callback.Run(current_progress);
    450 
    451     base::MessageLoop::current()->PostTask(
    452         FROM_HERE,
    453         base::Bind(&ZipReader::ExtractChunk,
    454                    weak_ptr_factory_.GetWeakPtr(),
    455                    Passed(output_file.Pass()),
    456                    success_callback,
    457                    failure_callback,
    458                    progress_callback,
    459                    current_progress));
    460 
    461   }
    462 }
    463 
    464 
    465 }  // namespace zip
    466