1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "third_party/zlib/google/zip_reader.h" 6 7 #include "base/bind.h" 8 #include "base/files/file.h" 9 #include "base/logging.h" 10 #include "base/message_loop/message_loop.h" 11 #include "base/strings/string_util.h" 12 #include "base/strings/utf_string_conversions.h" 13 #include "third_party/zlib/google/zip_internal.h" 14 15 #if defined(USE_SYSTEM_MINIZIP) 16 #include <minizip/unzip.h> 17 #else 18 #include "third_party/zlib/contrib/minizip/unzip.h" 19 #if defined(OS_WIN) 20 #include "third_party/zlib/contrib/minizip/iowin32.h" 21 #endif // defined(OS_WIN) 22 #endif // defined(USE_SYSTEM_MINIZIP) 23 24 namespace zip { 25 26 // TODO(satorux): The implementation assumes that file names in zip files 27 // are encoded in UTF-8. This is true for zip files created by Zip() 28 // function in zip.h, but not true for user-supplied random zip files. 29 ZipReader::EntryInfo::EntryInfo(const std::string& file_name_in_zip, 30 const unz_file_info& raw_file_info) 31 : file_path_(base::FilePath::FromUTF8Unsafe(file_name_in_zip)), 32 is_directory_(false) { 33 original_size_ = raw_file_info.uncompressed_size; 34 35 // Directory entries in zip files end with "/". 36 is_directory_ = EndsWith(file_name_in_zip, "/", false); 37 38 // Check the file name here for directory traversal issues. 39 is_unsafe_ = file_path_.ReferencesParent(); 40 41 // We also consider that the file name is unsafe, if it's invalid UTF-8. 42 base::string16 file_name_utf16; 43 if (!base::UTF8ToUTF16(file_name_in_zip.data(), file_name_in_zip.size(), 44 &file_name_utf16)) { 45 is_unsafe_ = true; 46 } 47 48 // We also consider that the file name is unsafe, if it's absolute. 49 // On Windows, IsAbsolute() returns false for paths starting with "/". 50 if (file_path_.IsAbsolute() || StartsWithASCII(file_name_in_zip, "/", false)) 51 is_unsafe_ = true; 52 53 // Construct the last modified time. The timezone info is not present in 54 // zip files, so we construct the time as local time. 55 base::Time::Exploded exploded_time = {}; // Zero-clear. 56 exploded_time.year = raw_file_info.tmu_date.tm_year; 57 // The month in zip file is 0-based, whereas ours is 1-based. 58 exploded_time.month = raw_file_info.tmu_date.tm_mon + 1; 59 exploded_time.day_of_month = raw_file_info.tmu_date.tm_mday; 60 exploded_time.hour = raw_file_info.tmu_date.tm_hour; 61 exploded_time.minute = raw_file_info.tmu_date.tm_min; 62 exploded_time.second = raw_file_info.tmu_date.tm_sec; 63 exploded_time.millisecond = 0; 64 if (exploded_time.HasValidValues()) { 65 last_modified_ = base::Time::FromLocalExploded(exploded_time); 66 } else { 67 // Use Unix time epoch if the time stamp data is invalid. 68 last_modified_ = base::Time::UnixEpoch(); 69 } 70 } 71 72 ZipReader::ZipReader() 73 : weak_ptr_factory_(this) { 74 Reset(); 75 } 76 77 ZipReader::~ZipReader() { 78 Close(); 79 } 80 81 bool ZipReader::Open(const base::FilePath& zip_file_path) { 82 DCHECK(!zip_file_); 83 84 // Use of "Unsafe" function does not look good, but there is no way to do 85 // this safely on Linux. See file_util.h for details. 86 zip_file_ = internal::OpenForUnzipping(zip_file_path.AsUTF8Unsafe()); 87 if (!zip_file_) { 88 return false; 89 } 90 91 return OpenInternal(); 92 } 93 94 bool ZipReader::OpenFromPlatformFile(base::PlatformFile zip_fd) { 95 DCHECK(!zip_file_); 96 97 #if defined(OS_POSIX) 98 zip_file_ = internal::OpenFdForUnzipping(zip_fd); 99 #elif defined(OS_WIN) 100 zip_file_ = internal::OpenHandleForUnzipping(zip_fd); 101 #endif 102 if (!zip_file_) { 103 return false; 104 } 105 106 return OpenInternal(); 107 } 108 109 bool ZipReader::OpenFromString(const std::string& data) { 110 zip_file_ = internal::PrepareMemoryForUnzipping(data); 111 if (!zip_file_) 112 return false; 113 return OpenInternal(); 114 } 115 116 void ZipReader::Close() { 117 if (zip_file_) { 118 unzClose(zip_file_); 119 } 120 Reset(); 121 } 122 123 bool ZipReader::HasMore() { 124 return !reached_end_; 125 } 126 127 bool ZipReader::AdvanceToNextEntry() { 128 DCHECK(zip_file_); 129 130 // Should not go further if we already reached the end. 131 if (reached_end_) 132 return false; 133 134 unz_file_pos position = {}; 135 if (unzGetFilePos(zip_file_, &position) != UNZ_OK) 136 return false; 137 const int current_entry_index = position.num_of_file; 138 // If we are currently at the last entry, then the next position is the 139 // end of the zip file, so mark that we reached the end. 140 if (current_entry_index + 1 == num_entries_) { 141 reached_end_ = true; 142 } else { 143 DCHECK_LT(current_entry_index + 1, num_entries_); 144 if (unzGoToNextFile(zip_file_) != UNZ_OK) { 145 return false; 146 } 147 } 148 current_entry_info_.reset(); 149 return true; 150 } 151 152 bool ZipReader::OpenCurrentEntryInZip() { 153 DCHECK(zip_file_); 154 155 unz_file_info raw_file_info = {}; 156 char raw_file_name_in_zip[internal::kZipMaxPath] = {}; 157 const int result = unzGetCurrentFileInfo(zip_file_, 158 &raw_file_info, 159 raw_file_name_in_zip, 160 sizeof(raw_file_name_in_zip) - 1, 161 NULL, // extraField. 162 0, // extraFieldBufferSize. 163 NULL, // szComment. 164 0); // commentBufferSize. 165 if (result != UNZ_OK) 166 return false; 167 if (raw_file_name_in_zip[0] == '\0') 168 return false; 169 current_entry_info_.reset( 170 new EntryInfo(raw_file_name_in_zip, raw_file_info)); 171 return true; 172 } 173 174 bool ZipReader::LocateAndOpenEntry(const base::FilePath& path_in_zip) { 175 DCHECK(zip_file_); 176 177 current_entry_info_.reset(); 178 reached_end_ = false; 179 const int kDefaultCaseSensivityOfOS = 0; 180 const int result = unzLocateFile(zip_file_, 181 path_in_zip.AsUTF8Unsafe().c_str(), 182 kDefaultCaseSensivityOfOS); 183 if (result != UNZ_OK) 184 return false; 185 186 // Then Open the entry. 187 return OpenCurrentEntryInZip(); 188 } 189 190 bool ZipReader::ExtractCurrentEntryToFilePath( 191 const base::FilePath& output_file_path) { 192 DCHECK(zip_file_); 193 194 // If this is a directory, just create it and return. 195 if (current_entry_info()->is_directory()) 196 return base::CreateDirectory(output_file_path); 197 198 const int open_result = unzOpenCurrentFile(zip_file_); 199 if (open_result != UNZ_OK) 200 return false; 201 202 // We can't rely on parent directory entries being specified in the 203 // zip, so we make sure they are created. 204 base::FilePath output_dir_path = output_file_path.DirName(); 205 if (!base::CreateDirectory(output_dir_path)) 206 return false; 207 208 base::File file(output_file_path, 209 base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE); 210 if (!file.IsValid()) 211 return false; 212 213 bool success = true; // This becomes false when something bad happens. 214 while (true) { 215 char buf[internal::kZipBufSize]; 216 const int num_bytes_read = unzReadCurrentFile(zip_file_, buf, 217 internal::kZipBufSize); 218 if (num_bytes_read == 0) { 219 // Reached the end of the file. 220 break; 221 } else if (num_bytes_read < 0) { 222 // If num_bytes_read < 0, then it's a specific UNZ_* error code. 223 success = false; 224 break; 225 } else if (num_bytes_read > 0) { 226 // Some data is read. Write it to the output file. 227 if (num_bytes_read != file.WriteAtCurrentPos(buf, num_bytes_read)) { 228 success = false; 229 break; 230 } 231 } 232 } 233 234 file.Close(); 235 unzCloseCurrentFile(zip_file_); 236 237 if (current_entry_info()->last_modified() != base::Time::UnixEpoch()) 238 base::TouchFile(output_file_path, 239 base::Time::Now(), 240 current_entry_info()->last_modified()); 241 242 return success; 243 } 244 245 void ZipReader::ExtractCurrentEntryToFilePathAsync( 246 const base::FilePath& output_file_path, 247 const SuccessCallback& success_callback, 248 const FailureCallback& failure_callback, 249 const ProgressCallback& progress_callback) { 250 DCHECK(zip_file_); 251 DCHECK(current_entry_info_.get()); 252 253 // If this is a directory, just create it and return. 254 if (current_entry_info()->is_directory()) { 255 if (base::CreateDirectory(output_file_path)) { 256 base::MessageLoopProxy::current()->PostTask(FROM_HERE, success_callback); 257 } else { 258 DVLOG(1) << "Unzip failed: unable to create directory."; 259 base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback); 260 } 261 return; 262 } 263 264 if (unzOpenCurrentFile(zip_file_) != UNZ_OK) { 265 DVLOG(1) << "Unzip failed: unable to open current zip entry."; 266 base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback); 267 return; 268 } 269 270 base::FilePath output_dir_path = output_file_path.DirName(); 271 if (!base::CreateDirectory(output_dir_path)) { 272 DVLOG(1) << "Unzip failed: unable to create containing directory."; 273 base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback); 274 return; 275 } 276 277 const int flags = base::File::FLAG_CREATE_ALWAYS | base::File::FLAG_WRITE; 278 base::File output_file(output_file_path, flags); 279 280 if (!output_file.IsValid()) { 281 DVLOG(1) << "Unzip failed: unable to create platform file at " 282 << output_file_path.value(); 283 base::MessageLoopProxy::current()->PostTask(FROM_HERE, failure_callback); 284 return; 285 } 286 287 base::MessageLoop::current()->PostTask( 288 FROM_HERE, 289 base::Bind(&ZipReader::ExtractChunk, 290 weak_ptr_factory_.GetWeakPtr(), 291 Passed(output_file.Pass()), 292 success_callback, 293 failure_callback, 294 progress_callback, 295 0 /* initial offset */)); 296 } 297 298 bool ZipReader::ExtractCurrentEntryIntoDirectory( 299 const base::FilePath& output_directory_path) { 300 DCHECK(current_entry_info_.get()); 301 302 base::FilePath output_file_path = output_directory_path.Append( 303 current_entry_info()->file_path()); 304 return ExtractCurrentEntryToFilePath(output_file_path); 305 } 306 307 #if defined(OS_POSIX) 308 bool ZipReader::ExtractCurrentEntryToFd(const int fd) { 309 DCHECK(zip_file_); 310 311 // If this is a directory, there's nothing to extract to the file descriptor, 312 // so return false. 313 if (current_entry_info()->is_directory()) 314 return false; 315 316 const int open_result = unzOpenCurrentFile(zip_file_); 317 if (open_result != UNZ_OK) 318 return false; 319 320 bool success = true; // This becomes false when something bad happens. 321 while (true) { 322 char buf[internal::kZipBufSize]; 323 const int num_bytes_read = unzReadCurrentFile(zip_file_, buf, 324 internal::kZipBufSize); 325 if (num_bytes_read == 0) { 326 // Reached the end of the file. 327 break; 328 } else if (num_bytes_read < 0) { 329 // If num_bytes_read < 0, then it's a specific UNZ_* error code. 330 success = false; 331 break; 332 } else if (num_bytes_read > 0) { 333 // Some data is read. Write it to the output file descriptor. 334 if (num_bytes_read != 335 base::WriteFileDescriptor(fd, buf, num_bytes_read)) { 336 success = false; 337 break; 338 } 339 } 340 } 341 342 unzCloseCurrentFile(zip_file_); 343 return success; 344 } 345 #endif // defined(OS_POSIX) 346 347 bool ZipReader::ExtractCurrentEntryToString( 348 size_t max_read_bytes, 349 std::string* output) const { 350 DCHECK(output); 351 DCHECK(zip_file_); 352 DCHECK(max_read_bytes != 0); 353 354 if (current_entry_info()->is_directory()) { 355 output->clear(); 356 return true; 357 } 358 359 const int open_result = unzOpenCurrentFile(zip_file_); 360 if (open_result != UNZ_OK) 361 return false; 362 363 // The original_size() is the best hint for the real size, so it saves 364 // doing reallocations for the common case when the uncompressed size is 365 // correct. However, we need to assume that the uncompressed size could be 366 // incorrect therefore this function needs to read as much data as possible. 367 std::string contents; 368 contents.reserve(std::min<size_t>( 369 max_read_bytes, current_entry_info()->original_size())); 370 371 bool success = true; // This becomes false when something bad happens. 372 char buf[internal::kZipBufSize]; 373 while (true) { 374 const int num_bytes_read = unzReadCurrentFile(zip_file_, buf, 375 internal::kZipBufSize); 376 if (num_bytes_read == 0) { 377 // Reached the end of the file. 378 break; 379 } else if (num_bytes_read < 0) { 380 // If num_bytes_read < 0, then it's a specific UNZ_* error code. 381 success = false; 382 break; 383 } else if (num_bytes_read > 0) { 384 if (contents.size() + num_bytes_read > max_read_bytes) { 385 success = false; 386 break; 387 } 388 contents.append(buf, num_bytes_read); 389 } 390 } 391 392 unzCloseCurrentFile(zip_file_); 393 if (success) 394 output->swap(contents); 395 396 return success; 397 } 398 399 bool ZipReader::OpenInternal() { 400 DCHECK(zip_file_); 401 402 unz_global_info zip_info = {}; // Zero-clear. 403 if (unzGetGlobalInfo(zip_file_, &zip_info) != UNZ_OK) { 404 return false; 405 } 406 num_entries_ = zip_info.number_entry; 407 if (num_entries_ < 0) 408 return false; 409 410 // We are already at the end if the zip file is empty. 411 reached_end_ = (num_entries_ == 0); 412 return true; 413 } 414 415 void ZipReader::Reset() { 416 zip_file_ = NULL; 417 num_entries_ = 0; 418 reached_end_ = false; 419 current_entry_info_.reset(); 420 } 421 422 void ZipReader::ExtractChunk(base::File output_file, 423 const SuccessCallback& success_callback, 424 const FailureCallback& failure_callback, 425 const ProgressCallback& progress_callback, 426 const int64 offset) { 427 char buffer[internal::kZipBufSize]; 428 429 const int num_bytes_read = unzReadCurrentFile(zip_file_, 430 buffer, 431 internal::kZipBufSize); 432 433 if (num_bytes_read == 0) { 434 unzCloseCurrentFile(zip_file_); 435 success_callback.Run(); 436 } else if (num_bytes_read < 0) { 437 DVLOG(1) << "Unzip failed: error while reading zipfile " 438 << "(" << num_bytes_read << ")"; 439 failure_callback.Run(); 440 } else { 441 if (num_bytes_read != output_file.Write(offset, buffer, num_bytes_read)) { 442 DVLOG(1) << "Unzip failed: unable to write all bytes to target."; 443 failure_callback.Run(); 444 return; 445 } 446 447 int64 current_progress = offset + num_bytes_read; 448 449 progress_callback.Run(current_progress); 450 451 base::MessageLoop::current()->PostTask( 452 FROM_HERE, 453 base::Bind(&ZipReader::ExtractChunk, 454 weak_ptr_factory_.GetWeakPtr(), 455 Passed(output_file.Pass()), 456 success_callback, 457 failure_callback, 458 progress_callback, 459 current_progress)); 460 461 } 462 } 463 464 465 } // namespace zip 466