1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/browser/download/save_package.h" 6 7 #include <algorithm> 8 9 #include "base/bind.h" 10 #include "base/file_util.h" 11 #include "base/files/file_path.h" 12 #include "base/i18n/file_util_icu.h" 13 #include "base/logging.h" 14 #include "base/message_loop/message_loop.h" 15 #include "base/stl_util.h" 16 #include "base/strings/string_piece.h" 17 #include "base/strings/string_split.h" 18 #include "base/strings/sys_string_conversions.h" 19 #include "base/strings/utf_string_conversions.h" 20 #include "base/threading/thread.h" 21 #include "content/browser/download/download_item_impl.h" 22 #include "content/browser/download/download_manager_impl.h" 23 #include "content/browser/download/download_stats.h" 24 #include "content/browser/download/save_file.h" 25 #include "content/browser/download/save_file_manager.h" 26 #include "content/browser/download/save_item.h" 27 #include "content/browser/loader/resource_dispatcher_host_impl.h" 28 #include "content/browser/renderer_host/render_process_host_impl.h" 29 #include "content/browser/renderer_host/render_view_host_delegate.h" 30 #include "content/browser/renderer_host/render_view_host_impl.h" 31 #include "content/common/view_messages.h" 32 #include "content/public/browser/browser_context.h" 33 #include "content/public/browser/browser_thread.h" 34 #include "content/public/browser/content_browser_client.h" 35 #include "content/public/browser/download_manager_delegate.h" 36 #include "content/public/browser/navigation_entry.h" 37 #include "content/public/browser/notification_service.h" 38 #include "content/public/browser/notification_types.h" 39 #include "content/public/browser/resource_context.h" 40 #include "content/public/browser/web_contents.h" 41 #include "content/public/common/url_constants.h" 42 #include "net/base/io_buffer.h" 43 
#include "net/base/mime_util.h"
#include "net/base/net_util.h"
#include "net/url_request/url_request_context.h"
#include "third_party/WebKit/public/web/WebPageSerializerClient.h"

using base::Time;
using WebKit::WebPageSerializerClient;

namespace content {
namespace {

// A counter for uniquely identifying each save package. Every SavePackage
// constructor in this file reads and then increments it.
int g_save_package_id = 0;

// Default name which is used when we cannot derive a proper name from the
// resource URL.
const char kDefaultSaveName[] = "saved_resource";

// Upper bound (exclusive) for the ordinal number appended to a file name when
// resolving name conflicts between files which share the same base file name.
// It is assumed to be big enough in practice.
const int32 kMaxFileOrdinalNumber = 9999;

// Maximum length for a file path. Since Windows has the MAX_PATH limitation
// for file paths, we need to make sure the length of the path of every saved
// file is less than MAX_PATH. On POSIX the equivalent limit is PATH_MAX.
#if defined(OS_WIN)
const uint32 kMaxFilePathLength = MAX_PATH - 1;
#elif defined(OS_POSIX)
const uint32 kMaxFilePathLength = PATH_MAX - 1;
#endif

// Maximum length for the file ordinal number part. Ordinal numbers stay below
// kMaxFileOrdinalNumber (9999), so the longest ordinal number part is
// "(9998)", which is 6 characters.
const uint32 kMaxFileOrdinalNumberPartLength = 6;

// Strip current ordinal number, if any. Should only be used on pure
// file names, i.e. those stripped of their extensions.
// TODO(estade): improve this to not choke on alternate encodings.
82 base::FilePath::StringType StripOrdinalNumber( 83 const base::FilePath::StringType& pure_file_name) { 84 base::FilePath::StringType::size_type r_paren_index = 85 pure_file_name.rfind(FILE_PATH_LITERAL(')')); 86 base::FilePath::StringType::size_type l_paren_index = 87 pure_file_name.rfind(FILE_PATH_LITERAL('(')); 88 if (l_paren_index >= r_paren_index) 89 return pure_file_name; 90 91 for (base::FilePath::StringType::size_type i = l_paren_index + 1; 92 i != r_paren_index; ++i) { 93 if (!IsAsciiDigit(pure_file_name[i])) 94 return pure_file_name; 95 } 96 97 return pure_file_name.substr(0, l_paren_index); 98 } 99 100 // Check whether we can save page as complete-HTML for the contents which 101 // have specified a MIME type. Now only contents which have the MIME type 102 // "text/html" can be saved as complete-HTML. 103 bool CanSaveAsComplete(const std::string& contents_mime_type) { 104 return contents_mime_type == "text/html" || 105 contents_mime_type == "application/xhtml+xml"; 106 } 107 108 // Request handle for SavePackage downloads. Currently doesn't support 109 // pause/resume/cancel, but returns a WebContents. 110 class SavePackageRequestHandle : public DownloadRequestHandleInterface { 111 public: 112 SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package) 113 : save_package_(save_package) {} 114 115 // DownloadRequestHandleInterface 116 virtual WebContents* GetWebContents() const OVERRIDE { 117 return save_package_.get() ? 
save_package_->web_contents() : NULL; 118 } 119 virtual DownloadManager* GetDownloadManager() const OVERRIDE { 120 return NULL; 121 } 122 virtual void PauseRequest() const OVERRIDE {} 123 virtual void ResumeRequest() const OVERRIDE {} 124 virtual void CancelRequest() const OVERRIDE {} 125 virtual std::string DebugString() const OVERRIDE { 126 return "SavePackage DownloadRequestHandle"; 127 } 128 129 private: 130 base::WeakPtr<SavePackage> save_package_; 131 }; 132 133 } // namespace 134 135 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] = 136 #if defined(OS_WIN) 137 FILE_PATH_LITERAL("htm"); 138 #else 139 FILE_PATH_LITERAL("html"); 140 #endif 141 142 SavePackage::SavePackage(WebContents* web_contents, 143 SavePageType save_type, 144 const base::FilePath& file_full_path, 145 const base::FilePath& directory_full_path) 146 : WebContentsObserver(web_contents), 147 file_manager_(NULL), 148 download_manager_(NULL), 149 download_(NULL), 150 page_url_(GetUrlToBeSaved()), 151 saved_main_file_path_(file_full_path), 152 saved_main_directory_path_(directory_full_path), 153 title_(web_contents->GetTitle()), 154 start_tick_(base::TimeTicks::Now()), 155 finished_(false), 156 mhtml_finishing_(false), 157 user_canceled_(false), 158 disk_error_occurred_(false), 159 save_type_(save_type), 160 all_save_items_count_(0), 161 file_name_set_(&base::FilePath::CompareLessIgnoreCase), 162 wait_state_(INITIALIZE), 163 contents_id_(web_contents->GetRenderProcessHost()->GetID()), 164 unique_id_(g_save_package_id++), 165 wrote_to_completed_file_(false), 166 wrote_to_failed_file_(false) { 167 DCHECK(page_url_.is_valid()); 168 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) || 169 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) || 170 (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)); 171 DCHECK(!saved_main_file_path_.empty() && 172 saved_main_file_path_.value().length() <= kMaxFilePathLength); 173 DCHECK(!saved_main_directory_path_.empty() && 174 
saved_main_directory_path_.value().length() < kMaxFilePathLength); 175 InternalInit(); 176 } 177 178 SavePackage::SavePackage(WebContents* web_contents) 179 : WebContentsObserver(web_contents), 180 file_manager_(NULL), 181 download_manager_(NULL), 182 download_(NULL), 183 page_url_(GetUrlToBeSaved()), 184 title_(web_contents->GetTitle()), 185 start_tick_(base::TimeTicks::Now()), 186 finished_(false), 187 mhtml_finishing_(false), 188 user_canceled_(false), 189 disk_error_occurred_(false), 190 save_type_(SAVE_PAGE_TYPE_UNKNOWN), 191 all_save_items_count_(0), 192 file_name_set_(&base::FilePath::CompareLessIgnoreCase), 193 wait_state_(INITIALIZE), 194 contents_id_(web_contents->GetRenderProcessHost()->GetID()), 195 unique_id_(g_save_package_id++), 196 wrote_to_completed_file_(false), 197 wrote_to_failed_file_(false) { 198 DCHECK(page_url_.is_valid()); 199 InternalInit(); 200 } 201 202 // This is for testing use. Set |finished_| as true because we don't want 203 // method Cancel to be be called in destructor in test mode. 204 // We also don't call InternalInit(). 
205 SavePackage::SavePackage(WebContents* web_contents, 206 const base::FilePath& file_full_path, 207 const base::FilePath& directory_full_path) 208 : WebContentsObserver(web_contents), 209 file_manager_(NULL), 210 download_manager_(NULL), 211 download_(NULL), 212 saved_main_file_path_(file_full_path), 213 saved_main_directory_path_(directory_full_path), 214 start_tick_(base::TimeTicks::Now()), 215 finished_(true), 216 mhtml_finishing_(false), 217 user_canceled_(false), 218 disk_error_occurred_(false), 219 save_type_(SAVE_PAGE_TYPE_UNKNOWN), 220 all_save_items_count_(0), 221 file_name_set_(&base::FilePath::CompareLessIgnoreCase), 222 wait_state_(INITIALIZE), 223 contents_id_(0), 224 unique_id_(g_save_package_id++), 225 wrote_to_completed_file_(false), 226 wrote_to_failed_file_(false) { 227 } 228 229 SavePackage::~SavePackage() { 230 // Stop receiving saving job's updates 231 if (!finished_ && !canceled()) { 232 // Unexpected quit. 233 Cancel(true); 234 } 235 236 // We should no longer be observing the DownloadItem at this point. 237 CHECK(!download_); 238 239 DCHECK(all_save_items_count_ == (waiting_item_queue_.size() + 240 completed_count() + 241 in_process_count())); 242 // Free all SaveItems. 243 while (!waiting_item_queue_.empty()) { 244 // We still have some items which are waiting for start to save. 245 SaveItem* save_item = waiting_item_queue_.front(); 246 waiting_item_queue_.pop(); 247 delete save_item; 248 } 249 250 STLDeleteValues(&saved_success_items_); 251 STLDeleteValues(&in_progress_items_); 252 STLDeleteValues(&saved_failed_items_); 253 254 file_manager_ = NULL; 255 } 256 257 GURL SavePackage::GetUrlToBeSaved() { 258 // Instead of using web_contents_.GetURL here, we use url() (which is the 259 // "real" url of the page) from the NavigationEntry because it reflects its 260 // origin rather than the displayed one (returned by GetURL) which may be 261 // different (like having "view-source:" on the front). 
262 NavigationEntry* active_entry = 263 web_contents()->GetController().GetActiveEntry(); 264 return active_entry->GetURL(); 265 } 266 267 void SavePackage::Cancel(bool user_action) { 268 if (!canceled()) { 269 if (user_action) 270 user_canceled_ = true; 271 else 272 disk_error_occurred_ = true; 273 Stop(); 274 } 275 RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED); 276 } 277 278 // Init() can be called directly, or indirectly via GetSaveInfo(). In both 279 // cases, we need file_manager_ to be initialized, so we do this first. 280 void SavePackage::InternalInit() { 281 ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get(); 282 if (!rdh) { 283 NOTREACHED(); 284 return; 285 } 286 287 file_manager_ = rdh->save_file_manager(); 288 DCHECK(file_manager_); 289 290 download_manager_ = static_cast<DownloadManagerImpl*>( 291 BrowserContext::GetDownloadManager( 292 web_contents()->GetBrowserContext())); 293 DCHECK(download_manager_); 294 295 RecordSavePackageEvent(SAVE_PACKAGE_STARTED); 296 } 297 298 bool SavePackage::Init( 299 const SavePackageDownloadCreatedCallback& download_created_callback) { 300 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 301 // Set proper running state. 302 if (wait_state_ != INITIALIZE) 303 return false; 304 305 wait_state_ = START_PROCESS; 306 307 // Initialize the request context and resource dispatcher. 308 BrowserContext* browser_context = web_contents()->GetBrowserContext(); 309 if (!browser_context) { 310 NOTREACHED(); 311 return false; 312 } 313 314 scoped_ptr<DownloadRequestHandleInterface> request_handle( 315 new SavePackageRequestHandle(AsWeakPtr())); 316 // The download manager keeps ownership but adds us as an observer. 317 download_manager_->CreateSavePackageDownloadItem( 318 saved_main_file_path_, 319 page_url_, 320 ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ? 
321 "multipart/related" : "text/html"), 322 request_handle.Pass(), 323 base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(), 324 download_created_callback)); 325 return true; 326 } 327 328 void SavePackage::InitWithDownloadItem( 329 const SavePackageDownloadCreatedCallback& download_created_callback, 330 DownloadItemImpl* item) { 331 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 332 DCHECK(item); 333 download_ = item; 334 download_->AddObserver(this); 335 // Confirm above didn't delete the tab out from under us. 336 if (!download_created_callback.is_null()) 337 download_created_callback.Run(download_); 338 339 // Check save type and process the save page job. 340 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) { 341 // Get directory 342 DCHECK(!saved_main_directory_path_.empty()); 343 GetAllSavableResourceLinksForCurrentPage(); 344 } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) { 345 web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind( 346 &SavePackage::OnMHTMLGenerated, this)); 347 } else { 348 DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_; 349 wait_state_ = NET_FILES; 350 SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ? 351 SaveFileCreateInfo::SAVE_FILE_FROM_FILE : 352 SaveFileCreateInfo::SAVE_FILE_FROM_NET; 353 SaveItem* save_item = new SaveItem(page_url_, 354 Referrer(), 355 this, 356 save_source); 357 // Add this item to waiting list. 358 waiting_item_queue_.push(save_item); 359 all_save_items_count_ = 1; 360 download_->SetTotalBytes(1); 361 362 DoSavingProcess(); 363 } 364 } 365 366 void SavePackage::OnMHTMLGenerated(const base::FilePath& path, int64 size) { 367 if (size <= 0) { 368 Cancel(false); 369 return; 370 } 371 wrote_to_completed_file_ = true; 372 373 // Hack to avoid touching download_ after user cancel. 374 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 375 // with SavePackage flow. 
376 if (download_->GetState() == DownloadItem::IN_PROGRESS) { 377 download_->SetTotalBytes(size); 378 download_->DestinationUpdate(size, 0, std::string()); 379 // Must call OnAllDataSaved here in order for 380 // GDataDownloadObserver::ShouldUpload() to return true. 381 // ShouldCompleteDownload() may depend on the gdata uploader to finish. 382 download_->OnAllDataSaved(DownloadItem::kEmptyFileHash); 383 } 384 385 if (!download_manager_->GetDelegate()) { 386 Finish(); 387 return; 388 } 389 390 if (download_manager_->GetDelegate()->ShouldCompleteDownload( 391 download_, base::Bind(&SavePackage::Finish, this))) { 392 Finish(); 393 } 394 } 395 396 // On POSIX, the length of |pure_file_name| + |file_name_ext| is further 397 // restricted by NAME_MAX. The maximum allowed path looks like: 398 // '/path/to/save_dir' + '/' + NAME_MAX. 399 uint32 SavePackage::GetMaxPathLengthForDirectory( 400 const base::FilePath& base_dir) { 401 #if defined(OS_POSIX) 402 return std::min(kMaxFilePathLength, 403 static_cast<uint32>(base_dir.value().length()) + 404 NAME_MAX + 1); 405 #else 406 return kMaxFilePathLength; 407 #endif 408 } 409 410 // File name is considered being consist of pure file name, dot and file 411 // extension name. File name might has no dot and file extension, or has 412 // multiple dot inside file name. The dot, which separates the pure file 413 // name and file extension name, is last dot in the whole file name. 414 // This function is for making sure the length of specified file path is not 415 // great than the specified maximum length of file path and getting safe pure 416 // file name part if the input pure file name is too long. 417 // The parameter |dir_path| specifies directory part of the specified 418 // file path. The parameter |file_name_ext| specifies file extension 419 // name part of the specified file path (including start dot). The parameter 420 // |max_file_path_len| specifies maximum length of the specified file path. 
421 // The parameter |pure_file_name| input pure file name part of the specified 422 // file path. If the length of specified file path is great than 423 // |max_file_path_len|, the |pure_file_name| will output new pure file name 424 // part for making sure the length of specified file path is less than 425 // specified maximum length of file path. Return false if the function can 426 // not get a safe pure file name, otherwise it returns true. 427 bool SavePackage::GetSafePureFileName( 428 const base::FilePath& dir_path, 429 const base::FilePath::StringType& file_name_ext, 430 uint32 max_file_path_len, 431 base::FilePath::StringType* pure_file_name) { 432 DCHECK(!pure_file_name->empty()); 433 int available_length = static_cast<int>(max_file_path_len - 434 dir_path.value().length() - 435 file_name_ext.length()); 436 // Need an extra space for the separator. 437 if (!dir_path.EndsWithSeparator()) 438 --available_length; 439 440 // Plenty of room. 441 if (static_cast<int>(pure_file_name->length()) <= available_length) 442 return true; 443 444 // Limited room. Truncate |pure_file_name| to fit. 445 if (available_length > 0) { 446 *pure_file_name = pure_file_name->substr(0, available_length); 447 return true; 448 } 449 450 // Not enough room to even use a shortened |pure_file_name|. 451 pure_file_name->clear(); 452 return false; 453 } 454 455 // Generate name for saving resource. 456 bool SavePackage::GenerateFileName(const std::string& disposition, 457 const GURL& url, 458 bool need_html_ext, 459 base::FilePath::StringType* generated_name) { 460 // TODO(jungshik): Figure out the referrer charset when having one 461 // makes sense and pass it to GenerateFileName. 
462 base::FilePath file_path = net::GenerateFileName(url, 463 disposition, 464 std::string(), 465 std::string(), 466 std::string(), 467 kDefaultSaveName); 468 469 DCHECK(!file_path.empty()); 470 base::FilePath::StringType pure_file_name = 471 file_path.RemoveExtension().BaseName().value(); 472 base::FilePath::StringType file_name_ext = file_path.Extension(); 473 474 // If it is HTML resource, use ".htm{l,}" as its extension. 475 if (need_html_ext) { 476 file_name_ext = FILE_PATH_LITERAL("."); 477 file_name_ext.append(kDefaultHtmlExtension); 478 } 479 480 // Need to make sure the suggested file name is not too long. 481 uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_); 482 483 // Get safe pure file name. 484 if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext, 485 max_path, &pure_file_name)) 486 return false; 487 488 base::FilePath::StringType file_name = pure_file_name + file_name_ext; 489 490 // Check whether we already have same name in a case insensitive manner. 491 FileNameSet::const_iterator iter = file_name_set_.find(file_name); 492 if (iter == file_name_set_.end()) { 493 file_name_set_.insert(file_name); 494 } else { 495 // Found same name, increase the ordinal number for the file name. 496 pure_file_name = 497 base::FilePath(*iter).RemoveExtension().BaseName().value(); 498 base::FilePath::StringType base_file_name = 499 StripOrdinalNumber(pure_file_name); 500 501 // We need to make sure the length of base file name plus maximum ordinal 502 // number path will be less than or equal to kMaxFilePathLength. 503 if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext, 504 max_path - kMaxFileOrdinalNumberPartLength, &base_file_name)) 505 return false; 506 507 // Prepare the new ordinal number. 
508 uint32 ordinal_number; 509 FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name); 510 if (it == file_name_count_map_.end()) { 511 // First base-name-conflict resolving, use 1 as initial ordinal number. 512 file_name_count_map_[base_file_name] = 1; 513 ordinal_number = 1; 514 } else { 515 // We have met same base-name conflict, use latest ordinal number. 516 ordinal_number = it->second; 517 } 518 519 if (ordinal_number > (kMaxFileOrdinalNumber - 1)) { 520 // Use a random file from temporary file. 521 base::FilePath temp_file; 522 file_util::CreateTemporaryFile(&temp_file); 523 file_name = temp_file.RemoveExtension().BaseName().value(); 524 // Get safe pure file name. 525 if (!GetSafePureFileName(saved_main_directory_path_, 526 base::FilePath::StringType(), 527 max_path, &file_name)) 528 return false; 529 } else { 530 for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) { 531 base::FilePath::StringType new_name = base_file_name + 532 base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext; 533 if (file_name_set_.find(new_name) == file_name_set_.end()) { 534 // Resolved name conflict. 535 file_name = new_name; 536 file_name_count_map_[base_file_name] = ++i; 537 break; 538 } 539 } 540 } 541 542 file_name_set_.insert(file_name); 543 } 544 545 DCHECK(!file_name.empty()); 546 generated_name->assign(file_name); 547 548 return true; 549 } 550 551 // We have received a message from SaveFileManager about a new saving job. We 552 // create a SaveItem and store it in our in_progress list. 553 void SavePackage::StartSave(const SaveFileCreateInfo* info) { 554 DCHECK(info && !info->url.is_empty()); 555 556 SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec()); 557 if (it == in_progress_items_.end()) { 558 // If not found, we must have cancel action. 
559 DCHECK(canceled()); 560 return; 561 } 562 SaveItem* save_item = it->second; 563 564 DCHECK(!saved_main_file_path_.empty()); 565 566 save_item->SetSaveId(info->save_id); 567 save_item->SetTotalBytes(info->total_bytes); 568 569 // Determine the proper path for a saving job, by choosing either the default 570 // save directory, or prompting the user. 571 DCHECK(!save_item->has_final_name()); 572 if (info->url != page_url_) { 573 base::FilePath::StringType generated_name; 574 // For HTML resource file, make sure it will have .htm as extension name, 575 // otherwise, when you open the saved page in Chrome again, download 576 // file manager will treat it as downloadable resource, and download it 577 // instead of opening it as HTML. 578 bool need_html_ext = 579 info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM; 580 if (!GenerateFileName(info->content_disposition, 581 GURL(info->url), 582 need_html_ext, 583 &generated_name)) { 584 // We can not generate file name for this SaveItem, so we cancel the 585 // saving page job if the save source is from serialized DOM data. 586 // Otherwise, it means this SaveItem is sub-resource type, we treat it 587 // as an error happened on saving. We can ignore this type error for 588 // sub-resource links which will be resolved as absolute links instead 589 // of local links in final saved contents. 590 if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM) 591 Cancel(true); 592 else 593 SaveFinished(save_item->save_id(), 0, false); 594 return; 595 } 596 597 // When saving page as only-HTML, we only have a SaveItem whose url 598 // must be page_url_. 599 DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML); 600 DCHECK(!saved_main_directory_path_.empty()); 601 602 // Now we get final name retrieved from GenerateFileName, we will use it 603 // rename the SaveItem. 
604 base::FilePath final_name = 605 saved_main_directory_path_.Append(generated_name); 606 save_item->Rename(final_name); 607 } else { 608 // It is the main HTML file, use the name chosen by the user. 609 save_item->Rename(saved_main_file_path_); 610 } 611 612 // If the save source is from file system, inform SaveFileManager to copy 613 // corresponding file to the file path which this SaveItem specifies. 614 if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) { 615 BrowserThread::PostTask( 616 BrowserThread::FILE, FROM_HERE, 617 base::Bind(&SaveFileManager::SaveLocalFile, 618 file_manager_, 619 save_item->url(), 620 save_item->save_id(), 621 contents_id())); 622 return; 623 } 624 625 // Check whether we begin to require serialized HTML data. 626 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML && 627 wait_state_ == HTML_DATA) { 628 // Inform backend to serialize the all frames' DOM and send serialized 629 // HTML data back. 630 GetSerializedHtmlDataForCurrentPageWithLocalLinks(); 631 } 632 } 633 634 SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) { 635 if (in_process_count()) { 636 for (SaveUrlItemMap::iterator it = in_progress_items_.begin(); 637 it != in_progress_items_.end(); ++it) { 638 SaveItem* save_item = it->second; 639 DCHECK(save_item->state() == SaveItem::IN_PROGRESS); 640 if (save_item->save_id() == save_id) 641 return save_item; 642 } 643 } 644 return NULL; 645 } 646 647 void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) { 648 SaveUrlItemMap::iterator it = in_progress_items_.find( 649 save_item->url().spec()); 650 DCHECK(it != in_progress_items_.end()); 651 DCHECK(save_item == it->second); 652 in_progress_items_.erase(it); 653 654 if (save_item->success()) { 655 // Add it to saved_success_items_. 656 DCHECK(saved_success_items_.find(save_item->save_id()) == 657 saved_success_items_.end()); 658 saved_success_items_[save_item->save_id()] = save_item; 659 } else { 660 // Add it to saved_failed_items_. 
661 DCHECK(saved_failed_items_.find(save_item->url().spec()) == 662 saved_failed_items_.end()); 663 saved_failed_items_[save_item->url().spec()] = save_item; 664 } 665 } 666 667 // Called for updating saving state. 668 bool SavePackage::UpdateSaveProgress(int32 save_id, 669 int64 size, 670 bool write_success) { 671 // Because we might have canceled this saving job before, 672 // so we might not find corresponding SaveItem. 673 SaveItem* save_item = LookupItemInProcessBySaveId(save_id); 674 if (!save_item) 675 return false; 676 677 save_item->Update(size); 678 679 // If we got disk error, cancel whole save page job. 680 if (!write_success) { 681 // Cancel job with reason of disk error. 682 Cancel(false); 683 } 684 return true; 685 } 686 687 // Stop all page saving jobs that are in progress and instruct the file thread 688 // to delete all saved files. 689 void SavePackage::Stop() { 690 // If we haven't moved out of the initial state, there's nothing to cancel and 691 // there won't be valid pointers for file_manager_ or download_. 692 if (wait_state_ == INITIALIZE) 693 return; 694 695 // When stopping, if it still has some items in in_progress, cancel them. 696 DCHECK(canceled()); 697 if (in_process_count()) { 698 SaveUrlItemMap::iterator it = in_progress_items_.begin(); 699 for (; it != in_progress_items_.end(); ++it) { 700 SaveItem* save_item = it->second; 701 DCHECK(save_item->state() == SaveItem::IN_PROGRESS); 702 save_item->Cancel(); 703 } 704 // Remove all in progress item to saved map. For failed items, they will 705 // be put into saved_failed_items_, for successful item, they will be put 706 // into saved_success_items_. 707 while (in_process_count()) 708 PutInProgressItemToSavedMap(in_progress_items_.begin()->second); 709 } 710 711 // This vector contains the save ids of the save files which SaveFileManager 712 // needs to remove from its save_file_map_. 
713 SaveIDList save_ids; 714 for (SavedItemMap::iterator it = saved_success_items_.begin(); 715 it != saved_success_items_.end(); ++it) 716 save_ids.push_back(it->first); 717 for (SaveUrlItemMap::iterator it = saved_failed_items_.begin(); 718 it != saved_failed_items_.end(); ++it) 719 save_ids.push_back(it->second->save_id()); 720 721 BrowserThread::PostTask( 722 BrowserThread::FILE, FROM_HERE, 723 base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap, 724 file_manager_, 725 save_ids)); 726 727 finished_ = true; 728 wait_state_ = FAILED; 729 730 // Inform the DownloadItem we have canceled whole save page job. 731 if (download_) { 732 download_->Cancel(false); 733 FinalizeDownloadEntry(); 734 } 735 } 736 737 void SavePackage::CheckFinish() { 738 if (in_process_count() || finished_) 739 return; 740 741 base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML && 742 saved_success_items_.size() > 1) ? 743 saved_main_directory_path_ : base::FilePath(); 744 745 // This vector contains the final names of all the successfully saved files 746 // along with their save ids. It will be passed to SaveFileManager to do the 747 // renaming job. 748 FinalNameList final_names; 749 for (SavedItemMap::iterator it = saved_success_items_.begin(); 750 it != saved_success_items_.end(); ++it) 751 final_names.push_back(std::make_pair(it->first, 752 it->second->full_path())); 753 754 BrowserThread::PostTask( 755 BrowserThread::FILE, FROM_HERE, 756 base::Bind(&SaveFileManager::RenameAllFiles, 757 file_manager_, 758 final_names, 759 dir, 760 web_contents()->GetRenderProcessHost()->GetID(), 761 web_contents()->GetRenderViewHost()->GetRoutingID(), 762 id())); 763 } 764 765 // Successfully finished all items of this SavePackage. 766 void SavePackage::Finish() { 767 // User may cancel the job when we're moving files to the final directory. 768 if (canceled()) 769 return; 770 771 wait_state_ = SUCCESSFUL; 772 finished_ = true; 773 774 // Record finish. 
775 RecordSavePackageEvent(SAVE_PACKAGE_FINISHED); 776 777 // Record any errors that occurred. 778 if (wrote_to_completed_file_) { 779 RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED); 780 } 781 782 if (wrote_to_failed_file_) { 783 RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED); 784 } 785 786 // This vector contains the save ids of the save files which SaveFileManager 787 // needs to remove from its save_file_map_. 788 SaveIDList save_ids; 789 for (SaveUrlItemMap::iterator it = saved_failed_items_.begin(); 790 it != saved_failed_items_.end(); ++it) 791 save_ids.push_back(it->second->save_id()); 792 793 BrowserThread::PostTask( 794 BrowserThread::FILE, FROM_HERE, 795 base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap, 796 file_manager_, 797 save_ids)); 798 799 if (download_) { 800 // Hack to avoid touching download_ after user cancel. 801 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 802 // with SavePackage flow. 803 if (download_->GetState() == DownloadItem::IN_PROGRESS) { 804 if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) { 805 download_->DestinationUpdate( 806 all_save_items_count_, CurrentSpeed(), std::string()); 807 download_->OnAllDataSaved(DownloadItem::kEmptyFileHash); 808 } 809 download_->MarkAsComplete(); 810 } 811 FinalizeDownloadEntry(); 812 } 813 } 814 815 // Called for updating end state. 816 void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) { 817 // Because we might have canceled this saving job before, 818 // so we might not find corresponding SaveItem. Just ignore it. 819 SaveItem* save_item = LookupItemInProcessBySaveId(save_id); 820 if (!save_item) 821 return; 822 823 // Let SaveItem set end state. 824 save_item->Finish(size, is_success); 825 // Remove the associated save id and SavePackage. 826 file_manager_->RemoveSaveFile(save_id, save_item->url(), this); 827 828 PutInProgressItemToSavedMap(save_item); 829 830 // Inform the DownloadItem to update UI. 
831 // We use the received bytes as number of saved files. 832 // Hack to avoid touching download_ after user cancel. 833 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 834 // with SavePackage flow. 835 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) { 836 download_->DestinationUpdate( 837 completed_count(), CurrentSpeed(), std::string()); 838 } 839 840 if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM && 841 save_item->url() == page_url_ && !save_item->received_bytes()) { 842 // If size of main HTML page is 0, treat it as disk error. 843 Cancel(false); 844 return; 845 } 846 847 if (canceled()) { 848 DCHECK(finished_); 849 return; 850 } 851 852 // Continue processing the save page job. 853 DoSavingProcess(); 854 855 // Check whether we can successfully finish whole job. 856 CheckFinish(); 857 } 858 859 // Sometimes, the net io will only call SaveFileManager::SaveFinished with 860 // save id -1 when it encounters error. Since in this case, save id will be 861 // -1, so we can only use URL to find which SaveItem is associated with 862 // this error. 863 // Saving an item failed. If it's a sub-resource, ignore it. If the error comes 864 // from serializing HTML data, then cancel saving page. 865 void SavePackage::SaveFailed(const GURL& save_url) { 866 SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec()); 867 if (it == in_progress_items_.end()) { 868 NOTREACHED(); // Should not exist! 869 return; 870 } 871 SaveItem* save_item = it->second; 872 873 save_item->Finish(0, false); 874 875 PutInProgressItemToSavedMap(save_item); 876 877 // Inform the DownloadItem to update UI. 878 // We use the received bytes as number of saved files. 879 // Hack to avoid touching download_ after user cancel. 880 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 881 // with SavePackage flow. 
882 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) { 883 download_->DestinationUpdate( 884 completed_count(), CurrentSpeed(), std::string()); 885 } 886 887 if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) || 888 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) || 889 (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) { 890 // We got error when saving page. Treat it as disk error. 891 Cancel(true); 892 } 893 894 if (canceled()) { 895 DCHECK(finished_); 896 return; 897 } 898 899 // Continue processing the save page job. 900 DoSavingProcess(); 901 902 CheckFinish(); 903 } 904 905 void SavePackage::SaveCanceled(SaveItem* save_item) { 906 // Call the RemoveSaveFile in UI thread. 907 file_manager_->RemoveSaveFile(save_item->save_id(), 908 save_item->url(), 909 this); 910 if (save_item->save_id() != -1) 911 BrowserThread::PostTask( 912 BrowserThread::FILE, FROM_HERE, 913 base::Bind(&SaveFileManager::CancelSave, 914 file_manager_, 915 save_item->save_id())); 916 } 917 918 // Initiate a saving job of a specific URL. We send the request to 919 // SaveFileManager, which will dispatch it to different approach according to 920 // the save source. Parameter process_all_remaining_items indicates whether 921 // we need to save all remaining items. 922 void SavePackage::SaveNextFile(bool process_all_remaining_items) { 923 DCHECK(web_contents()); 924 DCHECK(waiting_item_queue_.size()); 925 926 do { 927 // Pop SaveItem from waiting list. 928 SaveItem* save_item = waiting_item_queue_.front(); 929 waiting_item_queue_.pop(); 930 931 // Add the item to in_progress_items_. 
932 SaveUrlItemMap::iterator it = in_progress_items_.find( 933 save_item->url().spec()); 934 DCHECK(it == in_progress_items_.end()); 935 in_progress_items_[save_item->url().spec()] = save_item; 936 save_item->Start(); 937 file_manager_->SaveURL(save_item->url(), 938 save_item->referrer(), 939 web_contents()->GetRenderProcessHost()->GetID(), 940 routing_id(), 941 save_item->save_source(), 942 save_item->full_path(), 943 web_contents()-> 944 GetBrowserContext()->GetResourceContext(), 945 this); 946 } while (process_all_remaining_items && waiting_item_queue_.size()); 947 } 948 949 // Calculate the percentage of whole save page job. 950 int SavePackage::PercentComplete() { 951 if (!all_save_items_count_) 952 return 0; 953 else if (!in_process_count()) 954 return 100; 955 else 956 return completed_count() / all_save_items_count_; 957 } 958 959 int64 SavePackage::CurrentSpeed() const { 960 base::TimeDelta diff = base::TimeTicks::Now() - start_tick_; 961 int64 diff_ms = diff.InMilliseconds(); 962 return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms; 963 } 964 965 // Continue processing the save page job after one SaveItem has been 966 // finished. 967 void SavePackage::DoSavingProcess() { 968 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) { 969 // We guarantee that images and JavaScripts must be downloaded first. 970 // So when finishing all those sub-resources, we will know which 971 // sub-resource's link can be replaced with local file path, which 972 // sub-resource's link need to be replaced with absolute URL which 973 // point to its internet address because it got error when saving its data. 974 SaveItem* save_item = NULL; 975 // Start a new SaveItem job if we still have job in waiting queue. 
976 if (waiting_item_queue_.size()) { 977 DCHECK(wait_state_ == NET_FILES); 978 save_item = waiting_item_queue_.front(); 979 if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) { 980 SaveNextFile(false); 981 } else if (!in_process_count()) { 982 // If there is no in-process SaveItem, it means all sub-resources 983 // have been processed. Now we need to start serializing HTML DOM 984 // for the current page to get the generated HTML data. 985 wait_state_ = HTML_DATA; 986 // All non-HTML resources have been finished, start all remaining 987 // HTML files. 988 SaveNextFile(true); 989 } 990 } else if (in_process_count()) { 991 // Continue asking for HTML data. 992 DCHECK(wait_state_ == HTML_DATA); 993 } 994 } else { 995 // Save as HTML only or MHTML. 996 DCHECK(wait_state_ == NET_FILES); 997 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) || 998 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML)); 999 if (waiting_item_queue_.size()) { 1000 DCHECK(all_save_items_count_ == waiting_item_queue_.size()); 1001 SaveNextFile(false); 1002 } 1003 } 1004 } 1005 1006 bool SavePackage::OnMessageReceived(const IPC::Message& message) { 1007 bool handled = true; 1008 IPC_BEGIN_MESSAGE_MAP(SavePackage, message) 1009 IPC_MESSAGE_HANDLER(ViewHostMsg_SendCurrentPageAllSavableResourceLinks, 1010 OnReceivedSavableResourceLinksForCurrentPage) 1011 IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData, 1012 OnReceivedSerializedHtmlData) 1013 IPC_MESSAGE_UNHANDLED(handled = false) 1014 IPC_END_MESSAGE_MAP() 1015 return handled; 1016 } 1017 1018 // After finishing all SaveItems which need to get data from net. 1019 // We collect all URLs which have local storage and send the 1020 // map:(originalURL:currentLocalPath) to render process (backend). 1021 // Then render process will serialize DOM and send data to us. 
1022 void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() { 1023 if (wait_state_ != HTML_DATA) 1024 return; 1025 std::vector<GURL> saved_links; 1026 std::vector<base::FilePath> saved_file_paths; 1027 int successful_started_items_count = 0; 1028 1029 // Collect all saved items which have local storage. 1030 // First collect the status of all the resource files and check whether they 1031 // have created local files although they have not been completely saved. 1032 // If yes, the file can be saved. Otherwise, there is a disk error, so we 1033 // need to cancel the page saving job. 1034 for (SaveUrlItemMap::iterator it = in_progress_items_.begin(); 1035 it != in_progress_items_.end(); ++it) { 1036 DCHECK(it->second->save_source() == 1037 SaveFileCreateInfo::SAVE_FILE_FROM_DOM); 1038 if (it->second->has_final_name()) 1039 successful_started_items_count++; 1040 saved_links.push_back(it->second->url()); 1041 saved_file_paths.push_back(it->second->file_name()); 1042 } 1043 1044 // If not all file of HTML resource have been started, then wait. 1045 if (successful_started_items_count != in_process_count()) 1046 return; 1047 1048 // Collect all saved success items. 1049 for (SavedItemMap::iterator it = saved_success_items_.begin(); 1050 it != saved_success_items_.end(); ++it) { 1051 DCHECK(it->second->has_final_name()); 1052 saved_links.push_back(it->second->url()); 1053 saved_file_paths.push_back(it->second->file_name()); 1054 } 1055 1056 // Get the relative directory name. 1057 base::FilePath relative_dir_name = saved_main_directory_path_.BaseName(); 1058 1059 Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks( 1060 routing_id(), saved_links, saved_file_paths, relative_dir_name)); 1061 } 1062 1063 // Process the serialized HTML content data of a specified web page 1064 // retrieved from render process. 
1065 void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url, 1066 const std::string& data, 1067 int32 status) { 1068 WebPageSerializerClient::PageSerializationStatus flag = 1069 static_cast<WebPageSerializerClient::PageSerializationStatus>(status); 1070 // Check current state. 1071 if (wait_state_ != HTML_DATA) 1072 return; 1073 1074 int id = contents_id(); 1075 // If the all frames are finished saving, we need to close the 1076 // remaining SaveItems. 1077 if (flag == WebPageSerializerClient::AllFramesAreFinished) { 1078 for (SaveUrlItemMap::iterator it = in_progress_items_.begin(); 1079 it != in_progress_items_.end(); ++it) { 1080 VLOG(20) << " " << __FUNCTION__ << "()" 1081 << " save_id = " << it->second->save_id() 1082 << " url = \"" << it->second->url().spec() << "\""; 1083 BrowserThread::PostTask( 1084 BrowserThread::FILE, FROM_HERE, 1085 base::Bind(&SaveFileManager::SaveFinished, 1086 file_manager_, 1087 it->second->save_id(), 1088 it->second->url(), 1089 id, 1090 true)); 1091 } 1092 return; 1093 } 1094 1095 SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec()); 1096 if (it == in_progress_items_.end()) { 1097 for (SavedItemMap::iterator saved_it = saved_success_items_.begin(); 1098 saved_it != saved_success_items_.end(); ++saved_it) { 1099 if (saved_it->second->url() == frame_url) { 1100 wrote_to_completed_file_ = true; 1101 break; 1102 } 1103 } 1104 1105 it = saved_failed_items_.find(frame_url.spec()); 1106 if (it != saved_failed_items_.end()) 1107 wrote_to_failed_file_ = true; 1108 1109 return; 1110 } 1111 1112 SaveItem* save_item = it->second; 1113 DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM); 1114 1115 if (!data.empty()) { 1116 // Prepare buffer for saving HTML data. 1117 scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size())); 1118 memcpy(new_data->data(), data.data(), data.size()); 1119 1120 // Call write file functionality in file thread. 
1121 BrowserThread::PostTask( 1122 BrowserThread::FILE, FROM_HERE, 1123 base::Bind(&SaveFileManager::UpdateSaveProgress, 1124 file_manager_, 1125 save_item->save_id(), 1126 new_data, 1127 static_cast<int>(data.size()))); 1128 } 1129 1130 // Current frame is completed saving, call finish in file thread. 1131 if (flag == WebPageSerializerClient::CurrentFrameIsFinished) { 1132 VLOG(20) << " " << __FUNCTION__ << "()" 1133 << " save_id = " << save_item->save_id() 1134 << " url = \"" << save_item->url().spec() << "\""; 1135 BrowserThread::PostTask( 1136 BrowserThread::FILE, FROM_HERE, 1137 base::Bind(&SaveFileManager::SaveFinished, 1138 file_manager_, 1139 save_item->save_id(), 1140 save_item->url(), 1141 id, 1142 true)); 1143 } 1144 } 1145 1146 // Ask for all savable resource links from backend, include main frame and 1147 // sub-frame. 1148 void SavePackage::GetAllSavableResourceLinksForCurrentPage() { 1149 if (wait_state_ != START_PROCESS) 1150 return; 1151 1152 wait_state_ = RESOURCES_LIST; 1153 Send(new ViewMsg_GetAllSavableResourceLinksForCurrentPage(routing_id(), 1154 page_url_)); 1155 } 1156 1157 // Give backend the lists which contain all resource links that have local 1158 // storage, after which, render process will serialize DOM for generating 1159 // HTML data. 1160 void SavePackage::OnReceivedSavableResourceLinksForCurrentPage( 1161 const std::vector<GURL>& resources_list, 1162 const std::vector<Referrer>& referrers_list, 1163 const std::vector<GURL>& frames_list) { 1164 if (wait_state_ != RESOURCES_LIST) 1165 return; 1166 1167 if (resources_list.size() != referrers_list.size()) 1168 return; 1169 1170 all_save_items_count_ = static_cast<int>(resources_list.size()) + 1171 static_cast<int>(frames_list.size()); 1172 1173 // We use total bytes as the total number of files we want to save. 1174 // Hack to avoid touching download_ after user cancel. 1175 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 1176 // with SavePackage flow. 
1177 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) 1178 download_->SetTotalBytes(all_save_items_count_); 1179 1180 if (all_save_items_count_) { 1181 // Put all sub-resources to wait list. 1182 for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) { 1183 const GURL& u = resources_list[i]; 1184 DCHECK(u.is_valid()); 1185 SaveFileCreateInfo::SaveFileSource save_source = u.SchemeIsFile() ? 1186 SaveFileCreateInfo::SAVE_FILE_FROM_FILE : 1187 SaveFileCreateInfo::SAVE_FILE_FROM_NET; 1188 SaveItem* save_item = new SaveItem(u, referrers_list[i], 1189 this, save_source); 1190 waiting_item_queue_.push(save_item); 1191 } 1192 // Put all HTML resources to wait list. 1193 for (int i = 0; i < static_cast<int>(frames_list.size()); ++i) { 1194 const GURL& u = frames_list[i]; 1195 DCHECK(u.is_valid()); 1196 SaveItem* save_item = new SaveItem( 1197 u, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM); 1198 waiting_item_queue_.push(save_item); 1199 } 1200 wait_state_ = NET_FILES; 1201 DoSavingProcess(); 1202 } else { 1203 // No resource files need to be saved, treat it as user cancel. 1204 Cancel(true); 1205 } 1206 } 1207 1208 base::FilePath SavePackage::GetSuggestedNameForSaveAs( 1209 bool can_save_as_complete, 1210 const std::string& contents_mime_type, 1211 const std::string& accept_langs) { 1212 base::FilePath name_with_proper_ext = 1213 base::FilePath::FromWStringHack(UTF16ToWideHack(title_)); 1214 1215 // If the page's title matches its URL, use the URL. Try to use the last path 1216 // component or if there is none, the domain as the file name. 1217 // Normally we want to base the filename on the page title, or if it doesn't 1218 // exist, on the URL. It's not easy to tell if the page has no title, because 1219 // if the page has no title, WebContents::GetTitle() will return the page's 1220 // URL (adjusted for display purposes). 
Therefore, we convert the "title" 1221 // back to a URL, and if it matches the original page URL, we know the page 1222 // had no title (or had a title equal to its URL, which is fine to treat 1223 // similarly). 1224 if (title_ == net::FormatUrl(page_url_, accept_langs)) { 1225 std::string url_path; 1226 if (!page_url_.SchemeIs(chrome::kDataScheme)) { 1227 std::vector<std::string> url_parts; 1228 base::SplitString(page_url_.path(), '/', &url_parts); 1229 if (!url_parts.empty()) { 1230 for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) { 1231 url_path = url_parts[i]; 1232 if (!url_path.empty()) 1233 break; 1234 } 1235 } 1236 if (url_path.empty()) 1237 url_path = page_url_.host(); 1238 } else { 1239 url_path = "dataurl"; 1240 } 1241 name_with_proper_ext = 1242 base::FilePath::FromWStringHack(UTF8ToWide(url_path)); 1243 } 1244 1245 // Ask user for getting final saving name. 1246 name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext, 1247 contents_mime_type); 1248 // Adjust extension for complete types. 1249 if (can_save_as_complete) 1250 name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext); 1251 1252 base::FilePath::StringType file_name = name_with_proper_ext.value(); 1253 file_util::ReplaceIllegalCharactersInPath(&file_name, ' '); 1254 return base::FilePath(file_name); 1255 } 1256 1257 base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) { 1258 // If the file name doesn't have an extension suitable for HTML files, 1259 // append one. 1260 base::FilePath::StringType ext = name.Extension(); 1261 if (!ext.empty()) 1262 ext.erase(ext.begin()); // Erase preceding '.'. 
1263 std::string mime_type; 1264 if (!net::GetMimeTypeFromExtension(ext, &mime_type) || 1265 !CanSaveAsComplete(mime_type)) { 1266 return base::FilePath(name.value() + FILE_PATH_LITERAL(".") + 1267 kDefaultHtmlExtension); 1268 } 1269 return name; 1270 } 1271 1272 base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name, 1273 const std::string& contents_mime_type) { 1274 // Start extension at 1 to skip over period if non-empty. 1275 base::FilePath::StringType ext = name.Extension().length() ? 1276 name.Extension().substr(1) : name.Extension(); 1277 base::FilePath::StringType suggested_extension = 1278 ExtensionForMimeType(contents_mime_type); 1279 std::string mime_type; 1280 if (!suggested_extension.empty() && 1281 !net::GetMimeTypeFromExtension(ext, &mime_type)) { 1282 // Extension is absent or needs to be updated. 1283 return base::FilePath(name.value() + FILE_PATH_LITERAL(".") + 1284 suggested_extension); 1285 } 1286 return name; 1287 } 1288 1289 const base::FilePath::CharType* SavePackage::ExtensionForMimeType( 1290 const std::string& contents_mime_type) { 1291 static const struct { 1292 const base::FilePath::CharType *mime_type; 1293 const base::FilePath::CharType *suggested_extension; 1294 } extensions[] = { 1295 { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension }, 1296 { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") }, 1297 { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") }, 1298 { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") }, 1299 { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") }, 1300 }; 1301 #if defined(OS_POSIX) 1302 base::FilePath::StringType mime_type(contents_mime_type); 1303 #elif defined(OS_WIN) 1304 base::FilePath::StringType mime_type(UTF8ToWide(contents_mime_type)); 1305 #endif // OS_WIN 1306 for (uint32 i = 0; i < ARRAYSIZE_UNSAFE(extensions); ++i) { 1307 if (mime_type == extensions[i].mime_type) 1308 return extensions[i].suggested_extension; 1309 } 1310 return 
FILE_PATH_LITERAL(""); 1311 } 1312 1313 WebContents* SavePackage::web_contents() const { 1314 return WebContentsObserver::web_contents(); 1315 } 1316 1317 void SavePackage::GetSaveInfo() { 1318 // Can't use web_contents_ in the file thread, so get the data that we need 1319 // before calling to it. 1320 base::FilePath website_save_dir, download_save_dir; 1321 bool skip_dir_check = false; 1322 DCHECK(download_manager_); 1323 if (download_manager_->GetDelegate()) { 1324 download_manager_->GetDelegate()->GetSaveDir( 1325 web_contents()->GetBrowserContext(), &website_save_dir, 1326 &download_save_dir, &skip_dir_check); 1327 } 1328 std::string mime_type = web_contents()->GetContentsMimeType(); 1329 std::string accept_languages = 1330 GetContentClient()->browser()->GetAcceptLangs( 1331 web_contents()->GetBrowserContext()); 1332 1333 BrowserThread::PostTask( 1334 BrowserThread::FILE, FROM_HERE, 1335 base::Bind(&SavePackage::CreateDirectoryOnFileThread, this, 1336 website_save_dir, download_save_dir, skip_dir_check, 1337 mime_type, accept_languages)); 1338 } 1339 1340 void SavePackage::CreateDirectoryOnFileThread( 1341 const base::FilePath& website_save_dir, 1342 const base::FilePath& download_save_dir, 1343 bool skip_dir_check, 1344 const std::string& mime_type, 1345 const std::string& accept_langs) { 1346 base::FilePath save_dir; 1347 // If the default html/websites save folder doesn't exist... 1348 // We skip the directory check for gdata directories on ChromeOS. 1349 if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) { 1350 // If the default download dir doesn't exist, create it. 1351 if (!base::DirectoryExists(download_save_dir)) { 1352 bool res = file_util::CreateDirectory(download_save_dir); 1353 DCHECK(res); 1354 } 1355 save_dir = download_save_dir; 1356 } else { 1357 // If it does exist, use the default save dir param. 
1358 save_dir = website_save_dir; 1359 } 1360 1361 bool can_save_as_complete = CanSaveAsComplete(mime_type); 1362 base::FilePath suggested_filename = GetSuggestedNameForSaveAs( 1363 can_save_as_complete, mime_type, accept_langs); 1364 base::FilePath::StringType pure_file_name = 1365 suggested_filename.RemoveExtension().BaseName().value(); 1366 base::FilePath::StringType file_name_ext = suggested_filename.Extension(); 1367 1368 // Need to make sure the suggested file name is not too long. 1369 uint32 max_path = GetMaxPathLengthForDirectory(save_dir); 1370 1371 if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) { 1372 save_dir = save_dir.Append(pure_file_name + file_name_ext); 1373 } else { 1374 // Cannot create a shorter filename. This will cause the save as operation 1375 // to fail unless the user pick a shorter name. Continuing even though it 1376 // will fail because returning means no save as popup for the user, which 1377 // is even more confusing. This case should be rare though. 1378 save_dir = save_dir.Append(suggested_filename); 1379 } 1380 1381 BrowserThread::PostTask( 1382 BrowserThread::UI, FROM_HERE, 1383 base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir, 1384 can_save_as_complete)); 1385 } 1386 1387 void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path, 1388 bool can_save_as_complete) { 1389 1390 // The WebContents which owns this SavePackage may have disappeared during 1391 // the UI->FILE->UI thread hop of 1392 // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo. 
1393 if (!web_contents() || !download_manager_->GetDelegate()) 1394 return; 1395 1396 base::FilePath::StringType default_extension; 1397 if (can_save_as_complete) 1398 default_extension = kDefaultHtmlExtension; 1399 1400 download_manager_->GetDelegate()->ChooseSavePath( 1401 web_contents(), 1402 suggested_path, 1403 default_extension, 1404 can_save_as_complete, 1405 base::Bind(&SavePackage::OnPathPicked, AsWeakPtr())); 1406 } 1407 1408 void SavePackage::OnPathPicked( 1409 const base::FilePath& final_name, 1410 SavePageType type, 1411 const SavePackageDownloadCreatedCallback& download_created_callback) { 1412 DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) || 1413 (type == SAVE_PAGE_TYPE_AS_MHTML) || 1414 (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type; 1415 // Ensure the filename is safe. 1416 saved_main_file_path_ = final_name; 1417 // TODO(asanka): This call may block on IO and shouldn't be made 1418 // from the UI thread. See http://crbug.com/61827. 1419 net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false, 1420 &saved_main_file_path_); 1421 1422 saved_main_directory_path_ = saved_main_file_path_.DirName(); 1423 save_type_ = type; 1424 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) { 1425 // Make new directory for saving complete file. 
1426 saved_main_directory_path_ = saved_main_directory_path_.Append( 1427 saved_main_file_path_.RemoveExtension().BaseName().value() + 1428 FILE_PATH_LITERAL("_files")); 1429 } 1430 1431 Init(download_created_callback); 1432 } 1433 1434 void SavePackage::StopObservation() { 1435 DCHECK(download_); 1436 DCHECK(download_manager_); 1437 1438 download_->RemoveObserver(this); 1439 download_ = NULL; 1440 download_manager_ = NULL; 1441 } 1442 1443 void SavePackage::OnDownloadDestroyed(DownloadItem* download) { 1444 StopObservation(); 1445 } 1446 1447 void SavePackage::FinalizeDownloadEntry() { 1448 DCHECK(download_); 1449 DCHECK(download_manager_); 1450 1451 download_manager_->OnSavePackageSuccessfullyFinished(download_); 1452 StopObservation(); 1453 } 1454 1455 } // namespace content 1456