1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "content/browser/download/save_package.h" 6 7 #include <algorithm> 8 9 #include "base/bind.h" 10 #include "base/files/file_path.h" 11 #include "base/files/file_util.h" 12 #include "base/i18n/file_util_icu.h" 13 #include "base/logging.h" 14 #include "base/message_loop/message_loop.h" 15 #include "base/stl_util.h" 16 #include "base/strings/string_piece.h" 17 #include "base/strings/string_split.h" 18 #include "base/strings/sys_string_conversions.h" 19 #include "base/strings/utf_string_conversions.h" 20 #include "base/threading/thread.h" 21 #include "content/browser/download/download_item_impl.h" 22 #include "content/browser/download/download_manager_impl.h" 23 #include "content/browser/download/download_stats.h" 24 #include "content/browser/download/save_file.h" 25 #include "content/browser/download/save_file_manager.h" 26 #include "content/browser/download/save_item.h" 27 #include "content/browser/loader/resource_dispatcher_host_impl.h" 28 #include "content/browser/renderer_host/render_process_host_impl.h" 29 #include "content/browser/renderer_host/render_view_host_delegate.h" 30 #include "content/browser/renderer_host/render_view_host_impl.h" 31 #include "content/common/view_messages.h" 32 #include "content/public/browser/browser_context.h" 33 #include "content/public/browser/browser_thread.h" 34 #include "content/public/browser/content_browser_client.h" 35 #include "content/public/browser/download_manager_delegate.h" 36 #include "content/public/browser/navigation_entry.h" 37 #include "content/public/browser/notification_service.h" 38 #include "content/public/browser/notification_types.h" 39 #include "content/public/browser/resource_context.h" 40 #include "content/public/browser/web_contents.h" 41 #include "net/base/filename_util.h" 42 #include "net/base/io_buffer.h" 43 #include "net/base/mime_util.h" 44 #include "net/url_request/url_request_context.h" 45 #include "third_party/WebKit/public/web/WebPageSerializerClient.h" 46 #include "url/url_constants.h" 47 48 using base::Time; 49 using blink::WebPageSerializerClient; 50 51 namespace content { 52 namespace { 53 54 // A counter for uniquely identifying each save package. 55 int g_save_package_id = 0; 56 57 // Default name which will be used when we can not get proper name from 58 // resource URL. 59 const char kDefaultSaveName[] = "saved_resource"; 60 61 // Maximum number of file ordinal number. I think it's big enough for resolving 62 // name-conflict files which has same base file name. 63 const int32 kMaxFileOrdinalNumber = 9999; 64 65 // Maximum length for file path. Since Windows have MAX_PATH limitation for 66 // file path, we need to make sure length of file path of every saved file 67 // is less than MAX_PATH 68 #if defined(OS_WIN) 69 const uint32 kMaxFilePathLength = MAX_PATH - 1; 70 #elif defined(OS_POSIX) 71 const uint32 kMaxFilePathLength = PATH_MAX - 1; 72 #endif 73 74 // Maximum length for file ordinal number part. Since we only support the 75 // maximum 9999 for ordinal number, which means maximum file ordinal number part 76 // should be "(9998)", so the value is 6. 77 const uint32 kMaxFileOrdinalNumberPartLength = 6; 78 79 // Strip current ordinal number, if any. Should only be used on pure 80 // file names, i.e. those stripped of their extensions. 81 // TODO(estade): improve this to not choke on alternate encodings. 82 base::FilePath::StringType StripOrdinalNumber( 83 const base::FilePath::StringType& pure_file_name) { 84 base::FilePath::StringType::size_type r_paren_index = 85 pure_file_name.rfind(FILE_PATH_LITERAL(')')); 86 base::FilePath::StringType::size_type l_paren_index = 87 pure_file_name.rfind(FILE_PATH_LITERAL('(')); 88 if (l_paren_index >= r_paren_index) 89 return pure_file_name; 90 91 for (base::FilePath::StringType::size_type i = l_paren_index + 1; 92 i != r_paren_index; ++i) { 93 if (!IsAsciiDigit(pure_file_name[i])) 94 return pure_file_name; 95 } 96 97 return pure_file_name.substr(0, l_paren_index); 98 } 99 100 // Check whether we can save page as complete-HTML for the contents which 101 // have specified a MIME type. Now only contents which have the MIME type 102 // "text/html" can be saved as complete-HTML. 103 bool CanSaveAsComplete(const std::string& contents_mime_type) { 104 return contents_mime_type == "text/html" || 105 contents_mime_type == "application/xhtml+xml"; 106 } 107 108 // Request handle for SavePackage downloads. Currently doesn't support 109 // pause/resume/cancel, but returns a WebContents. 110 class SavePackageRequestHandle : public DownloadRequestHandleInterface { 111 public: 112 SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package) 113 : save_package_(save_package) {} 114 115 // DownloadRequestHandleInterface 116 virtual WebContents* GetWebContents() const OVERRIDE { 117 return save_package_.get() ? save_package_->web_contents() : NULL; 118 } 119 virtual DownloadManager* GetDownloadManager() const OVERRIDE { 120 return NULL; 121 } 122 virtual void PauseRequest() const OVERRIDE {} 123 virtual void ResumeRequest() const OVERRIDE {} 124 virtual void CancelRequest() const OVERRIDE {} 125 virtual std::string DebugString() const OVERRIDE { 126 return "SavePackage DownloadRequestHandle"; 127 } 128 129 private: 130 base::WeakPtr<SavePackage> save_package_; 131 }; 132 133 } // namespace 134 135 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] = 136 FILE_PATH_LITERAL("html"); 137 138 SavePackage::SavePackage(WebContents* web_contents, 139 SavePageType save_type, 140 const base::FilePath& file_full_path, 141 const base::FilePath& directory_full_path) 142 : WebContentsObserver(web_contents), 143 file_manager_(NULL), 144 download_manager_(NULL), 145 download_(NULL), 146 page_url_(GetUrlToBeSaved()), 147 saved_main_file_path_(file_full_path), 148 saved_main_directory_path_(directory_full_path), 149 title_(web_contents->GetTitle()), 150 start_tick_(base::TimeTicks::Now()), 151 finished_(false), 152 mhtml_finishing_(false), 153 user_canceled_(false), 154 disk_error_occurred_(false), 155 save_type_(save_type), 156 all_save_items_count_(0), 157 file_name_set_(&base::FilePath::CompareLessIgnoreCase), 158 wait_state_(INITIALIZE), 159 contents_id_(web_contents->GetRenderProcessHost()->GetID()), 160 unique_id_(g_save_package_id++), 161 wrote_to_completed_file_(false), 162 wrote_to_failed_file_(false) { 163 DCHECK(page_url_.is_valid()); 164 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) || 165 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) || 166 (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)); 167 DCHECK(!saved_main_file_path_.empty() && 168 saved_main_file_path_.value().length() <= kMaxFilePathLength); 169 DCHECK(!saved_main_directory_path_.empty() && 170 saved_main_directory_path_.value().length() < kMaxFilePathLength); 171 InternalInit(); 172 } 173 174 SavePackage::SavePackage(WebContents* web_contents) 175 : WebContentsObserver(web_contents), 176 file_manager_(NULL), 177 download_manager_(NULL), 178 download_(NULL), 179 page_url_(GetUrlToBeSaved()), 180 title_(web_contents->GetTitle()), 181 start_tick_(base::TimeTicks::Now()), 182 finished_(false), 183 mhtml_finishing_(false), 184 user_canceled_(false), 185 disk_error_occurred_(false), 186 save_type_(SAVE_PAGE_TYPE_UNKNOWN), 187 all_save_items_count_(0), 188 file_name_set_(&base::FilePath::CompareLessIgnoreCase), 189 wait_state_(INITIALIZE), 190 contents_id_(web_contents->GetRenderProcessHost()->GetID()), 191 unique_id_(g_save_package_id++), 192 wrote_to_completed_file_(false), 193 wrote_to_failed_file_(false) { 194 DCHECK(page_url_.is_valid()); 195 InternalInit(); 196 } 197 198 // This is for testing use. Set |finished_| as true because we don't want 199 // method Cancel to be be called in destructor in test mode. 200 // We also don't call InternalInit(). 201 SavePackage::SavePackage(WebContents* web_contents, 202 const base::FilePath& file_full_path, 203 const base::FilePath& directory_full_path) 204 : WebContentsObserver(web_contents), 205 file_manager_(NULL), 206 download_manager_(NULL), 207 download_(NULL), 208 saved_main_file_path_(file_full_path), 209 saved_main_directory_path_(directory_full_path), 210 start_tick_(base::TimeTicks::Now()), 211 finished_(true), 212 mhtml_finishing_(false), 213 user_canceled_(false), 214 disk_error_occurred_(false), 215 save_type_(SAVE_PAGE_TYPE_UNKNOWN), 216 all_save_items_count_(0), 217 file_name_set_(&base::FilePath::CompareLessIgnoreCase), 218 wait_state_(INITIALIZE), 219 contents_id_(0), 220 unique_id_(g_save_package_id++), 221 wrote_to_completed_file_(false), 222 wrote_to_failed_file_(false) { 223 } 224 225 SavePackage::~SavePackage() { 226 // Stop receiving saving job's updates 227 if (!finished_ && !canceled()) { 228 // Unexpected quit. 229 Cancel(true); 230 } 231 232 // We should no longer be observing the DownloadItem at this point. 233 CHECK(!download_); 234 235 DCHECK(all_save_items_count_ == (waiting_item_queue_.size() + 236 completed_count() + 237 in_process_count())); 238 // Free all SaveItems. 239 while (!waiting_item_queue_.empty()) { 240 // We still have some items which are waiting for start to save. 241 SaveItem* save_item = waiting_item_queue_.front(); 242 waiting_item_queue_.pop(); 243 delete save_item; 244 } 245 246 STLDeleteValues(&saved_success_items_); 247 STLDeleteValues(&in_progress_items_); 248 STLDeleteValues(&saved_failed_items_); 249 250 file_manager_ = NULL; 251 } 252 253 GURL SavePackage::GetUrlToBeSaved() { 254 // Instead of using web_contents_.GetURL here, we use url() (which is the 255 // "real" url of the page) from the NavigationEntry because it reflects its 256 // origin rather than the displayed one (returned by GetURL) which may be 257 // different (like having "view-source:" on the front). 258 NavigationEntry* visible_entry = 259 web_contents()->GetController().GetVisibleEntry(); 260 return visible_entry->GetURL(); 261 } 262 263 void SavePackage::Cancel(bool user_action) { 264 if (!canceled()) { 265 if (user_action) 266 user_canceled_ = true; 267 else 268 disk_error_occurred_ = true; 269 Stop(); 270 } 271 RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED); 272 } 273 274 // Init() can be called directly, or indirectly via GetSaveInfo(). In both 275 // cases, we need file_manager_ to be initialized, so we do this first. 276 void SavePackage::InternalInit() { 277 ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get(); 278 if (!rdh) { 279 NOTREACHED(); 280 return; 281 } 282 283 file_manager_ = rdh->save_file_manager(); 284 DCHECK(file_manager_); 285 286 download_manager_ = static_cast<DownloadManagerImpl*>( 287 BrowserContext::GetDownloadManager( 288 web_contents()->GetBrowserContext())); 289 DCHECK(download_manager_); 290 291 RecordSavePackageEvent(SAVE_PACKAGE_STARTED); 292 } 293 294 bool SavePackage::Init( 295 const SavePackageDownloadCreatedCallback& download_created_callback) { 296 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 297 // Set proper running state. 298 if (wait_state_ != INITIALIZE) 299 return false; 300 301 wait_state_ = START_PROCESS; 302 303 // Initialize the request context and resource dispatcher. 304 BrowserContext* browser_context = web_contents()->GetBrowserContext(); 305 if (!browser_context) { 306 NOTREACHED(); 307 return false; 308 } 309 310 scoped_ptr<DownloadRequestHandleInterface> request_handle( 311 new SavePackageRequestHandle(AsWeakPtr())); 312 // The download manager keeps ownership but adds us as an observer. 313 download_manager_->CreateSavePackageDownloadItem( 314 saved_main_file_path_, 315 page_url_, 316 ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ? 317 "multipart/related" : "text/html"), 318 request_handle.Pass(), 319 base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(), 320 download_created_callback)); 321 return true; 322 } 323 324 void SavePackage::InitWithDownloadItem( 325 const SavePackageDownloadCreatedCallback& download_created_callback, 326 DownloadItemImpl* item) { 327 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 328 DCHECK(item); 329 download_ = item; 330 download_->AddObserver(this); 331 // Confirm above didn't delete the tab out from under us. 332 if (!download_created_callback.is_null()) 333 download_created_callback.Run(download_); 334 335 // Check save type and process the save page job. 336 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) { 337 // Get directory 338 DCHECK(!saved_main_directory_path_.empty()); 339 GetAllSavableResourceLinksForCurrentPage(); 340 } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) { 341 web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind( 342 &SavePackage::OnMHTMLGenerated, this)); 343 } else { 344 DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_; 345 wait_state_ = NET_FILES; 346 SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ? 347 SaveFileCreateInfo::SAVE_FILE_FROM_FILE : 348 SaveFileCreateInfo::SAVE_FILE_FROM_NET; 349 SaveItem* save_item = new SaveItem(page_url_, 350 Referrer(), 351 this, 352 save_source); 353 // Add this item to waiting list. 354 waiting_item_queue_.push(save_item); 355 all_save_items_count_ = 1; 356 download_->SetTotalBytes(1); 357 358 DoSavingProcess(); 359 } 360 } 361 362 void SavePackage::OnMHTMLGenerated(int64 size) { 363 if (size <= 0) { 364 Cancel(false); 365 return; 366 } 367 wrote_to_completed_file_ = true; 368 369 // Hack to avoid touching download_ after user cancel. 370 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 371 // with SavePackage flow. 372 if (download_->GetState() == DownloadItem::IN_PROGRESS) { 373 download_->SetTotalBytes(size); 374 download_->DestinationUpdate(size, 0, std::string()); 375 // Must call OnAllDataSaved here in order for 376 // GDataDownloadObserver::ShouldUpload() to return true. 377 // ShouldCompleteDownload() may depend on the gdata uploader to finish. 378 download_->OnAllDataSaved(DownloadItem::kEmptyFileHash); 379 } 380 381 if (!download_manager_->GetDelegate()) { 382 Finish(); 383 return; 384 } 385 386 if (download_manager_->GetDelegate()->ShouldCompleteDownload( 387 download_, base::Bind(&SavePackage::Finish, this))) { 388 Finish(); 389 } 390 } 391 392 // On POSIX, the length of |pure_file_name| + |file_name_ext| is further 393 // restricted by NAME_MAX. The maximum allowed path looks like: 394 // '/path/to/save_dir' + '/' + NAME_MAX. 395 uint32 SavePackage::GetMaxPathLengthForDirectory( 396 const base::FilePath& base_dir) { 397 #if defined(OS_POSIX) 398 return std::min(kMaxFilePathLength, 399 static_cast<uint32>(base_dir.value().length()) + 400 NAME_MAX + 1); 401 #else 402 return kMaxFilePathLength; 403 #endif 404 } 405 406 // File name is considered being consist of pure file name, dot and file 407 // extension name. File name might has no dot and file extension, or has 408 // multiple dot inside file name. The dot, which separates the pure file 409 // name and file extension name, is last dot in the whole file name. 410 // This function is for making sure the length of specified file path is not 411 // great than the specified maximum length of file path and getting safe pure 412 // file name part if the input pure file name is too long. 413 // The parameter |dir_path| specifies directory part of the specified 414 // file path. The parameter |file_name_ext| specifies file extension 415 // name part of the specified file path (including start dot). The parameter 416 // |max_file_path_len| specifies maximum length of the specified file path. 417 // The parameter |pure_file_name| input pure file name part of the specified 418 // file path. If the length of specified file path is great than 419 // |max_file_path_len|, the |pure_file_name| will output new pure file name 420 // part for making sure the length of specified file path is less than 421 // specified maximum length of file path. Return false if the function can 422 // not get a safe pure file name, otherwise it returns true. 423 bool SavePackage::GetSafePureFileName( 424 const base::FilePath& dir_path, 425 const base::FilePath::StringType& file_name_ext, 426 uint32 max_file_path_len, 427 base::FilePath::StringType* pure_file_name) { 428 DCHECK(!pure_file_name->empty()); 429 int available_length = static_cast<int>(max_file_path_len - 430 dir_path.value().length() - 431 file_name_ext.length()); 432 // Need an extra space for the separator. 433 if (!dir_path.EndsWithSeparator()) 434 --available_length; 435 436 // Plenty of room. 437 if (static_cast<int>(pure_file_name->length()) <= available_length) 438 return true; 439 440 // Limited room. Truncate |pure_file_name| to fit. 441 if (available_length > 0) { 442 *pure_file_name = pure_file_name->substr(0, available_length); 443 return true; 444 } 445 446 // Not enough room to even use a shortened |pure_file_name|. 447 pure_file_name->clear(); 448 return false; 449 } 450 451 // Generate name for saving resource. 452 bool SavePackage::GenerateFileName(const std::string& disposition, 453 const GURL& url, 454 bool need_html_ext, 455 base::FilePath::StringType* generated_name) { 456 // TODO(jungshik): Figure out the referrer charset when having one 457 // makes sense and pass it to GenerateFileName. 458 base::FilePath file_path = net::GenerateFileName(url, 459 disposition, 460 std::string(), 461 std::string(), 462 std::string(), 463 kDefaultSaveName); 464 465 DCHECK(!file_path.empty()); 466 base::FilePath::StringType pure_file_name = 467 file_path.RemoveExtension().BaseName().value(); 468 base::FilePath::StringType file_name_ext = file_path.Extension(); 469 470 // If it is HTML resource, use ".html" as its extension. 471 if (need_html_ext) { 472 file_name_ext = FILE_PATH_LITERAL("."); 473 file_name_ext.append(kDefaultHtmlExtension); 474 } 475 476 // Need to make sure the suggested file name is not too long. 477 uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_); 478 479 // Get safe pure file name. 480 if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext, 481 max_path, &pure_file_name)) 482 return false; 483 484 base::FilePath::StringType file_name = pure_file_name + file_name_ext; 485 486 // Check whether we already have same name in a case insensitive manner. 487 FileNameSet::const_iterator iter = file_name_set_.find(file_name); 488 if (iter == file_name_set_.end()) { 489 file_name_set_.insert(file_name); 490 } else { 491 // Found same name, increase the ordinal number for the file name. 492 pure_file_name = 493 base::FilePath(*iter).RemoveExtension().BaseName().value(); 494 base::FilePath::StringType base_file_name = 495 StripOrdinalNumber(pure_file_name); 496 497 // We need to make sure the length of base file name plus maximum ordinal 498 // number path will be less than or equal to kMaxFilePathLength. 499 if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext, 500 max_path - kMaxFileOrdinalNumberPartLength, &base_file_name)) 501 return false; 502 503 // Prepare the new ordinal number. 504 uint32 ordinal_number; 505 FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name); 506 if (it == file_name_count_map_.end()) { 507 // First base-name-conflict resolving, use 1 as initial ordinal number. 508 file_name_count_map_[base_file_name] = 1; 509 ordinal_number = 1; 510 } else { 511 // We have met same base-name conflict, use latest ordinal number. 512 ordinal_number = it->second; 513 } 514 515 if (ordinal_number > (kMaxFileOrdinalNumber - 1)) { 516 // Use a random file from temporary file. 517 base::FilePath temp_file; 518 base::CreateTemporaryFile(&temp_file); 519 file_name = temp_file.RemoveExtension().BaseName().value(); 520 // Get safe pure file name. 521 if (!GetSafePureFileName(saved_main_directory_path_, 522 base::FilePath::StringType(), 523 max_path, &file_name)) 524 return false; 525 } else { 526 for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) { 527 base::FilePath::StringType new_name = base_file_name + 528 base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext; 529 if (file_name_set_.find(new_name) == file_name_set_.end()) { 530 // Resolved name conflict. 531 file_name = new_name; 532 file_name_count_map_[base_file_name] = ++i; 533 break; 534 } 535 } 536 } 537 538 file_name_set_.insert(file_name); 539 } 540 541 DCHECK(!file_name.empty()); 542 generated_name->assign(file_name); 543 544 return true; 545 } 546 547 // We have received a message from SaveFileManager about a new saving job. We 548 // create a SaveItem and store it in our in_progress list. 549 void SavePackage::StartSave(const SaveFileCreateInfo* info) { 550 DCHECK(info && !info->url.is_empty()); 551 552 SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec()); 553 if (it == in_progress_items_.end()) { 554 // If not found, we must have cancel action. 555 DCHECK(canceled()); 556 return; 557 } 558 SaveItem* save_item = it->second; 559 560 DCHECK(!saved_main_file_path_.empty()); 561 562 save_item->SetSaveId(info->save_id); 563 save_item->SetTotalBytes(info->total_bytes); 564 565 // Determine the proper path for a saving job, by choosing either the default 566 // save directory, or prompting the user. 567 DCHECK(!save_item->has_final_name()); 568 if (info->url != page_url_) { 569 base::FilePath::StringType generated_name; 570 // For HTML resource file, make sure it will have .htm as extension name, 571 // otherwise, when you open the saved page in Chrome again, download 572 // file manager will treat it as downloadable resource, and download it 573 // instead of opening it as HTML. 574 bool need_html_ext = 575 info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM; 576 if (!GenerateFileName(info->content_disposition, 577 GURL(info->url), 578 need_html_ext, 579 &generated_name)) { 580 // We can not generate file name for this SaveItem, so we cancel the 581 // saving page job if the save source is from serialized DOM data. 582 // Otherwise, it means this SaveItem is sub-resource type, we treat it 583 // as an error happened on saving. We can ignore this type error for 584 // sub-resource links which will be resolved as absolute links instead 585 // of local links in final saved contents. 586 if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM) 587 Cancel(true); 588 else 589 SaveFinished(save_item->save_id(), 0, false); 590 return; 591 } 592 593 // When saving page as only-HTML, we only have a SaveItem whose url 594 // must be page_url_. 595 DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML); 596 DCHECK(!saved_main_directory_path_.empty()); 597 598 // Now we get final name retrieved from GenerateFileName, we will use it 599 // rename the SaveItem. 600 base::FilePath final_name = 601 saved_main_directory_path_.Append(generated_name); 602 save_item->Rename(final_name); 603 } else { 604 // It is the main HTML file, use the name chosen by the user. 605 save_item->Rename(saved_main_file_path_); 606 } 607 608 // If the save source is from file system, inform SaveFileManager to copy 609 // corresponding file to the file path which this SaveItem specifies. 610 if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) { 611 BrowserThread::PostTask( 612 BrowserThread::FILE, FROM_HERE, 613 base::Bind(&SaveFileManager::SaveLocalFile, 614 file_manager_, 615 save_item->url(), 616 save_item->save_id(), 617 contents_id())); 618 return; 619 } 620 621 // Check whether we begin to require serialized HTML data. 622 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML && 623 wait_state_ == HTML_DATA) { 624 // Inform backend to serialize the all frames' DOM and send serialized 625 // HTML data back. 626 GetSerializedHtmlDataForCurrentPageWithLocalLinks(); 627 } 628 } 629 630 SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) { 631 if (in_process_count()) { 632 for (SaveUrlItemMap::iterator it = in_progress_items_.begin(); 633 it != in_progress_items_.end(); ++it) { 634 SaveItem* save_item = it->second; 635 DCHECK(save_item->state() == SaveItem::IN_PROGRESS); 636 if (save_item->save_id() == save_id) 637 return save_item; 638 } 639 } 640 return NULL; 641 } 642 643 void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) { 644 SaveUrlItemMap::iterator it = in_progress_items_.find( 645 save_item->url().spec()); 646 DCHECK(it != in_progress_items_.end()); 647 DCHECK(save_item == it->second); 648 in_progress_items_.erase(it); 649 650 if (save_item->success()) { 651 // Add it to saved_success_items_. 652 DCHECK(saved_success_items_.find(save_item->save_id()) == 653 saved_success_items_.end()); 654 saved_success_items_[save_item->save_id()] = save_item; 655 } else { 656 // Add it to saved_failed_items_. 657 DCHECK(saved_failed_items_.find(save_item->url().spec()) == 658 saved_failed_items_.end()); 659 saved_failed_items_[save_item->url().spec()] = save_item; 660 } 661 } 662 663 // Called for updating saving state. 664 bool SavePackage::UpdateSaveProgress(int32 save_id, 665 int64 size, 666 bool write_success) { 667 // Because we might have canceled this saving job before, 668 // so we might not find corresponding SaveItem. 669 SaveItem* save_item = LookupItemInProcessBySaveId(save_id); 670 if (!save_item) 671 return false; 672 673 save_item->Update(size); 674 675 // If we got disk error, cancel whole save page job. 676 if (!write_success) { 677 // Cancel job with reason of disk error. 678 Cancel(false); 679 } 680 return true; 681 } 682 683 // Stop all page saving jobs that are in progress and instruct the file thread 684 // to delete all saved files. 685 void SavePackage::Stop() { 686 // If we haven't moved out of the initial state, there's nothing to cancel and 687 // there won't be valid pointers for file_manager_ or download_. 688 if (wait_state_ == INITIALIZE) 689 return; 690 691 // When stopping, if it still has some items in in_progress, cancel them. 692 DCHECK(canceled()); 693 if (in_process_count()) { 694 SaveUrlItemMap::iterator it = in_progress_items_.begin(); 695 for (; it != in_progress_items_.end(); ++it) { 696 SaveItem* save_item = it->second; 697 DCHECK(save_item->state() == SaveItem::IN_PROGRESS); 698 save_item->Cancel(); 699 } 700 // Remove all in progress item to saved map. For failed items, they will 701 // be put into saved_failed_items_, for successful item, they will be put 702 // into saved_success_items_. 703 while (in_process_count()) 704 PutInProgressItemToSavedMap(in_progress_items_.begin()->second); 705 } 706 707 // This vector contains the save ids of the save files which SaveFileManager 708 // needs to remove from its save_file_map_. 709 SaveIDList save_ids; 710 for (SavedItemMap::iterator it = saved_success_items_.begin(); 711 it != saved_success_items_.end(); ++it) 712 save_ids.push_back(it->first); 713 for (SaveUrlItemMap::iterator it = saved_failed_items_.begin(); 714 it != saved_failed_items_.end(); ++it) 715 save_ids.push_back(it->second->save_id()); 716 717 BrowserThread::PostTask( 718 BrowserThread::FILE, FROM_HERE, 719 base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap, 720 file_manager_, 721 save_ids)); 722 723 finished_ = true; 724 wait_state_ = FAILED; 725 726 // Inform the DownloadItem we have canceled whole save page job. 727 if (download_) { 728 download_->Cancel(false); 729 FinalizeDownloadEntry(); 730 } 731 } 732 733 void SavePackage::CheckFinish() { 734 if (in_process_count() || finished_) 735 return; 736 737 base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML && 738 saved_success_items_.size() > 1) ? 739 saved_main_directory_path_ : base::FilePath(); 740 741 // This vector contains the final names of all the successfully saved files 742 // along with their save ids. It will be passed to SaveFileManager to do the 743 // renaming job. 744 FinalNameList final_names; 745 for (SavedItemMap::iterator it = saved_success_items_.begin(); 746 it != saved_success_items_.end(); ++it) 747 final_names.push_back(std::make_pair(it->first, 748 it->second->full_path())); 749 750 BrowserThread::PostTask( 751 BrowserThread::FILE, FROM_HERE, 752 base::Bind(&SaveFileManager::RenameAllFiles, 753 file_manager_, 754 final_names, 755 dir, 756 web_contents()->GetRenderProcessHost()->GetID(), 757 web_contents()->GetRenderViewHost()->GetRoutingID(), 758 id())); 759 } 760 761 // Successfully finished all items of this SavePackage. 762 void SavePackage::Finish() { 763 // User may cancel the job when we're moving files to the final directory. 764 if (canceled()) 765 return; 766 767 wait_state_ = SUCCESSFUL; 768 finished_ = true; 769 770 // Record finish. 771 RecordSavePackageEvent(SAVE_PACKAGE_FINISHED); 772 773 // Record any errors that occurred. 774 if (wrote_to_completed_file_) { 775 RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED); 776 } 777 778 if (wrote_to_failed_file_) { 779 RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED); 780 } 781 782 // This vector contains the save ids of the save files which SaveFileManager 783 // needs to remove from its save_file_map_. 784 SaveIDList save_ids; 785 for (SaveUrlItemMap::iterator it = saved_failed_items_.begin(); 786 it != saved_failed_items_.end(); ++it) 787 save_ids.push_back(it->second->save_id()); 788 789 BrowserThread::PostTask( 790 BrowserThread::FILE, FROM_HERE, 791 base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap, 792 file_manager_, 793 save_ids)); 794 795 if (download_) { 796 // Hack to avoid touching download_ after user cancel. 797 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 798 // with SavePackage flow. 799 if (download_->GetState() == DownloadItem::IN_PROGRESS) { 800 if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) { 801 download_->DestinationUpdate( 802 all_save_items_count_, CurrentSpeed(), std::string()); 803 download_->OnAllDataSaved(DownloadItem::kEmptyFileHash); 804 } 805 download_->MarkAsComplete(); 806 } 807 FinalizeDownloadEntry(); 808 } 809 } 810 811 // Called for updating end state. 812 void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) { 813 // Because we might have canceled this saving job before, 814 // so we might not find corresponding SaveItem. Just ignore it. 815 SaveItem* save_item = LookupItemInProcessBySaveId(save_id); 816 if (!save_item) 817 return; 818 819 // Let SaveItem set end state. 820 save_item->Finish(size, is_success); 821 // Remove the associated save id and SavePackage. 822 file_manager_->RemoveSaveFile(save_id, save_item->url(), this); 823 824 PutInProgressItemToSavedMap(save_item); 825 826 // Inform the DownloadItem to update UI. 827 // We use the received bytes as number of saved files. 828 // Hack to avoid touching download_ after user cancel. 829 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 830 // with SavePackage flow. 831 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) { 832 download_->DestinationUpdate( 833 completed_count(), CurrentSpeed(), std::string()); 834 } 835 836 if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM && 837 save_item->url() == page_url_ && !save_item->received_bytes()) { 838 // If size of main HTML page is 0, treat it as disk error. 839 Cancel(false); 840 return; 841 } 842 843 if (canceled()) { 844 DCHECK(finished_); 845 return; 846 } 847 848 // Continue processing the save page job. 849 DoSavingProcess(); 850 851 // Check whether we can successfully finish whole job. 852 CheckFinish(); 853 } 854 855 // Sometimes, the net io will only call SaveFileManager::SaveFinished with 856 // save id -1 when it encounters error. Since in this case, save id will be 857 // -1, so we can only use URL to find which SaveItem is associated with 858 // this error. 859 // Saving an item failed. If it's a sub-resource, ignore it. If the error comes 860 // from serializing HTML data, then cancel saving page. 861 void SavePackage::SaveFailed(const GURL& save_url) { 862 SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec()); 863 if (it == in_progress_items_.end()) { 864 NOTREACHED(); // Should not exist! 865 return; 866 } 867 SaveItem* save_item = it->second; 868 869 save_item->Finish(0, false); 870 871 PutInProgressItemToSavedMap(save_item); 872 873 // Inform the DownloadItem to update UI. 874 // We use the received bytes as number of saved files. 875 // Hack to avoid touching download_ after user cancel. 876 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 877 // with SavePackage flow. 878 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) { 879 download_->DestinationUpdate( 880 completed_count(), CurrentSpeed(), std::string()); 881 } 882 883 if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) || 884 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) || 885 (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) { 886 // We got error when saving page. Treat it as disk error. 887 Cancel(true); 888 } 889 890 if (canceled()) { 891 DCHECK(finished_); 892 return; 893 } 894 895 // Continue processing the save page job. 896 DoSavingProcess(); 897 898 CheckFinish(); 899 } 900 901 void SavePackage::SaveCanceled(SaveItem* save_item) { 902 // Call the RemoveSaveFile in UI thread. 903 file_manager_->RemoveSaveFile(save_item->save_id(), 904 save_item->url(), 905 this); 906 if (save_item->save_id() != -1) 907 BrowserThread::PostTask( 908 BrowserThread::FILE, FROM_HERE, 909 base::Bind(&SaveFileManager::CancelSave, 910 file_manager_, 911 save_item->save_id())); 912 } 913 914 // Initiate a saving job of a specific URL. We send the request to 915 // SaveFileManager, which will dispatch it to different approach according to 916 // the save source. Parameter process_all_remaining_items indicates whether 917 // we need to save all remaining items. 918 void SavePackage::SaveNextFile(bool process_all_remaining_items) { 919 DCHECK(web_contents()); 920 DCHECK(waiting_item_queue_.size()); 921 922 do { 923 // Pop SaveItem from waiting list. 924 SaveItem* save_item = waiting_item_queue_.front(); 925 waiting_item_queue_.pop(); 926 927 // Add the item to in_progress_items_. 928 SaveUrlItemMap::iterator it = in_progress_items_.find( 929 save_item->url().spec()); 930 DCHECK(it == in_progress_items_.end()); 931 in_progress_items_[save_item->url().spec()] = save_item; 932 save_item->Start(); 933 file_manager_->SaveURL(save_item->url(), 934 save_item->referrer(), 935 web_contents()->GetRenderProcessHost()->GetID(), 936 routing_id(), 937 save_item->save_source(), 938 save_item->full_path(), 939 web_contents()-> 940 GetBrowserContext()->GetResourceContext(), 941 this); 942 } while (process_all_remaining_items && waiting_item_queue_.size()); 943 } 944 945 // Calculate the percentage of whole save page job. 946 int SavePackage::PercentComplete() { 947 if (!all_save_items_count_) 948 return 0; 949 else if (!in_process_count()) 950 return 100; 951 else 952 return completed_count() / all_save_items_count_; 953 } 954 955 int64 SavePackage::CurrentSpeed() const { 956 base::TimeDelta diff = base::TimeTicks::Now() - start_tick_; 957 int64 diff_ms = diff.InMilliseconds(); 958 return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms; 959 } 960 961 // Continue processing the save page job after one SaveItem has been 962 // finished. 963 void SavePackage::DoSavingProcess() { 964 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) { 965 // We guarantee that images and JavaScripts must be downloaded first. 966 // So when finishing all those sub-resources, we will know which 967 // sub-resource's link can be replaced with local file path, which 968 // sub-resource's link need to be replaced with absolute URL which 969 // point to its internet address because it got error when saving its data. 970 971 // Start a new SaveItem job if we still have job in waiting queue. 972 if (waiting_item_queue_.size()) { 973 DCHECK(wait_state_ == NET_FILES); 974 SaveItem* save_item = waiting_item_queue_.front(); 975 if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) { 976 SaveNextFile(false); 977 } else if (!in_process_count()) { 978 // If there is no in-process SaveItem, it means all sub-resources 979 // have been processed. Now we need to start serializing HTML DOM 980 // for the current page to get the generated HTML data. 981 wait_state_ = HTML_DATA; 982 // All non-HTML resources have been finished, start all remaining 983 // HTML files. 984 SaveNextFile(true); 985 } 986 } else if (in_process_count()) { 987 // Continue asking for HTML data. 988 DCHECK(wait_state_ == HTML_DATA); 989 } 990 } else { 991 // Save as HTML only or MHTML. 992 DCHECK(wait_state_ == NET_FILES); 993 DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) || 994 (save_type_ == SAVE_PAGE_TYPE_AS_MHTML)); 995 if (waiting_item_queue_.size()) { 996 DCHECK(all_save_items_count_ == waiting_item_queue_.size()); 997 SaveNextFile(false); 998 } 999 } 1000 } 1001 1002 bool SavePackage::OnMessageReceived(const IPC::Message& message) { 1003 bool handled = true; 1004 IPC_BEGIN_MESSAGE_MAP(SavePackage, message) 1005 IPC_MESSAGE_HANDLER(ViewHostMsg_SendCurrentPageAllSavableResourceLinks, 1006 OnReceivedSavableResourceLinksForCurrentPage) 1007 IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData, 1008 OnReceivedSerializedHtmlData) 1009 IPC_MESSAGE_UNHANDLED(handled = false) 1010 IPC_END_MESSAGE_MAP() 1011 return handled; 1012 } 1013 1014 // After finishing all SaveItems which need to get data from net. 1015 // We collect all URLs which have local storage and send the 1016 // map:(originalURL:currentLocalPath) to render process (backend). 1017 // Then render process will serialize DOM and send data to us. 1018 void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() { 1019 if (wait_state_ != HTML_DATA) 1020 return; 1021 std::vector<GURL> saved_links; 1022 std::vector<base::FilePath> saved_file_paths; 1023 int successful_started_items_count = 0; 1024 1025 // Collect all saved items which have local storage. 1026 // First collect the status of all the resource files and check whether they 1027 // have created local files although they have not been completely saved. 1028 // If yes, the file can be saved. Otherwise, there is a disk error, so we 1029 // need to cancel the page saving job. 1030 for (SaveUrlItemMap::iterator it = in_progress_items_.begin(); 1031 it != in_progress_items_.end(); ++it) { 1032 DCHECK(it->second->save_source() == 1033 SaveFileCreateInfo::SAVE_FILE_FROM_DOM); 1034 if (it->second->has_final_name()) 1035 successful_started_items_count++; 1036 saved_links.push_back(it->second->url()); 1037 saved_file_paths.push_back(it->second->file_name()); 1038 } 1039 1040 // If not all file of HTML resource have been started, then wait. 1041 if (successful_started_items_count != in_process_count()) 1042 return; 1043 1044 // Collect all saved success items. 1045 for (SavedItemMap::iterator it = saved_success_items_.begin(); 1046 it != saved_success_items_.end(); ++it) { 1047 DCHECK(it->second->has_final_name()); 1048 saved_links.push_back(it->second->url()); 1049 saved_file_paths.push_back(it->second->file_name()); 1050 } 1051 1052 // Get the relative directory name. 1053 base::FilePath relative_dir_name = saved_main_directory_path_.BaseName(); 1054 1055 Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks( 1056 routing_id(), saved_links, saved_file_paths, relative_dir_name)); 1057 } 1058 1059 // Process the serialized HTML content data of a specified web page 1060 // retrieved from render process. 1061 void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url, 1062 const std::string& data, 1063 int32 status) { 1064 WebPageSerializerClient::PageSerializationStatus flag = 1065 static_cast<WebPageSerializerClient::PageSerializationStatus>(status); 1066 // Check current state. 1067 if (wait_state_ != HTML_DATA) 1068 return; 1069 1070 int id = contents_id(); 1071 // If the all frames are finished saving, we need to close the 1072 // remaining SaveItems. 1073 if (flag == WebPageSerializerClient::AllFramesAreFinished) { 1074 for (SaveUrlItemMap::iterator it = in_progress_items_.begin(); 1075 it != in_progress_items_.end(); ++it) { 1076 VLOG(20) << " " << __FUNCTION__ << "()" 1077 << " save_id = " << it->second->save_id() 1078 << " url = \"" << it->second->url().spec() << "\""; 1079 BrowserThread::PostTask( 1080 BrowserThread::FILE, FROM_HERE, 1081 base::Bind(&SaveFileManager::SaveFinished, 1082 file_manager_, 1083 it->second->save_id(), 1084 it->second->url(), 1085 id, 1086 true)); 1087 } 1088 return; 1089 } 1090 1091 SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec()); 1092 if (it == in_progress_items_.end()) { 1093 for (SavedItemMap::iterator saved_it = saved_success_items_.begin(); 1094 saved_it != saved_success_items_.end(); ++saved_it) { 1095 if (saved_it->second->url() == frame_url) { 1096 wrote_to_completed_file_ = true; 1097 break; 1098 } 1099 } 1100 1101 it = saved_failed_items_.find(frame_url.spec()); 1102 if (it != saved_failed_items_.end()) 1103 wrote_to_failed_file_ = true; 1104 1105 return; 1106 } 1107 1108 SaveItem* save_item = it->second; 1109 DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM); 1110 1111 if (!data.empty()) { 1112 // Prepare buffer for saving HTML data. 1113 scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size())); 1114 memcpy(new_data->data(), data.data(), data.size()); 1115 1116 // Call write file functionality in file thread. 1117 BrowserThread::PostTask( 1118 BrowserThread::FILE, FROM_HERE, 1119 base::Bind(&SaveFileManager::UpdateSaveProgress, 1120 file_manager_, 1121 save_item->save_id(), 1122 new_data, 1123 static_cast<int>(data.size()))); 1124 } 1125 1126 // Current frame is completed saving, call finish in file thread. 1127 if (flag == WebPageSerializerClient::CurrentFrameIsFinished) { 1128 VLOG(20) << " " << __FUNCTION__ << "()" 1129 << " save_id = " << save_item->save_id() 1130 << " url = \"" << save_item->url().spec() << "\""; 1131 BrowserThread::PostTask( 1132 BrowserThread::FILE, FROM_HERE, 1133 base::Bind(&SaveFileManager::SaveFinished, 1134 file_manager_, 1135 save_item->save_id(), 1136 save_item->url(), 1137 id, 1138 true)); 1139 } 1140 } 1141 1142 // Ask for all savable resource links from backend, include main frame and 1143 // sub-frame. 1144 void SavePackage::GetAllSavableResourceLinksForCurrentPage() { 1145 if (wait_state_ != START_PROCESS) 1146 return; 1147 1148 wait_state_ = RESOURCES_LIST; 1149 Send(new ViewMsg_GetAllSavableResourceLinksForCurrentPage(routing_id(), 1150 page_url_)); 1151 } 1152 1153 // Give backend the lists which contain all resource links that have local 1154 // storage, after which, render process will serialize DOM for generating 1155 // HTML data. 1156 void SavePackage::OnReceivedSavableResourceLinksForCurrentPage( 1157 const std::vector<GURL>& resources_list, 1158 const std::vector<Referrer>& referrers_list, 1159 const std::vector<GURL>& frames_list) { 1160 if (wait_state_ != RESOURCES_LIST) 1161 return; 1162 1163 if (resources_list.size() != referrers_list.size()) 1164 return; 1165 1166 all_save_items_count_ = static_cast<int>(resources_list.size()) + 1167 static_cast<int>(frames_list.size()); 1168 1169 // We use total bytes as the total number of files we want to save. 1170 // Hack to avoid touching download_ after user cancel. 1171 // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem 1172 // with SavePackage flow. 1173 if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) 1174 download_->SetTotalBytes(all_save_items_count_); 1175 1176 if (all_save_items_count_) { 1177 // Put all sub-resources to wait list. 1178 for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) { 1179 const GURL& u = resources_list[i]; 1180 DCHECK(u.is_valid()); 1181 SaveFileCreateInfo::SaveFileSource save_source = u.SchemeIsFile() ? 1182 SaveFileCreateInfo::SAVE_FILE_FROM_FILE : 1183 SaveFileCreateInfo::SAVE_FILE_FROM_NET; 1184 SaveItem* save_item = new SaveItem(u, referrers_list[i], 1185 this, save_source); 1186 waiting_item_queue_.push(save_item); 1187 } 1188 // Put all HTML resources to wait list. 1189 for (int i = 0; i < static_cast<int>(frames_list.size()); ++i) { 1190 const GURL& u = frames_list[i]; 1191 DCHECK(u.is_valid()); 1192 SaveItem* save_item = new SaveItem( 1193 u, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM); 1194 waiting_item_queue_.push(save_item); 1195 } 1196 wait_state_ = NET_FILES; 1197 DoSavingProcess(); 1198 } else { 1199 // No resource files need to be saved, treat it as user cancel. 1200 Cancel(true); 1201 } 1202 } 1203 1204 base::FilePath SavePackage::GetSuggestedNameForSaveAs( 1205 bool can_save_as_complete, 1206 const std::string& contents_mime_type, 1207 const std::string& accept_langs) { 1208 base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title_); 1209 1210 // If the page's title matches its URL, use the URL. Try to use the last path 1211 // component or if there is none, the domain as the file name. 1212 // Normally we want to base the filename on the page title, or if it doesn't 1213 // exist, on the URL. It's not easy to tell if the page has no title, because 1214 // if the page has no title, WebContents::GetTitle() will return the page's 1215 // URL (adjusted for display purposes). Therefore, we convert the "title" 1216 // back to a URL, and if it matches the original page URL, we know the page 1217 // had no title (or had a title equal to its URL, which is fine to treat 1218 // similarly). 1219 if (title_ == net::FormatUrl(page_url_, accept_langs)) { 1220 std::string url_path; 1221 if (!page_url_.SchemeIs(url::kDataScheme)) { 1222 std::vector<std::string> url_parts; 1223 base::SplitString(page_url_.path(), '/', &url_parts); 1224 if (!url_parts.empty()) { 1225 for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) { 1226 url_path = url_parts[i]; 1227 if (!url_path.empty()) 1228 break; 1229 } 1230 } 1231 if (url_path.empty()) 1232 url_path = page_url_.host(); 1233 } else { 1234 url_path = "dataurl"; 1235 } 1236 name_with_proper_ext = base::FilePath::FromUTF8Unsafe(url_path); 1237 } 1238 1239 // Ask user for getting final saving name. 1240 name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext, 1241 contents_mime_type); 1242 // Adjust extension for complete types. 1243 if (can_save_as_complete) 1244 name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext); 1245 1246 base::FilePath::StringType file_name = name_with_proper_ext.value(); 1247 base::i18n::ReplaceIllegalCharactersInPath(&file_name, ' '); 1248 return base::FilePath(file_name); 1249 } 1250 1251 base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) { 1252 // If the file name doesn't have an extension suitable for HTML files, 1253 // append one. 1254 base::FilePath::StringType ext = name.Extension(); 1255 if (!ext.empty()) 1256 ext.erase(ext.begin()); // Erase preceding '.'. 1257 std::string mime_type; 1258 if (!net::GetMimeTypeFromExtension(ext, &mime_type) || 1259 !CanSaveAsComplete(mime_type)) { 1260 return base::FilePath(name.value() + FILE_PATH_LITERAL(".") + 1261 kDefaultHtmlExtension); 1262 } 1263 return name; 1264 } 1265 1266 base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name, 1267 const std::string& contents_mime_type) { 1268 // Start extension at 1 to skip over period if non-empty. 1269 base::FilePath::StringType ext = name.Extension().length() ? 1270 name.Extension().substr(1) : name.Extension(); 1271 base::FilePath::StringType suggested_extension = 1272 ExtensionForMimeType(contents_mime_type); 1273 std::string mime_type; 1274 if (!suggested_extension.empty() && 1275 !net::GetMimeTypeFromExtension(ext, &mime_type)) { 1276 // Extension is absent or needs to be updated. 1277 return base::FilePath(name.value() + FILE_PATH_LITERAL(".") + 1278 suggested_extension); 1279 } 1280 return name; 1281 } 1282 1283 const base::FilePath::CharType* SavePackage::ExtensionForMimeType( 1284 const std::string& contents_mime_type) { 1285 static const struct { 1286 const base::FilePath::CharType *mime_type; 1287 const base::FilePath::CharType *suggested_extension; 1288 } extensions[] = { 1289 { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension }, 1290 { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") }, 1291 { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") }, 1292 { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") }, 1293 { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") }, 1294 }; 1295 #if defined(OS_POSIX) 1296 base::FilePath::StringType mime_type(contents_mime_type); 1297 #elif defined(OS_WIN) 1298 base::FilePath::StringType mime_type(base::UTF8ToWide(contents_mime_type)); 1299 #endif // OS_WIN 1300 for (uint32 i = 0; i < ARRAYSIZE_UNSAFE(extensions); ++i) { 1301 if (mime_type == extensions[i].mime_type) 1302 return extensions[i].suggested_extension; 1303 } 1304 return FILE_PATH_LITERAL(""); 1305 } 1306 1307 void SavePackage::GetSaveInfo() { 1308 // Can't use web_contents_ in the file thread, so get the data that we need 1309 // before calling to it. 1310 base::FilePath website_save_dir, download_save_dir; 1311 bool skip_dir_check = false; 1312 DCHECK(download_manager_); 1313 if (download_manager_->GetDelegate()) { 1314 download_manager_->GetDelegate()->GetSaveDir( 1315 web_contents()->GetBrowserContext(), &website_save_dir, 1316 &download_save_dir, &skip_dir_check); 1317 } 1318 std::string mime_type = web_contents()->GetContentsMimeType(); 1319 std::string accept_languages = 1320 GetContentClient()->browser()->GetAcceptLangs( 1321 web_contents()->GetBrowserContext()); 1322 1323 BrowserThread::PostTask( 1324 BrowserThread::FILE, FROM_HERE, 1325 base::Bind(&SavePackage::CreateDirectoryOnFileThread, this, 1326 website_save_dir, download_save_dir, skip_dir_check, 1327 mime_type, accept_languages)); 1328 } 1329 1330 void SavePackage::CreateDirectoryOnFileThread( 1331 const base::FilePath& website_save_dir, 1332 const base::FilePath& download_save_dir, 1333 bool skip_dir_check, 1334 const std::string& mime_type, 1335 const std::string& accept_langs) { 1336 base::FilePath save_dir; 1337 // If the default html/websites save folder doesn't exist... 1338 // We skip the directory check for gdata directories on ChromeOS. 1339 if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) { 1340 // If the default download dir doesn't exist, create it. 1341 if (!base::DirectoryExists(download_save_dir)) { 1342 bool res = base::CreateDirectory(download_save_dir); 1343 DCHECK(res); 1344 } 1345 save_dir = download_save_dir; 1346 } else { 1347 // If it does exist, use the default save dir param. 1348 save_dir = website_save_dir; 1349 } 1350 1351 bool can_save_as_complete = CanSaveAsComplete(mime_type); 1352 base::FilePath suggested_filename = GetSuggestedNameForSaveAs( 1353 can_save_as_complete, mime_type, accept_langs); 1354 base::FilePath::StringType pure_file_name = 1355 suggested_filename.RemoveExtension().BaseName().value(); 1356 base::FilePath::StringType file_name_ext = suggested_filename.Extension(); 1357 1358 // Need to make sure the suggested file name is not too long. 1359 uint32 max_path = GetMaxPathLengthForDirectory(save_dir); 1360 1361 if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) { 1362 save_dir = save_dir.Append(pure_file_name + file_name_ext); 1363 } else { 1364 // Cannot create a shorter filename. This will cause the save as operation 1365 // to fail unless the user pick a shorter name. Continuing even though it 1366 // will fail because returning means no save as popup for the user, which 1367 // is even more confusing. This case should be rare though. 1368 save_dir = save_dir.Append(suggested_filename); 1369 } 1370 1371 BrowserThread::PostTask( 1372 BrowserThread::UI, FROM_HERE, 1373 base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir, 1374 can_save_as_complete)); 1375 } 1376 1377 void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path, 1378 bool can_save_as_complete) { 1379 1380 // The WebContents which owns this SavePackage may have disappeared during 1381 // the UI->FILE->UI thread hop of 1382 // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo. 1383 if (!web_contents() || !download_manager_->GetDelegate()) 1384 return; 1385 1386 base::FilePath::StringType default_extension; 1387 if (can_save_as_complete) 1388 default_extension = kDefaultHtmlExtension; 1389 1390 download_manager_->GetDelegate()->ChooseSavePath( 1391 web_contents(), 1392 suggested_path, 1393 default_extension, 1394 can_save_as_complete, 1395 base::Bind(&SavePackage::OnPathPicked, AsWeakPtr())); 1396 } 1397 1398 void SavePackage::OnPathPicked( 1399 const base::FilePath& final_name, 1400 SavePageType type, 1401 const SavePackageDownloadCreatedCallback& download_created_callback) { 1402 DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) || 1403 (type == SAVE_PAGE_TYPE_AS_MHTML) || 1404 (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type; 1405 // Ensure the filename is safe. 1406 saved_main_file_path_ = final_name; 1407 // TODO(asanka): This call may block on IO and shouldn't be made 1408 // from the UI thread. See http://crbug.com/61827. 1409 net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false, 1410 &saved_main_file_path_); 1411 1412 saved_main_directory_path_ = saved_main_file_path_.DirName(); 1413 save_type_ = type; 1414 if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) { 1415 // Make new directory for saving complete file. 1416 saved_main_directory_path_ = saved_main_directory_path_.Append( 1417 saved_main_file_path_.RemoveExtension().BaseName().value() + 1418 FILE_PATH_LITERAL("_files")); 1419 } 1420 1421 Init(download_created_callback); 1422 } 1423 1424 void SavePackage::StopObservation() { 1425 DCHECK(download_); 1426 DCHECK(download_manager_); 1427 1428 download_->RemoveObserver(this); 1429 download_ = NULL; 1430 download_manager_ = NULL; 1431 } 1432 1433 void SavePackage::OnDownloadDestroyed(DownloadItem* download) { 1434 StopObservation(); 1435 } 1436 1437 void SavePackage::FinalizeDownloadEntry() { 1438 DCHECK(download_); 1439 DCHECK(download_manager_); 1440 1441 download_manager_->OnSavePackageSuccessfullyFinished(download_); 1442 StopObservation(); 1443 } 1444 1445 } // namespace content 1446