Home | History | Annotate | Download | only in download
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "content/browser/download/save_package.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/bind.h"
     10 #include "base/file_util.h"
     11 #include "base/files/file_path.h"
     12 #include "base/i18n/file_util_icu.h"
     13 #include "base/logging.h"
     14 #include "base/message_loop/message_loop.h"
     15 #include "base/stl_util.h"
     16 #include "base/strings/string_piece.h"
     17 #include "base/strings/string_split.h"
     18 #include "base/strings/sys_string_conversions.h"
     19 #include "base/strings/utf_string_conversions.h"
     20 #include "base/threading/thread.h"
     21 #include "content/browser/download/download_item_impl.h"
     22 #include "content/browser/download/download_manager_impl.h"
     23 #include "content/browser/download/download_stats.h"
     24 #include "content/browser/download/save_file.h"
     25 #include "content/browser/download/save_file_manager.h"
     26 #include "content/browser/download/save_item.h"
     27 #include "content/browser/loader/resource_dispatcher_host_impl.h"
     28 #include "content/browser/renderer_host/render_process_host_impl.h"
     29 #include "content/browser/renderer_host/render_view_host_delegate.h"
     30 #include "content/browser/renderer_host/render_view_host_impl.h"
     31 #include "content/common/view_messages.h"
     32 #include "content/public/browser/browser_context.h"
     33 #include "content/public/browser/browser_thread.h"
     34 #include "content/public/browser/content_browser_client.h"
     35 #include "content/public/browser/download_manager_delegate.h"
     36 #include "content/public/browser/navigation_entry.h"
     37 #include "content/public/browser/notification_service.h"
     38 #include "content/public/browser/notification_types.h"
     39 #include "content/public/browser/resource_context.h"
     40 #include "content/public/browser/web_contents.h"
     41 #include "net/base/filename_util.h"
     42 #include "net/base/io_buffer.h"
     43 #include "net/base/mime_util.h"
     44 #include "net/url_request/url_request_context.h"
     45 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
     46 #include "url/url_constants.h"
     47 
     48 using base::Time;
     49 using blink::WebPageSerializerClient;
     50 
     51 namespace content {
     52 namespace {
     53 
// A counter for uniquely identifying each save package. Every SavePackage
// constructor in this file post-increments it to obtain its |unique_id_|.
int g_save_package_id = 0;

// Default name which will be used when we cannot get a proper name from the
// resource URL.
const char kDefaultSaveName[] = "saved_resource";

// Upper bound (exclusive) for the ordinal number appended to a file name when
// resolving a name conflict between files which share the same base file
// name. It is assumed to be big enough for that purpose.
const int32 kMaxFileOrdinalNumber = 9999;

// Maximum length for a file path. Since Windows has the MAX_PATH limitation
// for file paths, we need to make sure the path length of every saved file
// is less than MAX_PATH.
#if defined(OS_WIN)
const uint32 kMaxFilePathLength = MAX_PATH - 1;
#elif defined(OS_POSIX)
const uint32 kMaxFilePathLength = PATH_MAX - 1;
#endif

// Maximum length of the ordinal number part of a file name. Ordinal numbers
// stay below kMaxFileOrdinalNumber (9999), so the longest ordinal number part
// is "(9998)", i.e. 6 characters.
const uint32 kMaxFileOrdinalNumberPartLength = 6;
     78 
     79 // Strip current ordinal number, if any. Should only be used on pure
     80 // file names, i.e. those stripped of their extensions.
     81 // TODO(estade): improve this to not choke on alternate encodings.
     82 base::FilePath::StringType StripOrdinalNumber(
     83     const base::FilePath::StringType& pure_file_name) {
     84   base::FilePath::StringType::size_type r_paren_index =
     85       pure_file_name.rfind(FILE_PATH_LITERAL(')'));
     86   base::FilePath::StringType::size_type l_paren_index =
     87       pure_file_name.rfind(FILE_PATH_LITERAL('('));
     88   if (l_paren_index >= r_paren_index)
     89     return pure_file_name;
     90 
     91   for (base::FilePath::StringType::size_type i = l_paren_index + 1;
     92        i != r_paren_index; ++i) {
     93     if (!IsAsciiDigit(pure_file_name[i]))
     94       return pure_file_name;
     95   }
     96 
     97   return pure_file_name.substr(0, l_paren_index);
     98 }
     99 
    100 // Check whether we can save page as complete-HTML for the contents which
    101 // have specified a MIME type. Now only contents which have the MIME type
    102 // "text/html" can be saved as complete-HTML.
    103 bool CanSaveAsComplete(const std::string& contents_mime_type) {
    104   return contents_mime_type == "text/html" ||
    105          contents_mime_type == "application/xhtml+xml";
    106 }
    107 
    108 // Request handle for SavePackage downloads. Currently doesn't support
    109 // pause/resume/cancel, but returns a WebContents.
    110 class SavePackageRequestHandle : public DownloadRequestHandleInterface {
    111  public:
    112   SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)
    113       : save_package_(save_package) {}
    114 
    115   // DownloadRequestHandleInterface
    116   virtual WebContents* GetWebContents() const OVERRIDE {
    117     return save_package_.get() ? save_package_->web_contents() : NULL;
    118   }
    119   virtual DownloadManager* GetDownloadManager() const OVERRIDE {
    120     return NULL;
    121   }
    122   virtual void PauseRequest() const OVERRIDE {}
    123   virtual void ResumeRequest() const OVERRIDE {}
    124   virtual void CancelRequest() const OVERRIDE {}
    125   virtual std::string DebugString() const OVERRIDE {
    126     return "SavePackage DownloadRequestHandle";
    127   }
    128 
    129  private:
    130   base::WeakPtr<SavePackage> save_package_;
    131 };
    132 
    133 }  // namespace
    134 
// Extension (without the leading dot) given to saved HTML files: "htm" on
// Windows and "html" on every other platform.
const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
#if defined(OS_WIN)
    FILE_PATH_LITERAL("htm");
#else
    FILE_PATH_LITERAL("html");
#endif
    141 
// Creates a SavePackage for |web_contents| when the save type and the
// destination paths are already known. |file_full_path| is the path of the
// main saved file and |directory_full_path| the directory used for the
// remaining saved files. The page URL and title are captured from
// |web_contents| at construction time.
SavePackage::SavePackage(WebContents* web_contents,
                         SavePageType save_type,
                         const base::FilePath& file_full_path,
                         const base::FilePath& directory_full_path)
    : WebContentsObserver(web_contents),
      file_manager_(NULL),
      download_manager_(NULL),
      download_(NULL),
      page_url_(GetUrlToBeSaved()),
      saved_main_file_path_(file_full_path),
      saved_main_directory_path_(directory_full_path),
      title_(web_contents->GetTitle()),
      start_tick_(base::TimeTicks::Now()),
      finished_(false),
      mhtml_finishing_(false),
      user_canceled_(false),
      disk_error_occurred_(false),
      save_type_(save_type),
      all_save_items_count_(0),
      file_name_set_(&base::FilePath::CompareLessIgnoreCase),
      wait_state_(INITIALIZE),
      contents_id_(web_contents->GetRenderProcessHost()->GetID()),
      unique_id_(g_save_package_id++),
      wrote_to_completed_file_(false),
      wrote_to_failed_file_(false) {
  DCHECK(page_url_.is_valid());
  // Only the three concrete save types are accepted by this constructor.
  DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
         (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
         (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML));
  // Both destination paths must be non-empty and within the path limit.
  // NOTE(review): the file path is checked with <= while the directory path
  // is checked with < -- confirm the asymmetry is intended.
  DCHECK(!saved_main_file_path_.empty() &&
         saved_main_file_path_.value().length() <= kMaxFilePathLength);
  DCHECK(!saved_main_directory_path_.empty() &&
         saved_main_directory_path_.value().length() < kMaxFilePathLength);
  InternalInit();
}
    177 
// Creates a SavePackage for |web_contents| without a chosen save type or
// destination: |save_type_| starts as SAVE_PAGE_TYPE_UNKNOWN and the saved
// file/directory paths are left default-constructed. The page URL and title
// are captured from |web_contents| at construction time.
SavePackage::SavePackage(WebContents* web_contents)
    : WebContentsObserver(web_contents),
      file_manager_(NULL),
      download_manager_(NULL),
      download_(NULL),
      page_url_(GetUrlToBeSaved()),
      title_(web_contents->GetTitle()),
      start_tick_(base::TimeTicks::Now()),
      finished_(false),
      mhtml_finishing_(false),
      user_canceled_(false),
      disk_error_occurred_(false),
      save_type_(SAVE_PAGE_TYPE_UNKNOWN),
      all_save_items_count_(0),
      file_name_set_(&base::FilePath::CompareLessIgnoreCase),
      wait_state_(INITIALIZE),
      contents_id_(web_contents->GetRenderProcessHost()->GetID()),
      unique_id_(g_save_package_id++),
      wrote_to_completed_file_(false),
      wrote_to_failed_file_(false) {
  DCHECK(page_url_.is_valid());
  InternalInit();
}
    201 
// This is for testing use. Set |finished_| as true because we don't want
// method Cancel to be called in the destructor in test mode.
// We also don't call InternalInit(), so |file_manager_| and
// |download_manager_| stay NULL. Nothing is read from |web_contents| here:
// |contents_id_| is set to 0 and the page URL and title are left
// default-constructed.
SavePackage::SavePackage(WebContents* web_contents,
                         const base::FilePath& file_full_path,
                         const base::FilePath& directory_full_path)
    : WebContentsObserver(web_contents),
      file_manager_(NULL),
      download_manager_(NULL),
      download_(NULL),
      saved_main_file_path_(file_full_path),
      saved_main_directory_path_(directory_full_path),
      start_tick_(base::TimeTicks::Now()),
      finished_(true),
      mhtml_finishing_(false),
      user_canceled_(false),
      disk_error_occurred_(false),
      save_type_(SAVE_PAGE_TYPE_UNKNOWN),
      all_save_items_count_(0),
      file_name_set_(&base::FilePath::CompareLessIgnoreCase),
      wait_state_(INITIALIZE),
      contents_id_(0),
      unique_id_(g_save_package_id++),
      wrote_to_completed_file_(false),
      wrote_to_failed_file_(false) {
}
    228 
    229 SavePackage::~SavePackage() {
    230   // Stop receiving saving job's updates
    231   if (!finished_ && !canceled()) {
    232     // Unexpected quit.
    233     Cancel(true);
    234   }
    235 
    236   // We should no longer be observing the DownloadItem at this point.
    237   CHECK(!download_);
    238 
    239   DCHECK(all_save_items_count_ == (waiting_item_queue_.size() +
    240                                    completed_count() +
    241                                    in_process_count()));
    242   // Free all SaveItems.
    243   while (!waiting_item_queue_.empty()) {
    244     // We still have some items which are waiting for start to save.
    245     SaveItem* save_item = waiting_item_queue_.front();
    246     waiting_item_queue_.pop();
    247     delete save_item;
    248   }
    249 
    250   STLDeleteValues(&saved_success_items_);
    251   STLDeleteValues(&in_progress_items_);
    252   STLDeleteValues(&saved_failed_items_);
    253 
    254   file_manager_ = NULL;
    255 }
    256 
    257 GURL SavePackage::GetUrlToBeSaved() {
    258   // Instead of using web_contents_.GetURL here, we use url() (which is the
    259   // "real" url of the page) from the NavigationEntry because it reflects its
    260   // origin rather than the displayed one (returned by GetURL) which may be
    261   // different (like having "view-source:" on the front).
    262   NavigationEntry* visible_entry =
    263       web_contents()->GetController().GetVisibleEntry();
    264   return visible_entry->GetURL();
    265 }
    266 
    267 void SavePackage::Cancel(bool user_action) {
    268   if (!canceled()) {
    269     if (user_action)
    270       user_canceled_ = true;
    271     else
    272       disk_error_occurred_ = true;
    273     Stop();
    274   }
    275   RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED);
    276 }
    277 
    278 // Init() can be called directly, or indirectly via GetSaveInfo(). In both
    279 // cases, we need file_manager_ to be initialized, so we do this first.
    280 void SavePackage::InternalInit() {
    281   ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get();
    282   if (!rdh) {
    283     NOTREACHED();
    284     return;
    285   }
    286 
    287   file_manager_ = rdh->save_file_manager();
    288   DCHECK(file_manager_);
    289 
    290   download_manager_ = static_cast<DownloadManagerImpl*>(
    291       BrowserContext::GetDownloadManager(
    292           web_contents()->GetBrowserContext()));
    293   DCHECK(download_manager_);
    294 
    295   RecordSavePackageEvent(SAVE_PACKAGE_STARTED);
    296 }
    297 
    298 bool SavePackage::Init(
    299     const SavePackageDownloadCreatedCallback& download_created_callback) {
    300   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    301   // Set proper running state.
    302   if (wait_state_ != INITIALIZE)
    303     return false;
    304 
    305   wait_state_ = START_PROCESS;
    306 
    307   // Initialize the request context and resource dispatcher.
    308   BrowserContext* browser_context = web_contents()->GetBrowserContext();
    309   if (!browser_context) {
    310     NOTREACHED();
    311     return false;
    312   }
    313 
    314   scoped_ptr<DownloadRequestHandleInterface> request_handle(
    315       new SavePackageRequestHandle(AsWeakPtr()));
    316   // The download manager keeps ownership but adds us as an observer.
    317   download_manager_->CreateSavePackageDownloadItem(
    318       saved_main_file_path_,
    319       page_url_,
    320       ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ?
    321        "multipart/related" : "text/html"),
    322       request_handle.Pass(),
    323       base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(),
    324                  download_created_callback));
    325   return true;
    326 }
    327 
    328 void SavePackage::InitWithDownloadItem(
    329     const SavePackageDownloadCreatedCallback& download_created_callback,
    330     DownloadItemImpl* item) {
    331   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    332   DCHECK(item);
    333   download_ = item;
    334   download_->AddObserver(this);
    335   // Confirm above didn't delete the tab out from under us.
    336   if (!download_created_callback.is_null())
    337     download_created_callback.Run(download_);
    338 
    339   // Check save type and process the save page job.
    340   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
    341     // Get directory
    342     DCHECK(!saved_main_directory_path_.empty());
    343     GetAllSavableResourceLinksForCurrentPage();
    344   } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
    345     web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind(
    346         &SavePackage::OnMHTMLGenerated, this));
    347   } else {
    348     DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_;
    349     wait_state_ = NET_FILES;
    350     SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ?
    351         SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
    352         SaveFileCreateInfo::SAVE_FILE_FROM_NET;
    353     SaveItem* save_item = new SaveItem(page_url_,
    354                                        Referrer(),
    355                                        this,
    356                                        save_source);
    357     // Add this item to waiting list.
    358     waiting_item_queue_.push(save_item);
    359     all_save_items_count_ = 1;
    360     download_->SetTotalBytes(1);
    361 
    362     DoSavingProcess();
    363   }
    364 }
    365 
    366 void SavePackage::OnMHTMLGenerated(int64 size) {
    367   if (size <= 0) {
    368     Cancel(false);
    369     return;
    370   }
    371   wrote_to_completed_file_ = true;
    372 
    373   // Hack to avoid touching download_ after user cancel.
    374   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
    375   // with SavePackage flow.
    376   if (download_->GetState() == DownloadItem::IN_PROGRESS) {
    377     download_->SetTotalBytes(size);
    378     download_->DestinationUpdate(size, 0, std::string());
    379     // Must call OnAllDataSaved here in order for
    380     // GDataDownloadObserver::ShouldUpload() to return true.
    381     // ShouldCompleteDownload() may depend on the gdata uploader to finish.
    382     download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
    383   }
    384 
    385   if (!download_manager_->GetDelegate()) {
    386     Finish();
    387     return;
    388   }
    389 
    390   if (download_manager_->GetDelegate()->ShouldCompleteDownload(
    391           download_, base::Bind(&SavePackage::Finish, this))) {
    392     Finish();
    393   }
    394 }
    395 
    396 // On POSIX, the length of |pure_file_name| + |file_name_ext| is further
    397 // restricted by NAME_MAX. The maximum allowed path looks like:
    398 // '/path/to/save_dir' + '/' + NAME_MAX.
    399 uint32 SavePackage::GetMaxPathLengthForDirectory(
    400     const base::FilePath& base_dir) {
    401 #if defined(OS_POSIX)
    402   return std::min(kMaxFilePathLength,
    403                   static_cast<uint32>(base_dir.value().length()) +
    404                   NAME_MAX + 1);
    405 #else
    406   return kMaxFilePathLength;
    407 #endif
    408 }
    409 
    410 // File name is considered being consist of pure file name, dot and file
    411 // extension name. File name might has no dot and file extension, or has
    412 // multiple dot inside file name. The dot, which separates the pure file
    413 // name and file extension name, is last dot in the whole file name.
    414 // This function is for making sure the length of specified file path is not
    415 // great than the specified maximum length of file path and getting safe pure
    416 // file name part if the input pure file name is too long.
    417 // The parameter |dir_path| specifies directory part of the specified
    418 // file path. The parameter |file_name_ext| specifies file extension
    419 // name part of the specified file path (including start dot). The parameter
    420 // |max_file_path_len| specifies maximum length of the specified file path.
    421 // The parameter |pure_file_name| input pure file name part of the specified
    422 // file path. If the length of specified file path is great than
    423 // |max_file_path_len|, the |pure_file_name| will output new pure file name
    424 // part for making sure the length of specified file path is less than
    425 // specified maximum length of file path. Return false if the function can
    426 // not get a safe pure file name, otherwise it returns true.
    427 bool SavePackage::GetSafePureFileName(
    428     const base::FilePath& dir_path,
    429     const base::FilePath::StringType& file_name_ext,
    430     uint32 max_file_path_len,
    431     base::FilePath::StringType* pure_file_name) {
    432   DCHECK(!pure_file_name->empty());
    433   int available_length = static_cast<int>(max_file_path_len -
    434                                           dir_path.value().length() -
    435                                           file_name_ext.length());
    436   // Need an extra space for the separator.
    437   if (!dir_path.EndsWithSeparator())
    438     --available_length;
    439 
    440   // Plenty of room.
    441   if (static_cast<int>(pure_file_name->length()) <= available_length)
    442     return true;
    443 
    444   // Limited room. Truncate |pure_file_name| to fit.
    445   if (available_length > 0) {
    446     *pure_file_name = pure_file_name->substr(0, available_length);
    447     return true;
    448   }
    449 
    450   // Not enough room to even use a shortened |pure_file_name|.
    451   pure_file_name->clear();
    452   return false;
    453 }
    454 
// Generates the file name under which the resource at |url| is saved and
// stores it in |generated_name|.
//   |disposition|: content disposition passed on to net::GenerateFileName().
//   |url|: URL of the resource.
//   |need_html_ext|: when true, the extension is forced to
//       "." + kDefaultHtmlExtension.
//   |generated_name|: out. Receives the generated file name on success.
// The name is shortened to fit the path limit of |saved_main_directory_path_|
// and is recorded in |file_name_set_|, which compares names ignoring case. A
// name which is already present gets an ordinal number such as "(1)"
// inserted before its extension. Returns false when no name fitting the path
// limit could be produced.
bool SavePackage::GenerateFileName(const std::string& disposition,
                                   const GURL& url,
                                   bool need_html_ext,
                                   base::FilePath::StringType* generated_name) {
  // TODO(jungshik): Figure out the referrer charset when having one
  // makes sense and pass it to GenerateFileName.
  base::FilePath file_path = net::GenerateFileName(url,
                                                   disposition,
                                                   std::string(),
                                                   std::string(),
                                                   std::string(),
                                                   kDefaultSaveName);

  DCHECK(!file_path.empty());
  // Split the suggested name into its pure name and its extension.
  base::FilePath::StringType pure_file_name =
      file_path.RemoveExtension().BaseName().value();
  base::FilePath::StringType file_name_ext = file_path.Extension();

  // If it is HTML resource, use ".htm{l,}" as its extension.
  if (need_html_ext) {
    file_name_ext = FILE_PATH_LITERAL(".");
    file_name_ext.append(kDefaultHtmlExtension);
  }

  // Need to make sure the suggested file name is not too long.
  uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);

  // Get safe pure file name.
  if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
                           max_path, &pure_file_name))
    return false;

  base::FilePath::StringType file_name = pure_file_name + file_name_ext;

  // Check whether we already have same name in a case insensitive manner.
  FileNameSet::const_iterator iter = file_name_set_.find(file_name);
  if (iter == file_name_set_.end()) {
    file_name_set_.insert(file_name);
  } else {
    // Found same name, increase the ordinal number for the file name.
    // The base name is derived from the entry already in the set, so an
    // ordinal number that entry may carry is stripped first.
    pure_file_name =
        base::FilePath(*iter).RemoveExtension().BaseName().value();
    base::FilePath::StringType base_file_name =
        StripOrdinalNumber(pure_file_name);

    // We need to make sure the length of base file name plus maximum ordinal
    // number path will be less than or equal to kMaxFilePathLength.
    if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
        max_path - kMaxFileOrdinalNumberPartLength, &base_file_name))
      return false;

    // Prepare the new ordinal number.
    uint32 ordinal_number;
    FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name);
    if (it == file_name_count_map_.end()) {
      // First base-name-conflict resolving, use 1 as initial ordinal number.
      file_name_count_map_[base_file_name] = 1;
      ordinal_number = 1;
    } else {
      // We have met same base-name conflict, use latest ordinal number.
      ordinal_number = it->second;
    }

    if (ordinal_number > (kMaxFileOrdinalNumber - 1)) {
      // Use a random file from temporary file.
      // NOTE(review): the result of CreateTemporaryFile() is not checked, and
      // the name taken from it below carries no extension -- confirm both
      // are intended.
      base::FilePath temp_file;
      base::CreateTemporaryFile(&temp_file);
      file_name = temp_file.RemoveExtension().BaseName().value();
      // Get safe pure file name.
      if (!GetSafePureFileName(saved_main_directory_path_,
                               base::FilePath::StringType(),
                               max_path, &file_name))
        return false;
    } else {
      // Try the ordinal numbers in increasing order until an unused name is
      // found, and remember the number following it for the next conflict on
      // this base name.
      // NOTE(review): if every candidate below kMaxFileOrdinalNumber is
      // already taken, the loop ends with |file_name| still holding the
      // conflicting name -- confirm this cannot happen in practice.
      for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) {
        base::FilePath::StringType new_name = base_file_name +
            base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext;
        if (file_name_set_.find(new_name) == file_name_set_.end()) {
          // Resolved name conflict.
          file_name = new_name;
          file_name_count_map_[base_file_name] = ++i;
          break;
        }
      }
    }

    file_name_set_.insert(file_name);
  }

  DCHECK(!file_name.empty());
  generated_name->assign(file_name);

  return true;
}
    550 
    551 // We have received a message from SaveFileManager about a new saving job. We
    552 // create a SaveItem and store it in our in_progress list.
    553 void SavePackage::StartSave(const SaveFileCreateInfo* info) {
    554   DCHECK(info && !info->url.is_empty());
    555 
    556   SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec());
    557   if (it == in_progress_items_.end()) {
    558     // If not found, we must have cancel action.
    559     DCHECK(canceled());
    560     return;
    561   }
    562   SaveItem* save_item = it->second;
    563 
    564   DCHECK(!saved_main_file_path_.empty());
    565 
    566   save_item->SetSaveId(info->save_id);
    567   save_item->SetTotalBytes(info->total_bytes);
    568 
    569   // Determine the proper path for a saving job, by choosing either the default
    570   // save directory, or prompting the user.
    571   DCHECK(!save_item->has_final_name());
    572   if (info->url != page_url_) {
    573     base::FilePath::StringType generated_name;
    574     // For HTML resource file, make sure it will have .htm as extension name,
    575     // otherwise, when you open the saved page in Chrome again, download
    576     // file manager will treat it as downloadable resource, and download it
    577     // instead of opening it as HTML.
    578     bool need_html_ext =
    579         info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
    580     if (!GenerateFileName(info->content_disposition,
    581                           GURL(info->url),
    582                           need_html_ext,
    583                           &generated_name)) {
    584       // We can not generate file name for this SaveItem, so we cancel the
    585       // saving page job if the save source is from serialized DOM data.
    586       // Otherwise, it means this SaveItem is sub-resource type, we treat it
    587       // as an error happened on saving. We can ignore this type error for
    588       // sub-resource links which will be resolved as absolute links instead
    589       // of local links in final saved contents.
    590       if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)
    591         Cancel(true);
    592       else
    593         SaveFinished(save_item->save_id(), 0, false);
    594       return;
    595     }
    596 
    597     // When saving page as only-HTML, we only have a SaveItem whose url
    598     // must be page_url_.
    599     DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
    600     DCHECK(!saved_main_directory_path_.empty());
    601 
    602     // Now we get final name retrieved from GenerateFileName, we will use it
    603     // rename the SaveItem.
    604     base::FilePath final_name =
    605         saved_main_directory_path_.Append(generated_name);
    606     save_item->Rename(final_name);
    607   } else {
    608     // It is the main HTML file, use the name chosen by the user.
    609     save_item->Rename(saved_main_file_path_);
    610   }
    611 
    612   // If the save source is from file system, inform SaveFileManager to copy
    613   // corresponding file to the file path which this SaveItem specifies.
    614   if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) {
    615     BrowserThread::PostTask(
    616         BrowserThread::FILE, FROM_HERE,
    617         base::Bind(&SaveFileManager::SaveLocalFile,
    618                    file_manager_,
    619                    save_item->url(),
    620                    save_item->save_id(),
    621                    contents_id()));
    622     return;
    623   }
    624 
    625   // Check whether we begin to require serialized HTML data.
    626   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
    627       wait_state_ == HTML_DATA) {
    628     // Inform backend to serialize the all frames' DOM and send serialized
    629     // HTML data back.
    630     GetSerializedHtmlDataForCurrentPageWithLocalLinks();
    631   }
    632 }
    633 
    634 SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) {
    635   if (in_process_count()) {
    636     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
    637         it != in_progress_items_.end(); ++it) {
    638       SaveItem* save_item = it->second;
    639       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
    640       if (save_item->save_id() == save_id)
    641         return save_item;
    642     }
    643   }
    644   return NULL;
    645 }
    646 
    647 void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) {
    648   SaveUrlItemMap::iterator it = in_progress_items_.find(
    649       save_item->url().spec());
    650   DCHECK(it != in_progress_items_.end());
    651   DCHECK(save_item == it->second);
    652   in_progress_items_.erase(it);
    653 
    654   if (save_item->success()) {
    655     // Add it to saved_success_items_.
    656     DCHECK(saved_success_items_.find(save_item->save_id()) ==
    657            saved_success_items_.end());
    658     saved_success_items_[save_item->save_id()] = save_item;
    659   } else {
    660     // Add it to saved_failed_items_.
    661     DCHECK(saved_failed_items_.find(save_item->url().spec()) ==
    662            saved_failed_items_.end());
    663     saved_failed_items_[save_item->url().spec()] = save_item;
    664   }
    665 }
    666 
    667 // Called for updating saving state.
    668 bool SavePackage::UpdateSaveProgress(int32 save_id,
    669                                      int64 size,
    670                                      bool write_success) {
    671   // Because we might have canceled this saving job before,
    672   // so we might not find corresponding SaveItem.
    673   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
    674   if (!save_item)
    675     return false;
    676 
    677   save_item->Update(size);
    678 
    679   // If we got disk error, cancel whole save page job.
    680   if (!write_success) {
    681     // Cancel job with reason of disk error.
    682     Cancel(false);
    683   }
    684   return true;
    685 }
    686 
    687 // Stop all page saving jobs that are in progress and instruct the file thread
    688 // to delete all saved  files.
    689 void SavePackage::Stop() {
    690   // If we haven't moved out of the initial state, there's nothing to cancel and
    691   // there won't be valid pointers for file_manager_ or download_.
    692   if (wait_state_ == INITIALIZE)
    693     return;
    694 
    695   // When stopping, if it still has some items in in_progress, cancel them.
    696   DCHECK(canceled());
    697   if (in_process_count()) {
    698     SaveUrlItemMap::iterator it = in_progress_items_.begin();
    699     for (; it != in_progress_items_.end(); ++it) {
    700       SaveItem* save_item = it->second;
    701       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
    702       save_item->Cancel();
    703     }
    704     // Remove all in progress item to saved map. For failed items, they will
    705     // be put into saved_failed_items_, for successful item, they will be put
    706     // into saved_success_items_.
    707     while (in_process_count())
    708       PutInProgressItemToSavedMap(in_progress_items_.begin()->second);
    709   }
    710 
    711   // This vector contains the save ids of the save files which SaveFileManager
    712   // needs to remove from its save_file_map_.
    713   SaveIDList save_ids;
    714   for (SavedItemMap::iterator it = saved_success_items_.begin();
    715       it != saved_success_items_.end(); ++it)
    716     save_ids.push_back(it->first);
    717   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
    718       it != saved_failed_items_.end(); ++it)
    719     save_ids.push_back(it->second->save_id());
    720 
    721   BrowserThread::PostTask(
    722       BrowserThread::FILE, FROM_HERE,
    723       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
    724                  file_manager_,
    725                  save_ids));
    726 
    727   finished_ = true;
    728   wait_state_ = FAILED;
    729 
    730   // Inform the DownloadItem we have canceled whole save page job.
    731   if (download_) {
    732     download_->Cancel(false);
    733     FinalizeDownloadEntry();
    734   }
    735 }
    736 
    737 void SavePackage::CheckFinish() {
    738   if (in_process_count() || finished_)
    739     return;
    740 
    741   base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
    742                         saved_success_items_.size() > 1) ?
    743                         saved_main_directory_path_ : base::FilePath();
    744 
    745   // This vector contains the final names of all the successfully saved files
    746   // along with their save ids. It will be passed to SaveFileManager to do the
    747   // renaming job.
    748   FinalNameList final_names;
    749   for (SavedItemMap::iterator it = saved_success_items_.begin();
    750       it != saved_success_items_.end(); ++it)
    751     final_names.push_back(std::make_pair(it->first,
    752                                          it->second->full_path()));
    753 
    754   BrowserThread::PostTask(
    755       BrowserThread::FILE, FROM_HERE,
    756       base::Bind(&SaveFileManager::RenameAllFiles,
    757                  file_manager_,
    758                  final_names,
    759                  dir,
    760                  web_contents()->GetRenderProcessHost()->GetID(),
    761                  web_contents()->GetRenderViewHost()->GetRoutingID(),
    762                  id()));
    763 }
    764 
    765 // Successfully finished all items of this SavePackage.
    766 void SavePackage::Finish() {
    767   // User may cancel the job when we're moving files to the final directory.
    768   if (canceled())
    769     return;
    770 
    771   wait_state_ = SUCCESSFUL;
    772   finished_ = true;
    773 
    774   // Record finish.
    775   RecordSavePackageEvent(SAVE_PACKAGE_FINISHED);
    776 
    777   // Record any errors that occurred.
    778   if (wrote_to_completed_file_) {
    779     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED);
    780   }
    781 
    782   if (wrote_to_failed_file_) {
    783     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED);
    784   }
    785 
    786   // This vector contains the save ids of the save files which SaveFileManager
    787   // needs to remove from its save_file_map_.
    788   SaveIDList save_ids;
    789   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
    790        it != saved_failed_items_.end(); ++it)
    791     save_ids.push_back(it->second->save_id());
    792 
    793   BrowserThread::PostTask(
    794       BrowserThread::FILE, FROM_HERE,
    795       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
    796                  file_manager_,
    797                  save_ids));
    798 
    799   if (download_) {
    800     // Hack to avoid touching download_ after user cancel.
    801     // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
    802     // with SavePackage flow.
    803     if (download_->GetState() == DownloadItem::IN_PROGRESS) {
    804       if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) {
    805         download_->DestinationUpdate(
    806             all_save_items_count_, CurrentSpeed(), std::string());
    807         download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
    808       }
    809       download_->MarkAsComplete();
    810     }
    811     FinalizeDownloadEntry();
    812   }
    813 }
    814 
    815 // Called for updating end state.
    816 void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) {
    817   // Because we might have canceled this saving job before,
    818   // so we might not find corresponding SaveItem. Just ignore it.
    819   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
    820   if (!save_item)
    821     return;
    822 
    823   // Let SaveItem set end state.
    824   save_item->Finish(size, is_success);
    825   // Remove the associated save id and SavePackage.
    826   file_manager_->RemoveSaveFile(save_id, save_item->url(), this);
    827 
    828   PutInProgressItemToSavedMap(save_item);
    829 
    830   // Inform the DownloadItem to update UI.
    831   // We use the received bytes as number of saved files.
    832   // Hack to avoid touching download_ after user cancel.
    833   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
    834   // with SavePackage flow.
    835   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
    836     download_->DestinationUpdate(
    837         completed_count(), CurrentSpeed(), std::string());
    838   }
    839 
    840   if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM &&
    841       save_item->url() == page_url_ && !save_item->received_bytes()) {
    842     // If size of main HTML page is 0, treat it as disk error.
    843     Cancel(false);
    844     return;
    845   }
    846 
    847   if (canceled()) {
    848     DCHECK(finished_);
    849     return;
    850   }
    851 
    852   // Continue processing the save page job.
    853   DoSavingProcess();
    854 
    855   // Check whether we can successfully finish whole job.
    856   CheckFinish();
    857 }
    858 
    859 // Sometimes, the net io will only call SaveFileManager::SaveFinished with
    860 // save id -1 when it encounters error. Since in this case, save id will be
    861 // -1, so we can only use URL to find which SaveItem is associated with
    862 // this error.
    863 // Saving an item failed. If it's a sub-resource, ignore it. If the error comes
    864 // from serializing HTML data, then cancel saving page.
    865 void SavePackage::SaveFailed(const GURL& save_url) {
    866   SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec());
    867   if (it == in_progress_items_.end()) {
    868     NOTREACHED();  // Should not exist!
    869     return;
    870   }
    871   SaveItem* save_item = it->second;
    872 
    873   save_item->Finish(0, false);
    874 
    875   PutInProgressItemToSavedMap(save_item);
    876 
    877   // Inform the DownloadItem to update UI.
    878   // We use the received bytes as number of saved files.
    879   // Hack to avoid touching download_ after user cancel.
    880   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
    881   // with SavePackage flow.
    882   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
    883     download_->DestinationUpdate(
    884         completed_count(), CurrentSpeed(), std::string());
    885   }
    886 
    887   if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
    888       (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
    889       (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) {
    890     // We got error when saving page. Treat it as disk error.
    891     Cancel(true);
    892   }
    893 
    894   if (canceled()) {
    895     DCHECK(finished_);
    896     return;
    897   }
    898 
    899   // Continue processing the save page job.
    900   DoSavingProcess();
    901 
    902   CheckFinish();
    903 }
    904 
    905 void SavePackage::SaveCanceled(SaveItem* save_item) {
    906   // Call the RemoveSaveFile in UI thread.
    907   file_manager_->RemoveSaveFile(save_item->save_id(),
    908                                 save_item->url(),
    909                                 this);
    910   if (save_item->save_id() != -1)
    911     BrowserThread::PostTask(
    912         BrowserThread::FILE, FROM_HERE,
    913         base::Bind(&SaveFileManager::CancelSave,
    914                    file_manager_,
    915                    save_item->save_id()));
    916 }
    917 
    918 // Initiate a saving job of a specific URL. We send the request to
    919 // SaveFileManager, which will dispatch it to different approach according to
    920 // the save source. Parameter process_all_remaining_items indicates whether
    921 // we need to save all remaining items.
    922 void SavePackage::SaveNextFile(bool process_all_remaining_items) {
    923   DCHECK(web_contents());
    924   DCHECK(waiting_item_queue_.size());
    925 
    926   do {
    927     // Pop SaveItem from waiting list.
    928     SaveItem* save_item = waiting_item_queue_.front();
    929     waiting_item_queue_.pop();
    930 
    931     // Add the item to in_progress_items_.
    932     SaveUrlItemMap::iterator it = in_progress_items_.find(
    933         save_item->url().spec());
    934     DCHECK(it == in_progress_items_.end());
    935     in_progress_items_[save_item->url().spec()] = save_item;
    936     save_item->Start();
    937     file_manager_->SaveURL(save_item->url(),
    938                            save_item->referrer(),
    939                            web_contents()->GetRenderProcessHost()->GetID(),
    940                            routing_id(),
    941                            save_item->save_source(),
    942                            save_item->full_path(),
    943                            web_contents()->
    944                                GetBrowserContext()->GetResourceContext(),
    945                            this);
    946   } while (process_all_remaining_items && waiting_item_queue_.size());
    947 }
    948 
    949 // Calculate the percentage of whole save page job.
    950 int SavePackage::PercentComplete() {
    951   if (!all_save_items_count_)
    952     return 0;
    953   else if (!in_process_count())
    954     return 100;
    955   else
    956     return completed_count() / all_save_items_count_;
    957 }
    958 
    959 int64 SavePackage::CurrentSpeed() const {
    960   base::TimeDelta diff = base::TimeTicks::Now() - start_tick_;
    961   int64 diff_ms = diff.InMilliseconds();
    962   return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms;
    963 }
    964 
    965 // Continue processing the save page job after one SaveItem has been
    966 // finished.
    967 void SavePackage::DoSavingProcess() {
    968   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
    969     // We guarantee that images and JavaScripts must be downloaded first.
    970     // So when finishing all those sub-resources, we will know which
    971     // sub-resource's link can be replaced with local file path, which
    972     // sub-resource's link need to be replaced with absolute URL which
    973     // point to its internet address because it got error when saving its data.
    974 
    975     // Start a new SaveItem job if we still have job in waiting queue.
    976     if (waiting_item_queue_.size()) {
    977       DCHECK(wait_state_ == NET_FILES);
    978       SaveItem* save_item = waiting_item_queue_.front();
    979       if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) {
    980         SaveNextFile(false);
    981       } else if (!in_process_count()) {
    982         // If there is no in-process SaveItem, it means all sub-resources
    983         // have been processed. Now we need to start serializing HTML DOM
    984         // for the current page to get the generated HTML data.
    985         wait_state_ = HTML_DATA;
    986         // All non-HTML resources have been finished, start all remaining
    987         // HTML files.
    988         SaveNextFile(true);
    989       }
    990     } else if (in_process_count()) {
    991       // Continue asking for HTML data.
    992       DCHECK(wait_state_ == HTML_DATA);
    993     }
    994   } else {
    995     // Save as HTML only or MHTML.
    996     DCHECK(wait_state_ == NET_FILES);
    997     DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
    998            (save_type_ == SAVE_PAGE_TYPE_AS_MHTML));
    999     if (waiting_item_queue_.size()) {
   1000       DCHECK(all_save_items_count_ == waiting_item_queue_.size());
   1001       SaveNextFile(false);
   1002     }
   1003   }
   1004 }
   1005 
   1006 bool SavePackage::OnMessageReceived(const IPC::Message& message) {
   1007   bool handled = true;
   1008   IPC_BEGIN_MESSAGE_MAP(SavePackage, message)
   1009     IPC_MESSAGE_HANDLER(ViewHostMsg_SendCurrentPageAllSavableResourceLinks,
   1010                         OnReceivedSavableResourceLinksForCurrentPage)
   1011     IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData,
   1012                         OnReceivedSerializedHtmlData)
   1013     IPC_MESSAGE_UNHANDLED(handled = false)
   1014   IPC_END_MESSAGE_MAP()
   1015   return handled;
   1016 }
   1017 
   1018 // After finishing all SaveItems which need to get data from net.
   1019 // We collect all URLs which have local storage and send the
   1020 // map:(originalURL:currentLocalPath) to render process (backend).
   1021 // Then render process will serialize DOM and send data to us.
   1022 void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() {
   1023   if (wait_state_ != HTML_DATA)
   1024     return;
   1025   std::vector<GURL> saved_links;
   1026   std::vector<base::FilePath> saved_file_paths;
   1027   int successful_started_items_count = 0;
   1028 
   1029   // Collect all saved items which have local storage.
   1030   // First collect the status of all the resource files and check whether they
   1031   // have created local files although they have not been completely saved.
   1032   // If yes, the file can be saved. Otherwise, there is a disk error, so we
   1033   // need to cancel the page saving job.
   1034   for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
   1035        it != in_progress_items_.end(); ++it) {
   1036     DCHECK(it->second->save_source() ==
   1037            SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
   1038     if (it->second->has_final_name())
   1039       successful_started_items_count++;
   1040     saved_links.push_back(it->second->url());
   1041     saved_file_paths.push_back(it->second->file_name());
   1042   }
   1043 
   1044   // If not all file of HTML resource have been started, then wait.
   1045   if (successful_started_items_count != in_process_count())
   1046     return;
   1047 
   1048   // Collect all saved success items.
   1049   for (SavedItemMap::iterator it = saved_success_items_.begin();
   1050        it != saved_success_items_.end(); ++it) {
   1051     DCHECK(it->second->has_final_name());
   1052     saved_links.push_back(it->second->url());
   1053     saved_file_paths.push_back(it->second->file_name());
   1054   }
   1055 
   1056   // Get the relative directory name.
   1057   base::FilePath relative_dir_name = saved_main_directory_path_.BaseName();
   1058 
   1059   Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks(
   1060       routing_id(), saved_links, saved_file_paths, relative_dir_name));
   1061 }
   1062 
   1063 // Process the serialized HTML content data of a specified web page
   1064 // retrieved from render process.
   1065 void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url,
   1066                                                const std::string& data,
   1067                                                int32 status) {
   1068   WebPageSerializerClient::PageSerializationStatus flag =
   1069       static_cast<WebPageSerializerClient::PageSerializationStatus>(status);
   1070   // Check current state.
   1071   if (wait_state_ != HTML_DATA)
   1072     return;
   1073 
   1074   int id = contents_id();
   1075   // If the all frames are finished saving, we need to close the
   1076   // remaining SaveItems.
   1077   if (flag == WebPageSerializerClient::AllFramesAreFinished) {
   1078     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
   1079          it != in_progress_items_.end(); ++it) {
   1080       VLOG(20) << " " << __FUNCTION__ << "()"
   1081                << " save_id = " << it->second->save_id()
   1082                << " url = \"" << it->second->url().spec() << "\"";
   1083       BrowserThread::PostTask(
   1084           BrowserThread::FILE, FROM_HERE,
   1085           base::Bind(&SaveFileManager::SaveFinished,
   1086                      file_manager_,
   1087                      it->second->save_id(),
   1088                      it->second->url(),
   1089                      id,
   1090                      true));
   1091     }
   1092     return;
   1093   }
   1094 
   1095   SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec());
   1096   if (it == in_progress_items_.end()) {
   1097     for (SavedItemMap::iterator saved_it = saved_success_items_.begin();
   1098       saved_it != saved_success_items_.end(); ++saved_it) {
   1099       if (saved_it->second->url() == frame_url) {
   1100         wrote_to_completed_file_ = true;
   1101         break;
   1102       }
   1103     }
   1104 
   1105     it = saved_failed_items_.find(frame_url.spec());
   1106     if (it != saved_failed_items_.end())
   1107       wrote_to_failed_file_ = true;
   1108 
   1109     return;
   1110   }
   1111 
   1112   SaveItem* save_item = it->second;
   1113   DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
   1114 
   1115   if (!data.empty()) {
   1116     // Prepare buffer for saving HTML data.
   1117     scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size()));
   1118     memcpy(new_data->data(), data.data(), data.size());
   1119 
   1120     // Call write file functionality in file thread.
   1121     BrowserThread::PostTask(
   1122         BrowserThread::FILE, FROM_HERE,
   1123         base::Bind(&SaveFileManager::UpdateSaveProgress,
   1124                    file_manager_,
   1125                    save_item->save_id(),
   1126                    new_data,
   1127                    static_cast<int>(data.size())));
   1128   }
   1129 
   1130   // Current frame is completed saving, call finish in file thread.
   1131   if (flag == WebPageSerializerClient::CurrentFrameIsFinished) {
   1132     VLOG(20) << " " << __FUNCTION__ << "()"
   1133              << " save_id = " << save_item->save_id()
   1134              << " url = \"" << save_item->url().spec() << "\"";
   1135     BrowserThread::PostTask(
   1136         BrowserThread::FILE, FROM_HERE,
   1137         base::Bind(&SaveFileManager::SaveFinished,
   1138                    file_manager_,
   1139                    save_item->save_id(),
   1140                    save_item->url(),
   1141                    id,
   1142                    true));
   1143   }
   1144 }
   1145 
   1146 // Ask for all savable resource links from backend, include main frame and
   1147 // sub-frame.
   1148 void SavePackage::GetAllSavableResourceLinksForCurrentPage() {
   1149   if (wait_state_ != START_PROCESS)
   1150     return;
   1151 
   1152   wait_state_ = RESOURCES_LIST;
   1153   Send(new ViewMsg_GetAllSavableResourceLinksForCurrentPage(routing_id(),
   1154                                                             page_url_));
   1155 }
   1156 
   1157 // Give backend the lists which contain all resource links that have local
   1158 // storage, after which, render process will serialize DOM for generating
   1159 // HTML data.
   1160 void SavePackage::OnReceivedSavableResourceLinksForCurrentPage(
   1161     const std::vector<GURL>& resources_list,
   1162     const std::vector<Referrer>& referrers_list,
   1163     const std::vector<GURL>& frames_list) {
   1164   if (wait_state_ != RESOURCES_LIST)
   1165     return;
   1166 
   1167   if (resources_list.size() != referrers_list.size())
   1168     return;
   1169 
   1170   all_save_items_count_ = static_cast<int>(resources_list.size()) +
   1171                            static_cast<int>(frames_list.size());
   1172 
   1173   // We use total bytes as the total number of files we want to save.
   1174   // Hack to avoid touching download_ after user cancel.
   1175   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
   1176   // with SavePackage flow.
   1177   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS))
   1178     download_->SetTotalBytes(all_save_items_count_);
   1179 
   1180   if (all_save_items_count_) {
   1181     // Put all sub-resources to wait list.
   1182     for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) {
   1183       const GURL& u = resources_list[i];
   1184       DCHECK(u.is_valid());
   1185       SaveFileCreateInfo::SaveFileSource save_source = u.SchemeIsFile() ?
   1186           SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
   1187           SaveFileCreateInfo::SAVE_FILE_FROM_NET;
   1188       SaveItem* save_item = new SaveItem(u, referrers_list[i],
   1189                                          this, save_source);
   1190       waiting_item_queue_.push(save_item);
   1191     }
   1192     // Put all HTML resources to wait list.
   1193     for (int i = 0; i < static_cast<int>(frames_list.size()); ++i) {
   1194       const GURL& u = frames_list[i];
   1195       DCHECK(u.is_valid());
   1196       SaveItem* save_item = new SaveItem(
   1197           u, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
   1198       waiting_item_queue_.push(save_item);
   1199     }
   1200     wait_state_ = NET_FILES;
   1201     DoSavingProcess();
   1202   } else {
   1203     // No resource files need to be saved, treat it as user cancel.
   1204     Cancel(true);
   1205   }
   1206 }
   1207 
   1208 base::FilePath SavePackage::GetSuggestedNameForSaveAs(
   1209     bool can_save_as_complete,
   1210     const std::string& contents_mime_type,
   1211     const std::string& accept_langs) {
   1212   base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title_);
   1213 
   1214   // If the page's title matches its URL, use the URL. Try to use the last path
   1215   // component or if there is none, the domain as the file name.
   1216   // Normally we want to base the filename on the page title, or if it doesn't
   1217   // exist, on the URL. It's not easy to tell if the page has no title, because
   1218   // if the page has no title, WebContents::GetTitle() will return the page's
   1219   // URL (adjusted for display purposes). Therefore, we convert the "title"
   1220   // back to a URL, and if it matches the original page URL, we know the page
   1221   // had no title (or had a title equal to its URL, which is fine to treat
   1222   // similarly).
   1223   if (title_ == net::FormatUrl(page_url_, accept_langs)) {
   1224     std::string url_path;
   1225     if (!page_url_.SchemeIs(url::kDataScheme)) {
   1226       std::vector<std::string> url_parts;
   1227       base::SplitString(page_url_.path(), '/', &url_parts);
   1228       if (!url_parts.empty()) {
   1229         for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) {
   1230           url_path = url_parts[i];
   1231           if (!url_path.empty())
   1232             break;
   1233         }
   1234       }
   1235       if (url_path.empty())
   1236         url_path = page_url_.host();
   1237     } else {
   1238       url_path = "dataurl";
   1239     }
   1240     name_with_proper_ext = base::FilePath::FromUTF8Unsafe(url_path);
   1241   }
   1242 
   1243   // Ask user for getting final saving name.
   1244   name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext,
   1245                                              contents_mime_type);
   1246   // Adjust extension for complete types.
   1247   if (can_save_as_complete)
   1248     name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);
   1249 
   1250   base::FilePath::StringType file_name = name_with_proper_ext.value();
   1251   file_util::ReplaceIllegalCharactersInPath(&file_name, ' ');
   1252   return base::FilePath(file_name);
   1253 }
   1254 
   1255 base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) {
   1256   // If the file name doesn't have an extension suitable for HTML files,
   1257   // append one.
   1258   base::FilePath::StringType ext = name.Extension();
   1259   if (!ext.empty())
   1260     ext.erase(ext.begin());  // Erase preceding '.'.
   1261   std::string mime_type;
   1262   if (!net::GetMimeTypeFromExtension(ext, &mime_type) ||
   1263       !CanSaveAsComplete(mime_type)) {
   1264     return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
   1265                           kDefaultHtmlExtension);
   1266   }
   1267   return name;
   1268 }
   1269 
   1270 base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name,
   1271     const std::string& contents_mime_type) {
   1272   // Start extension at 1 to skip over period if non-empty.
   1273   base::FilePath::StringType ext = name.Extension().length() ?
   1274       name.Extension().substr(1) : name.Extension();
   1275   base::FilePath::StringType suggested_extension =
   1276       ExtensionForMimeType(contents_mime_type);
   1277   std::string mime_type;
   1278   if (!suggested_extension.empty() &&
   1279       !net::GetMimeTypeFromExtension(ext, &mime_type)) {
   1280     // Extension is absent or needs to be updated.
   1281     return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
   1282                     suggested_extension);
   1283   }
   1284   return name;
   1285 }
   1286 
   1287 const base::FilePath::CharType* SavePackage::ExtensionForMimeType(
   1288     const std::string& contents_mime_type) {
   1289   static const struct {
   1290     const base::FilePath::CharType *mime_type;
   1291     const base::FilePath::CharType *suggested_extension;
   1292   } extensions[] = {
   1293     { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension },
   1294     { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") },
   1295     { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") },
   1296     { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") },
   1297     { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") },
   1298   };
   1299 #if defined(OS_POSIX)
   1300   base::FilePath::StringType mime_type(contents_mime_type);
   1301 #elif defined(OS_WIN)
   1302   base::FilePath::StringType mime_type(base::UTF8ToWide(contents_mime_type));
   1303 #endif  // OS_WIN
   1304   for (uint32 i = 0; i < ARRAYSIZE_UNSAFE(extensions); ++i) {
   1305     if (mime_type == extensions[i].mime_type)
   1306       return extensions[i].suggested_extension;
   1307   }
   1308   return FILE_PATH_LITERAL("");
   1309 }
   1310 
   1311 WebContents* SavePackage::web_contents() const {
   1312   return WebContentsObserver::web_contents();
   1313 }
   1314 
   1315 void SavePackage::GetSaveInfo() {
   1316   // Can't use web_contents_ in the file thread, so get the data that we need
   1317   // before calling to it.
   1318   base::FilePath website_save_dir, download_save_dir;
   1319   bool skip_dir_check = false;
   1320   DCHECK(download_manager_);
   1321   if (download_manager_->GetDelegate()) {
   1322     download_manager_->GetDelegate()->GetSaveDir(
   1323         web_contents()->GetBrowserContext(), &website_save_dir,
   1324         &download_save_dir, &skip_dir_check);
   1325   }
   1326   std::string mime_type = web_contents()->GetContentsMimeType();
   1327   std::string accept_languages =
   1328       GetContentClient()->browser()->GetAcceptLangs(
   1329           web_contents()->GetBrowserContext());
   1330 
   1331   BrowserThread::PostTask(
   1332       BrowserThread::FILE, FROM_HERE,
   1333       base::Bind(&SavePackage::CreateDirectoryOnFileThread, this,
   1334           website_save_dir, download_save_dir, skip_dir_check,
   1335           mime_type, accept_languages));
   1336 }
   1337 
   1338 void SavePackage::CreateDirectoryOnFileThread(
   1339     const base::FilePath& website_save_dir,
   1340     const base::FilePath& download_save_dir,
   1341     bool skip_dir_check,
   1342     const std::string& mime_type,
   1343     const std::string& accept_langs) {
   1344   base::FilePath save_dir;
   1345   // If the default html/websites save folder doesn't exist...
   1346   // We skip the directory check for gdata directories on ChromeOS.
   1347   if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) {
   1348     // If the default download dir doesn't exist, create it.
   1349     if (!base::DirectoryExists(download_save_dir)) {
   1350       bool res = base::CreateDirectory(download_save_dir);
   1351       DCHECK(res);
   1352     }
   1353     save_dir = download_save_dir;
   1354   } else {
   1355     // If it does exist, use the default save dir param.
   1356     save_dir = website_save_dir;
   1357   }
   1358 
   1359   bool can_save_as_complete = CanSaveAsComplete(mime_type);
   1360   base::FilePath suggested_filename = GetSuggestedNameForSaveAs(
   1361       can_save_as_complete, mime_type, accept_langs);
   1362   base::FilePath::StringType pure_file_name =
   1363       suggested_filename.RemoveExtension().BaseName().value();
   1364   base::FilePath::StringType file_name_ext = suggested_filename.Extension();
   1365 
   1366   // Need to make sure the suggested file name is not too long.
   1367   uint32 max_path = GetMaxPathLengthForDirectory(save_dir);
   1368 
   1369   if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) {
   1370     save_dir = save_dir.Append(pure_file_name + file_name_ext);
   1371   } else {
   1372     // Cannot create a shorter filename. This will cause the save as operation
   1373     // to fail unless the user pick a shorter name. Continuing even though it
   1374     // will fail because returning means no save as popup for the user, which
   1375     // is even more confusing. This case should be rare though.
   1376     save_dir = save_dir.Append(suggested_filename);
   1377   }
   1378 
   1379   BrowserThread::PostTask(
   1380       BrowserThread::UI, FROM_HERE,
   1381       base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir,
   1382                  can_save_as_complete));
   1383 }
   1384 
   1385 void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path,
   1386                                       bool can_save_as_complete) {
   1387 
   1388   // The WebContents which owns this SavePackage may have disappeared during
   1389   // the UI->FILE->UI thread hop of
   1390   // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo.
   1391   if (!web_contents() || !download_manager_->GetDelegate())
   1392     return;
   1393 
   1394   base::FilePath::StringType default_extension;
   1395   if (can_save_as_complete)
   1396     default_extension = kDefaultHtmlExtension;
   1397 
   1398   download_manager_->GetDelegate()->ChooseSavePath(
   1399       web_contents(),
   1400       suggested_path,
   1401       default_extension,
   1402       can_save_as_complete,
   1403       base::Bind(&SavePackage::OnPathPicked, AsWeakPtr()));
   1404 }
   1405 
   1406 void SavePackage::OnPathPicked(
   1407     const base::FilePath& final_name,
   1408     SavePageType type,
   1409     const SavePackageDownloadCreatedCallback& download_created_callback) {
   1410   DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
   1411          (type == SAVE_PAGE_TYPE_AS_MHTML) ||
   1412          (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type;
   1413   // Ensure the filename is safe.
   1414   saved_main_file_path_ = final_name;
   1415   // TODO(asanka): This call may block on IO and shouldn't be made
   1416   // from the UI thread.  See http://crbug.com/61827.
   1417   net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false,
   1418                             &saved_main_file_path_);
   1419 
   1420   saved_main_directory_path_ = saved_main_file_path_.DirName();
   1421   save_type_ = type;
   1422   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
   1423     // Make new directory for saving complete file.
   1424     saved_main_directory_path_ = saved_main_directory_path_.Append(
   1425         saved_main_file_path_.RemoveExtension().BaseName().value() +
   1426         FILE_PATH_LITERAL("_files"));
   1427   }
   1428 
   1429   Init(download_created_callback);
   1430 }
   1431 
   1432 void SavePackage::StopObservation() {
   1433   DCHECK(download_);
   1434   DCHECK(download_manager_);
   1435 
   1436   download_->RemoveObserver(this);
   1437   download_ = NULL;
   1438   download_manager_ = NULL;
   1439 }
   1440 
   1441 void SavePackage::OnDownloadDestroyed(DownloadItem* download) {
   1442   StopObservation();
   1443 }
   1444 
   1445 void SavePackage::FinalizeDownloadEntry() {
   1446   DCHECK(download_);
   1447   DCHECK(download_manager_);
   1448 
   1449   download_manager_->OnSavePackageSuccessfullyFinished(download_);
   1450   StopObservation();
   1451 }
   1452 
   1453 }  // namespace content
   1454