Home | History | Annotate | Download | only in download
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "content/browser/download/save_package.h"
      6 
      7 #include <algorithm>
      8 
      9 #include "base/bind.h"
     10 #include "base/files/file_path.h"
     11 #include "base/files/file_util.h"
     12 #include "base/i18n/file_util_icu.h"
     13 #include "base/logging.h"
     14 #include "base/message_loop/message_loop.h"
     15 #include "base/stl_util.h"
     16 #include "base/strings/string_piece.h"
     17 #include "base/strings/string_split.h"
     18 #include "base/strings/sys_string_conversions.h"
     19 #include "base/strings/utf_string_conversions.h"
     20 #include "base/threading/thread.h"
     21 #include "content/browser/download/download_item_impl.h"
     22 #include "content/browser/download/download_manager_impl.h"
     23 #include "content/browser/download/download_stats.h"
     24 #include "content/browser/download/save_file.h"
     25 #include "content/browser/download/save_file_manager.h"
     26 #include "content/browser/download/save_item.h"
     27 #include "content/browser/loader/resource_dispatcher_host_impl.h"
     28 #include "content/browser/renderer_host/render_process_host_impl.h"
     29 #include "content/browser/renderer_host/render_view_host_delegate.h"
     30 #include "content/browser/renderer_host/render_view_host_impl.h"
     31 #include "content/common/view_messages.h"
     32 #include "content/public/browser/browser_context.h"
     33 #include "content/public/browser/browser_thread.h"
     34 #include "content/public/browser/content_browser_client.h"
     35 #include "content/public/browser/download_manager_delegate.h"
     36 #include "content/public/browser/navigation_entry.h"
     37 #include "content/public/browser/notification_service.h"
     38 #include "content/public/browser/notification_types.h"
     39 #include "content/public/browser/resource_context.h"
     40 #include "content/public/browser/web_contents.h"
     41 #include "net/base/filename_util.h"
     42 #include "net/base/io_buffer.h"
     43 #include "net/base/mime_util.h"
     44 #include "net/url_request/url_request_context.h"
     45 #include "third_party/WebKit/public/web/WebPageSerializerClient.h"
     46 #include "url/url_constants.h"
     47 
     48 using base::Time;
     49 using blink::WebPageSerializerClient;
     50 
     51 namespace content {
     52 namespace {
     53 
     54 // A counter for uniquely identifying each save package.
     55 int g_save_package_id = 0;
     56 
     57 // Default name which will be used when we can not get proper name from
     58 // resource URL.
     59 const char kDefaultSaveName[] = "saved_resource";
     60 
     61 // Maximum number of file ordinal number. I think it's big enough for resolving
     62 // name-conflict files which has same base file name.
     63 const int32 kMaxFileOrdinalNumber = 9999;
     64 
     65 // Maximum length for file path. Since Windows have MAX_PATH limitation for
     66 // file path, we need to make sure length of file path of every saved file
     67 // is less than MAX_PATH
     68 #if defined(OS_WIN)
     69 const uint32 kMaxFilePathLength = MAX_PATH - 1;
     70 #elif defined(OS_POSIX)
     71 const uint32 kMaxFilePathLength = PATH_MAX - 1;
     72 #endif
     73 
     74 // Maximum length for file ordinal number part. Since we only support the
     75 // maximum 9999 for ordinal number, which means maximum file ordinal number part
     76 // should be "(9998)", so the value is 6.
     77 const uint32 kMaxFileOrdinalNumberPartLength = 6;
     78 
     79 // Strip current ordinal number, if any. Should only be used on pure
     80 // file names, i.e. those stripped of their extensions.
     81 // TODO(estade): improve this to not choke on alternate encodings.
     82 base::FilePath::StringType StripOrdinalNumber(
     83     const base::FilePath::StringType& pure_file_name) {
     84   base::FilePath::StringType::size_type r_paren_index =
     85       pure_file_name.rfind(FILE_PATH_LITERAL(')'));
     86   base::FilePath::StringType::size_type l_paren_index =
     87       pure_file_name.rfind(FILE_PATH_LITERAL('('));
     88   if (l_paren_index >= r_paren_index)
     89     return pure_file_name;
     90 
     91   for (base::FilePath::StringType::size_type i = l_paren_index + 1;
     92        i != r_paren_index; ++i) {
     93     if (!IsAsciiDigit(pure_file_name[i]))
     94       return pure_file_name;
     95   }
     96 
     97   return pure_file_name.substr(0, l_paren_index);
     98 }
     99 
    100 // Check whether we can save page as complete-HTML for the contents which
    101 // have specified a MIME type. Now only contents which have the MIME type
    102 // "text/html" can be saved as complete-HTML.
    103 bool CanSaveAsComplete(const std::string& contents_mime_type) {
    104   return contents_mime_type == "text/html" ||
    105          contents_mime_type == "application/xhtml+xml";
    106 }
    107 
    108 // Request handle for SavePackage downloads. Currently doesn't support
    109 // pause/resume/cancel, but returns a WebContents.
    110 class SavePackageRequestHandle : public DownloadRequestHandleInterface {
    111  public:
    112   SavePackageRequestHandle(base::WeakPtr<SavePackage> save_package)
    113       : save_package_(save_package) {}
    114 
    115   // DownloadRequestHandleInterface
    116   virtual WebContents* GetWebContents() const OVERRIDE {
    117     return save_package_.get() ? save_package_->web_contents() : NULL;
    118   }
    119   virtual DownloadManager* GetDownloadManager() const OVERRIDE {
    120     return NULL;
    121   }
    122   virtual void PauseRequest() const OVERRIDE {}
    123   virtual void ResumeRequest() const OVERRIDE {}
    124   virtual void CancelRequest() const OVERRIDE {}
    125   virtual std::string DebugString() const OVERRIDE {
    126     return "SavePackage DownloadRequestHandle";
    127   }
    128 
    129  private:
    130   base::WeakPtr<SavePackage> save_package_;
    131 };
    132 
    133 }  // namespace
    134 
    135 const base::FilePath::CharType SavePackage::kDefaultHtmlExtension[] =
    136     FILE_PATH_LITERAL("html");
    137 
    138 SavePackage::SavePackage(WebContents* web_contents,
    139                          SavePageType save_type,
    140                          const base::FilePath& file_full_path,
    141                          const base::FilePath& directory_full_path)
    142     : WebContentsObserver(web_contents),
    143       file_manager_(NULL),
    144       download_manager_(NULL),
    145       download_(NULL),
    146       page_url_(GetUrlToBeSaved()),
    147       saved_main_file_path_(file_full_path),
    148       saved_main_directory_path_(directory_full_path),
    149       title_(web_contents->GetTitle()),
    150       start_tick_(base::TimeTicks::Now()),
    151       finished_(false),
    152       mhtml_finishing_(false),
    153       user_canceled_(false),
    154       disk_error_occurred_(false),
    155       save_type_(save_type),
    156       all_save_items_count_(0),
    157       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
    158       wait_state_(INITIALIZE),
    159       contents_id_(web_contents->GetRenderProcessHost()->GetID()),
    160       unique_id_(g_save_package_id++),
    161       wrote_to_completed_file_(false),
    162       wrote_to_failed_file_(false) {
    163   DCHECK(page_url_.is_valid());
    164   DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
    165          (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
    166          (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML));
    167   DCHECK(!saved_main_file_path_.empty() &&
    168          saved_main_file_path_.value().length() <= kMaxFilePathLength);
    169   DCHECK(!saved_main_directory_path_.empty() &&
    170          saved_main_directory_path_.value().length() < kMaxFilePathLength);
    171   InternalInit();
    172 }
    173 
    174 SavePackage::SavePackage(WebContents* web_contents)
    175     : WebContentsObserver(web_contents),
    176       file_manager_(NULL),
    177       download_manager_(NULL),
    178       download_(NULL),
    179       page_url_(GetUrlToBeSaved()),
    180       title_(web_contents->GetTitle()),
    181       start_tick_(base::TimeTicks::Now()),
    182       finished_(false),
    183       mhtml_finishing_(false),
    184       user_canceled_(false),
    185       disk_error_occurred_(false),
    186       save_type_(SAVE_PAGE_TYPE_UNKNOWN),
    187       all_save_items_count_(0),
    188       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
    189       wait_state_(INITIALIZE),
    190       contents_id_(web_contents->GetRenderProcessHost()->GetID()),
    191       unique_id_(g_save_package_id++),
    192       wrote_to_completed_file_(false),
    193       wrote_to_failed_file_(false) {
    194   DCHECK(page_url_.is_valid());
    195   InternalInit();
    196 }
    197 
    198 // This is for testing use. Set |finished_| as true because we don't want
    199 // method Cancel to be be called in destructor in test mode.
    200 // We also don't call InternalInit().
    201 SavePackage::SavePackage(WebContents* web_contents,
    202                          const base::FilePath& file_full_path,
    203                          const base::FilePath& directory_full_path)
    204     : WebContentsObserver(web_contents),
    205       file_manager_(NULL),
    206       download_manager_(NULL),
    207       download_(NULL),
    208       saved_main_file_path_(file_full_path),
    209       saved_main_directory_path_(directory_full_path),
    210       start_tick_(base::TimeTicks::Now()),
    211       finished_(true),
    212       mhtml_finishing_(false),
    213       user_canceled_(false),
    214       disk_error_occurred_(false),
    215       save_type_(SAVE_PAGE_TYPE_UNKNOWN),
    216       all_save_items_count_(0),
    217       file_name_set_(&base::FilePath::CompareLessIgnoreCase),
    218       wait_state_(INITIALIZE),
    219       contents_id_(0),
    220       unique_id_(g_save_package_id++),
    221       wrote_to_completed_file_(false),
    222       wrote_to_failed_file_(false) {
    223 }
    224 
    225 SavePackage::~SavePackage() {
    226   // Stop receiving saving job's updates
    227   if (!finished_ && !canceled()) {
    228     // Unexpected quit.
    229     Cancel(true);
    230   }
    231 
    232   // We should no longer be observing the DownloadItem at this point.
    233   CHECK(!download_);
    234 
    235   DCHECK(all_save_items_count_ == (waiting_item_queue_.size() +
    236                                    completed_count() +
    237                                    in_process_count()));
    238   // Free all SaveItems.
    239   while (!waiting_item_queue_.empty()) {
    240     // We still have some items which are waiting for start to save.
    241     SaveItem* save_item = waiting_item_queue_.front();
    242     waiting_item_queue_.pop();
    243     delete save_item;
    244   }
    245 
    246   STLDeleteValues(&saved_success_items_);
    247   STLDeleteValues(&in_progress_items_);
    248   STLDeleteValues(&saved_failed_items_);
    249 
    250   file_manager_ = NULL;
    251 }
    252 
    253 GURL SavePackage::GetUrlToBeSaved() {
    254   // Instead of using web_contents_.GetURL here, we use url() (which is the
    255   // "real" url of the page) from the NavigationEntry because it reflects its
    256   // origin rather than the displayed one (returned by GetURL) which may be
    257   // different (like having "view-source:" on the front).
    258   NavigationEntry* visible_entry =
    259       web_contents()->GetController().GetVisibleEntry();
    260   return visible_entry->GetURL();
    261 }
    262 
    263 void SavePackage::Cancel(bool user_action) {
    264   if (!canceled()) {
    265     if (user_action)
    266       user_canceled_ = true;
    267     else
    268       disk_error_occurred_ = true;
    269     Stop();
    270   }
    271   RecordSavePackageEvent(SAVE_PACKAGE_CANCELLED);
    272 }
    273 
    274 // Init() can be called directly, or indirectly via GetSaveInfo(). In both
    275 // cases, we need file_manager_ to be initialized, so we do this first.
    276 void SavePackage::InternalInit() {
    277   ResourceDispatcherHostImpl* rdh = ResourceDispatcherHostImpl::Get();
    278   if (!rdh) {
    279     NOTREACHED();
    280     return;
    281   }
    282 
    283   file_manager_ = rdh->save_file_manager();
    284   DCHECK(file_manager_);
    285 
    286   download_manager_ = static_cast<DownloadManagerImpl*>(
    287       BrowserContext::GetDownloadManager(
    288           web_contents()->GetBrowserContext()));
    289   DCHECK(download_manager_);
    290 
    291   RecordSavePackageEvent(SAVE_PACKAGE_STARTED);
    292 }
    293 
    294 bool SavePackage::Init(
    295     const SavePackageDownloadCreatedCallback& download_created_callback) {
    296   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    297   // Set proper running state.
    298   if (wait_state_ != INITIALIZE)
    299     return false;
    300 
    301   wait_state_ = START_PROCESS;
    302 
    303   // Initialize the request context and resource dispatcher.
    304   BrowserContext* browser_context = web_contents()->GetBrowserContext();
    305   if (!browser_context) {
    306     NOTREACHED();
    307     return false;
    308   }
    309 
    310   scoped_ptr<DownloadRequestHandleInterface> request_handle(
    311       new SavePackageRequestHandle(AsWeakPtr()));
    312   // The download manager keeps ownership but adds us as an observer.
    313   download_manager_->CreateSavePackageDownloadItem(
    314       saved_main_file_path_,
    315       page_url_,
    316       ((save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ?
    317        "multipart/related" : "text/html"),
    318       request_handle.Pass(),
    319       base::Bind(&SavePackage::InitWithDownloadItem, AsWeakPtr(),
    320                  download_created_callback));
    321   return true;
    322 }
    323 
    324 void SavePackage::InitWithDownloadItem(
    325     const SavePackageDownloadCreatedCallback& download_created_callback,
    326     DownloadItemImpl* item) {
    327   DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
    328   DCHECK(item);
    329   download_ = item;
    330   download_->AddObserver(this);
    331   // Confirm above didn't delete the tab out from under us.
    332   if (!download_created_callback.is_null())
    333     download_created_callback.Run(download_);
    334 
    335   // Check save type and process the save page job.
    336   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
    337     // Get directory
    338     DCHECK(!saved_main_directory_path_.empty());
    339     GetAllSavableResourceLinksForCurrentPage();
    340   } else if (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) {
    341     web_contents()->GenerateMHTML(saved_main_file_path_, base::Bind(
    342         &SavePackage::OnMHTMLGenerated, this));
    343   } else {
    344     DCHECK_EQ(SAVE_PAGE_TYPE_AS_ONLY_HTML, save_type_) << save_type_;
    345     wait_state_ = NET_FILES;
    346     SaveFileCreateInfo::SaveFileSource save_source = page_url_.SchemeIsFile() ?
    347         SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
    348         SaveFileCreateInfo::SAVE_FILE_FROM_NET;
    349     SaveItem* save_item = new SaveItem(page_url_,
    350                                        Referrer(),
    351                                        this,
    352                                        save_source);
    353     // Add this item to waiting list.
    354     waiting_item_queue_.push(save_item);
    355     all_save_items_count_ = 1;
    356     download_->SetTotalBytes(1);
    357 
    358     DoSavingProcess();
    359   }
    360 }
    361 
    362 void SavePackage::OnMHTMLGenerated(int64 size) {
    363   if (size <= 0) {
    364     Cancel(false);
    365     return;
    366   }
    367   wrote_to_completed_file_ = true;
    368 
    369   // Hack to avoid touching download_ after user cancel.
    370   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
    371   // with SavePackage flow.
    372   if (download_->GetState() == DownloadItem::IN_PROGRESS) {
    373     download_->SetTotalBytes(size);
    374     download_->DestinationUpdate(size, 0, std::string());
    375     // Must call OnAllDataSaved here in order for
    376     // GDataDownloadObserver::ShouldUpload() to return true.
    377     // ShouldCompleteDownload() may depend on the gdata uploader to finish.
    378     download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
    379   }
    380 
    381   if (!download_manager_->GetDelegate()) {
    382     Finish();
    383     return;
    384   }
    385 
    386   if (download_manager_->GetDelegate()->ShouldCompleteDownload(
    387           download_, base::Bind(&SavePackage::Finish, this))) {
    388     Finish();
    389   }
    390 }
    391 
    392 // On POSIX, the length of |pure_file_name| + |file_name_ext| is further
    393 // restricted by NAME_MAX. The maximum allowed path looks like:
    394 // '/path/to/save_dir' + '/' + NAME_MAX.
    395 uint32 SavePackage::GetMaxPathLengthForDirectory(
    396     const base::FilePath& base_dir) {
    397 #if defined(OS_POSIX)
    398   return std::min(kMaxFilePathLength,
    399                   static_cast<uint32>(base_dir.value().length()) +
    400                   NAME_MAX + 1);
    401 #else
    402   return kMaxFilePathLength;
    403 #endif
    404 }
    405 
    406 // File name is considered being consist of pure file name, dot and file
    407 // extension name. File name might has no dot and file extension, or has
    408 // multiple dot inside file name. The dot, which separates the pure file
    409 // name and file extension name, is last dot in the whole file name.
    410 // This function is for making sure the length of specified file path is not
    411 // great than the specified maximum length of file path and getting safe pure
    412 // file name part if the input pure file name is too long.
    413 // The parameter |dir_path| specifies directory part of the specified
    414 // file path. The parameter |file_name_ext| specifies file extension
    415 // name part of the specified file path (including start dot). The parameter
    416 // |max_file_path_len| specifies maximum length of the specified file path.
    417 // The parameter |pure_file_name| input pure file name part of the specified
    418 // file path. If the length of specified file path is great than
    419 // |max_file_path_len|, the |pure_file_name| will output new pure file name
    420 // part for making sure the length of specified file path is less than
    421 // specified maximum length of file path. Return false if the function can
    422 // not get a safe pure file name, otherwise it returns true.
    423 bool SavePackage::GetSafePureFileName(
    424     const base::FilePath& dir_path,
    425     const base::FilePath::StringType& file_name_ext,
    426     uint32 max_file_path_len,
    427     base::FilePath::StringType* pure_file_name) {
    428   DCHECK(!pure_file_name->empty());
    429   int available_length = static_cast<int>(max_file_path_len -
    430                                           dir_path.value().length() -
    431                                           file_name_ext.length());
    432   // Need an extra space for the separator.
    433   if (!dir_path.EndsWithSeparator())
    434     --available_length;
    435 
    436   // Plenty of room.
    437   if (static_cast<int>(pure_file_name->length()) <= available_length)
    438     return true;
    439 
    440   // Limited room. Truncate |pure_file_name| to fit.
    441   if (available_length > 0) {
    442     *pure_file_name = pure_file_name->substr(0, available_length);
    443     return true;
    444   }
    445 
    446   // Not enough room to even use a shortened |pure_file_name|.
    447   pure_file_name->clear();
    448   return false;
    449 }
    450 
    451 // Generate name for saving resource.
    452 bool SavePackage::GenerateFileName(const std::string& disposition,
    453                                    const GURL& url,
    454                                    bool need_html_ext,
    455                                    base::FilePath::StringType* generated_name) {
    456   // TODO(jungshik): Figure out the referrer charset when having one
    457   // makes sense and pass it to GenerateFileName.
    458   base::FilePath file_path = net::GenerateFileName(url,
    459                                                    disposition,
    460                                                    std::string(),
    461                                                    std::string(),
    462                                                    std::string(),
    463                                                    kDefaultSaveName);
    464 
    465   DCHECK(!file_path.empty());
    466   base::FilePath::StringType pure_file_name =
    467       file_path.RemoveExtension().BaseName().value();
    468   base::FilePath::StringType file_name_ext = file_path.Extension();
    469 
    470   // If it is HTML resource, use ".html" as its extension.
    471   if (need_html_ext) {
    472     file_name_ext = FILE_PATH_LITERAL(".");
    473     file_name_ext.append(kDefaultHtmlExtension);
    474   }
    475 
    476   // Need to make sure the suggested file name is not too long.
    477   uint32 max_path = GetMaxPathLengthForDirectory(saved_main_directory_path_);
    478 
    479   // Get safe pure file name.
    480   if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
    481                            max_path, &pure_file_name))
    482     return false;
    483 
    484   base::FilePath::StringType file_name = pure_file_name + file_name_ext;
    485 
    486   // Check whether we already have same name in a case insensitive manner.
    487   FileNameSet::const_iterator iter = file_name_set_.find(file_name);
    488   if (iter == file_name_set_.end()) {
    489     file_name_set_.insert(file_name);
    490   } else {
    491     // Found same name, increase the ordinal number for the file name.
    492     pure_file_name =
    493         base::FilePath(*iter).RemoveExtension().BaseName().value();
    494     base::FilePath::StringType base_file_name =
    495         StripOrdinalNumber(pure_file_name);
    496 
    497     // We need to make sure the length of base file name plus maximum ordinal
    498     // number path will be less than or equal to kMaxFilePathLength.
    499     if (!GetSafePureFileName(saved_main_directory_path_, file_name_ext,
    500         max_path - kMaxFileOrdinalNumberPartLength, &base_file_name))
    501       return false;
    502 
    503     // Prepare the new ordinal number.
    504     uint32 ordinal_number;
    505     FileNameCountMap::iterator it = file_name_count_map_.find(base_file_name);
    506     if (it == file_name_count_map_.end()) {
    507       // First base-name-conflict resolving, use 1 as initial ordinal number.
    508       file_name_count_map_[base_file_name] = 1;
    509       ordinal_number = 1;
    510     } else {
    511       // We have met same base-name conflict, use latest ordinal number.
    512       ordinal_number = it->second;
    513     }
    514 
    515     if (ordinal_number > (kMaxFileOrdinalNumber - 1)) {
    516       // Use a random file from temporary file.
    517       base::FilePath temp_file;
    518       base::CreateTemporaryFile(&temp_file);
    519       file_name = temp_file.RemoveExtension().BaseName().value();
    520       // Get safe pure file name.
    521       if (!GetSafePureFileName(saved_main_directory_path_,
    522                                base::FilePath::StringType(),
    523                                max_path, &file_name))
    524         return false;
    525     } else {
    526       for (int i = ordinal_number; i < kMaxFileOrdinalNumber; ++i) {
    527         base::FilePath::StringType new_name = base_file_name +
    528             base::StringPrintf(FILE_PATH_LITERAL("(%d)"), i) + file_name_ext;
    529         if (file_name_set_.find(new_name) == file_name_set_.end()) {
    530           // Resolved name conflict.
    531           file_name = new_name;
    532           file_name_count_map_[base_file_name] = ++i;
    533           break;
    534         }
    535       }
    536     }
    537 
    538     file_name_set_.insert(file_name);
    539   }
    540 
    541   DCHECK(!file_name.empty());
    542   generated_name->assign(file_name);
    543 
    544   return true;
    545 }
    546 
    547 // We have received a message from SaveFileManager about a new saving job. We
    548 // create a SaveItem and store it in our in_progress list.
    549 void SavePackage::StartSave(const SaveFileCreateInfo* info) {
    550   DCHECK(info && !info->url.is_empty());
    551 
    552   SaveUrlItemMap::iterator it = in_progress_items_.find(info->url.spec());
    553   if (it == in_progress_items_.end()) {
    554     // If not found, we must have cancel action.
    555     DCHECK(canceled());
    556     return;
    557   }
    558   SaveItem* save_item = it->second;
    559 
    560   DCHECK(!saved_main_file_path_.empty());
    561 
    562   save_item->SetSaveId(info->save_id);
    563   save_item->SetTotalBytes(info->total_bytes);
    564 
    565   // Determine the proper path for a saving job, by choosing either the default
    566   // save directory, or prompting the user.
    567   DCHECK(!save_item->has_final_name());
    568   if (info->url != page_url_) {
    569     base::FilePath::StringType generated_name;
    570     // For HTML resource file, make sure it will have .htm as extension name,
    571     // otherwise, when you open the saved page in Chrome again, download
    572     // file manager will treat it as downloadable resource, and download it
    573     // instead of opening it as HTML.
    574     bool need_html_ext =
    575         info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM;
    576     if (!GenerateFileName(info->content_disposition,
    577                           GURL(info->url),
    578                           need_html_ext,
    579                           &generated_name)) {
    580       // We can not generate file name for this SaveItem, so we cancel the
    581       // saving page job if the save source is from serialized DOM data.
    582       // Otherwise, it means this SaveItem is sub-resource type, we treat it
    583       // as an error happened on saving. We can ignore this type error for
    584       // sub-resource links which will be resolved as absolute links instead
    585       // of local links in final saved contents.
    586       if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)
    587         Cancel(true);
    588       else
    589         SaveFinished(save_item->save_id(), 0, false);
    590       return;
    591     }
    592 
    593     // When saving page as only-HTML, we only have a SaveItem whose url
    594     // must be page_url_.
    595     DCHECK(save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
    596     DCHECK(!saved_main_directory_path_.empty());
    597 
    598     // Now we get final name retrieved from GenerateFileName, we will use it
    599     // rename the SaveItem.
    600     base::FilePath final_name =
    601         saved_main_directory_path_.Append(generated_name);
    602     save_item->Rename(final_name);
    603   } else {
    604     // It is the main HTML file, use the name chosen by the user.
    605     save_item->Rename(saved_main_file_path_);
    606   }
    607 
    608   // If the save source is from file system, inform SaveFileManager to copy
    609   // corresponding file to the file path which this SaveItem specifies.
    610   if (info->save_source == SaveFileCreateInfo::SAVE_FILE_FROM_FILE) {
    611     BrowserThread::PostTask(
    612         BrowserThread::FILE, FROM_HERE,
    613         base::Bind(&SaveFileManager::SaveLocalFile,
    614                    file_manager_,
    615                    save_item->url(),
    616                    save_item->save_id(),
    617                    contents_id()));
    618     return;
    619   }
    620 
    621   // Check whether we begin to require serialized HTML data.
    622   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
    623       wait_state_ == HTML_DATA) {
    624     // Inform backend to serialize the all frames' DOM and send serialized
    625     // HTML data back.
    626     GetSerializedHtmlDataForCurrentPageWithLocalLinks();
    627   }
    628 }
    629 
    630 SaveItem* SavePackage::LookupItemInProcessBySaveId(int32 save_id) {
    631   if (in_process_count()) {
    632     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
    633         it != in_progress_items_.end(); ++it) {
    634       SaveItem* save_item = it->second;
    635       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
    636       if (save_item->save_id() == save_id)
    637         return save_item;
    638     }
    639   }
    640   return NULL;
    641 }
    642 
    643 void SavePackage::PutInProgressItemToSavedMap(SaveItem* save_item) {
    644   SaveUrlItemMap::iterator it = in_progress_items_.find(
    645       save_item->url().spec());
    646   DCHECK(it != in_progress_items_.end());
    647   DCHECK(save_item == it->second);
    648   in_progress_items_.erase(it);
    649 
    650   if (save_item->success()) {
    651     // Add it to saved_success_items_.
    652     DCHECK(saved_success_items_.find(save_item->save_id()) ==
    653            saved_success_items_.end());
    654     saved_success_items_[save_item->save_id()] = save_item;
    655   } else {
    656     // Add it to saved_failed_items_.
    657     DCHECK(saved_failed_items_.find(save_item->url().spec()) ==
    658            saved_failed_items_.end());
    659     saved_failed_items_[save_item->url().spec()] = save_item;
    660   }
    661 }
    662 
    663 // Called for updating saving state.
    664 bool SavePackage::UpdateSaveProgress(int32 save_id,
    665                                      int64 size,
    666                                      bool write_success) {
    667   // Because we might have canceled this saving job before,
    668   // so we might not find corresponding SaveItem.
    669   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
    670   if (!save_item)
    671     return false;
    672 
    673   save_item->Update(size);
    674 
    675   // If we got disk error, cancel whole save page job.
    676   if (!write_success) {
    677     // Cancel job with reason of disk error.
    678     Cancel(false);
    679   }
    680   return true;
    681 }
    682 
    683 // Stop all page saving jobs that are in progress and instruct the file thread
    684 // to delete all saved  files.
    685 void SavePackage::Stop() {
    686   // If we haven't moved out of the initial state, there's nothing to cancel and
    687   // there won't be valid pointers for file_manager_ or download_.
    688   if (wait_state_ == INITIALIZE)
    689     return;
    690 
    691   // When stopping, if it still has some items in in_progress, cancel them.
    692   DCHECK(canceled());
    693   if (in_process_count()) {
    694     SaveUrlItemMap::iterator it = in_progress_items_.begin();
    695     for (; it != in_progress_items_.end(); ++it) {
    696       SaveItem* save_item = it->second;
    697       DCHECK(save_item->state() == SaveItem::IN_PROGRESS);
    698       save_item->Cancel();
    699     }
    700     // Remove all in progress item to saved map. For failed items, they will
    701     // be put into saved_failed_items_, for successful item, they will be put
    702     // into saved_success_items_.
    703     while (in_process_count())
    704       PutInProgressItemToSavedMap(in_progress_items_.begin()->second);
    705   }
    706 
    707   // This vector contains the save ids of the save files which SaveFileManager
    708   // needs to remove from its save_file_map_.
    709   SaveIDList save_ids;
    710   for (SavedItemMap::iterator it = saved_success_items_.begin();
    711       it != saved_success_items_.end(); ++it)
    712     save_ids.push_back(it->first);
    713   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
    714       it != saved_failed_items_.end(); ++it)
    715     save_ids.push_back(it->second->save_id());
    716 
    717   BrowserThread::PostTask(
    718       BrowserThread::FILE, FROM_HERE,
    719       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
    720                  file_manager_,
    721                  save_ids));
    722 
    723   finished_ = true;
    724   wait_state_ = FAILED;
    725 
    726   // Inform the DownloadItem we have canceled whole save page job.
    727   if (download_) {
    728     download_->Cancel(false);
    729     FinalizeDownloadEntry();
    730   }
    731 }
    732 
    733 void SavePackage::CheckFinish() {
    734   if (in_process_count() || finished_)
    735     return;
    736 
    737   base::FilePath dir = (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML &&
    738                         saved_success_items_.size() > 1) ?
    739                         saved_main_directory_path_ : base::FilePath();
    740 
    741   // This vector contains the final names of all the successfully saved files
    742   // along with their save ids. It will be passed to SaveFileManager to do the
    743   // renaming job.
    744   FinalNameList final_names;
    745   for (SavedItemMap::iterator it = saved_success_items_.begin();
    746       it != saved_success_items_.end(); ++it)
    747     final_names.push_back(std::make_pair(it->first,
    748                                          it->second->full_path()));
    749 
    750   BrowserThread::PostTask(
    751       BrowserThread::FILE, FROM_HERE,
    752       base::Bind(&SaveFileManager::RenameAllFiles,
    753                  file_manager_,
    754                  final_names,
    755                  dir,
    756                  web_contents()->GetRenderProcessHost()->GetID(),
    757                  web_contents()->GetRenderViewHost()->GetRoutingID(),
    758                  id()));
    759 }
    760 
    761 // Successfully finished all items of this SavePackage.
    762 void SavePackage::Finish() {
    763   // User may cancel the job when we're moving files to the final directory.
    764   if (canceled())
    765     return;
    766 
    767   wait_state_ = SUCCESSFUL;
    768   finished_ = true;
    769 
    770   // Record finish.
    771   RecordSavePackageEvent(SAVE_PACKAGE_FINISHED);
    772 
    773   // Record any errors that occurred.
    774   if (wrote_to_completed_file_) {
    775     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_COMPLETED);
    776   }
    777 
    778   if (wrote_to_failed_file_) {
    779     RecordSavePackageEvent(SAVE_PACKAGE_WRITE_TO_FAILED);
    780   }
    781 
    782   // This vector contains the save ids of the save files which SaveFileManager
    783   // needs to remove from its save_file_map_.
    784   SaveIDList save_ids;
    785   for (SaveUrlItemMap::iterator it = saved_failed_items_.begin();
    786        it != saved_failed_items_.end(); ++it)
    787     save_ids.push_back(it->second->save_id());
    788 
    789   BrowserThread::PostTask(
    790       BrowserThread::FILE, FROM_HERE,
    791       base::Bind(&SaveFileManager::RemoveSavedFileFromFileMap,
    792                  file_manager_,
    793                  save_ids));
    794 
    795   if (download_) {
    796     // Hack to avoid touching download_ after user cancel.
    797     // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
    798     // with SavePackage flow.
    799     if (download_->GetState() == DownloadItem::IN_PROGRESS) {
    800       if (save_type_ != SAVE_PAGE_TYPE_AS_MHTML) {
    801         download_->DestinationUpdate(
    802             all_save_items_count_, CurrentSpeed(), std::string());
    803         download_->OnAllDataSaved(DownloadItem::kEmptyFileHash);
    804       }
    805       download_->MarkAsComplete();
    806     }
    807     FinalizeDownloadEntry();
    808   }
    809 }
    810 
    811 // Called for updating end state.
    812 void SavePackage::SaveFinished(int32 save_id, int64 size, bool is_success) {
    813   // Because we might have canceled this saving job before,
    814   // so we might not find corresponding SaveItem. Just ignore it.
    815   SaveItem* save_item = LookupItemInProcessBySaveId(save_id);
    816   if (!save_item)
    817     return;
    818 
    819   // Let SaveItem set end state.
    820   save_item->Finish(size, is_success);
    821   // Remove the associated save id and SavePackage.
    822   file_manager_->RemoveSaveFile(save_id, save_item->url(), this);
    823 
    824   PutInProgressItemToSavedMap(save_item);
    825 
    826   // Inform the DownloadItem to update UI.
    827   // We use the received bytes as number of saved files.
    828   // Hack to avoid touching download_ after user cancel.
    829   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
    830   // with SavePackage flow.
    831   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
    832     download_->DestinationUpdate(
    833         completed_count(), CurrentSpeed(), std::string());
    834   }
    835 
    836   if (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM &&
    837       save_item->url() == page_url_ && !save_item->received_bytes()) {
    838     // If size of main HTML page is 0, treat it as disk error.
    839     Cancel(false);
    840     return;
    841   }
    842 
    843   if (canceled()) {
    844     DCHECK(finished_);
    845     return;
    846   }
    847 
    848   // Continue processing the save page job.
    849   DoSavingProcess();
    850 
    851   // Check whether we can successfully finish whole job.
    852   CheckFinish();
    853 }
    854 
    855 // Sometimes, the net io will only call SaveFileManager::SaveFinished with
    856 // save id -1 when it encounters error. Since in this case, save id will be
    857 // -1, so we can only use URL to find which SaveItem is associated with
    858 // this error.
    859 // Saving an item failed. If it's a sub-resource, ignore it. If the error comes
    860 // from serializing HTML data, then cancel saving page.
    861 void SavePackage::SaveFailed(const GURL& save_url) {
    862   SaveUrlItemMap::iterator it = in_progress_items_.find(save_url.spec());
    863   if (it == in_progress_items_.end()) {
    864     NOTREACHED();  // Should not exist!
    865     return;
    866   }
    867   SaveItem* save_item = it->second;
    868 
    869   save_item->Finish(0, false);
    870 
    871   PutInProgressItemToSavedMap(save_item);
    872 
    873   // Inform the DownloadItem to update UI.
    874   // We use the received bytes as number of saved files.
    875   // Hack to avoid touching download_ after user cancel.
    876   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
    877   // with SavePackage flow.
    878   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS)) {
    879     download_->DestinationUpdate(
    880         completed_count(), CurrentSpeed(), std::string());
    881   }
    882 
    883   if ((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
    884       (save_type_ == SAVE_PAGE_TYPE_AS_MHTML) ||
    885       (save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM)) {
    886     // We got error when saving page. Treat it as disk error.
    887     Cancel(true);
    888   }
    889 
    890   if (canceled()) {
    891     DCHECK(finished_);
    892     return;
    893   }
    894 
    895   // Continue processing the save page job.
    896   DoSavingProcess();
    897 
    898   CheckFinish();
    899 }
    900 
    901 void SavePackage::SaveCanceled(SaveItem* save_item) {
    902   // Call the RemoveSaveFile in UI thread.
    903   file_manager_->RemoveSaveFile(save_item->save_id(),
    904                                 save_item->url(),
    905                                 this);
    906   if (save_item->save_id() != -1)
    907     BrowserThread::PostTask(
    908         BrowserThread::FILE, FROM_HERE,
    909         base::Bind(&SaveFileManager::CancelSave,
    910                    file_manager_,
    911                    save_item->save_id()));
    912 }
    913 
    914 // Initiate a saving job of a specific URL. We send the request to
    915 // SaveFileManager, which will dispatch it to different approach according to
    916 // the save source. Parameter process_all_remaining_items indicates whether
    917 // we need to save all remaining items.
    918 void SavePackage::SaveNextFile(bool process_all_remaining_items) {
    919   DCHECK(web_contents());
    920   DCHECK(waiting_item_queue_.size());
    921 
    922   do {
    923     // Pop SaveItem from waiting list.
    924     SaveItem* save_item = waiting_item_queue_.front();
    925     waiting_item_queue_.pop();
    926 
    927     // Add the item to in_progress_items_.
    928     SaveUrlItemMap::iterator it = in_progress_items_.find(
    929         save_item->url().spec());
    930     DCHECK(it == in_progress_items_.end());
    931     in_progress_items_[save_item->url().spec()] = save_item;
    932     save_item->Start();
    933     file_manager_->SaveURL(save_item->url(),
    934                            save_item->referrer(),
    935                            web_contents()->GetRenderProcessHost()->GetID(),
    936                            routing_id(),
    937                            save_item->save_source(),
    938                            save_item->full_path(),
    939                            web_contents()->
    940                                GetBrowserContext()->GetResourceContext(),
    941                            this);
    942   } while (process_all_remaining_items && waiting_item_queue_.size());
    943 }
    944 
    945 // Calculate the percentage of whole save page job.
    946 int SavePackage::PercentComplete() {
    947   if (!all_save_items_count_)
    948     return 0;
    949   else if (!in_process_count())
    950     return 100;
    951   else
    952     return completed_count() / all_save_items_count_;
    953 }
    954 
    955 int64 SavePackage::CurrentSpeed() const {
    956   base::TimeDelta diff = base::TimeTicks::Now() - start_tick_;
    957   int64 diff_ms = diff.InMilliseconds();
    958   return diff_ms == 0 ? 0 : completed_count() * 1000 / diff_ms;
    959 }
    960 
    961 // Continue processing the save page job after one SaveItem has been
    962 // finished.
    963 void SavePackage::DoSavingProcess() {
    964   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
    965     // We guarantee that images and JavaScripts must be downloaded first.
    966     // So when finishing all those sub-resources, we will know which
    967     // sub-resource's link can be replaced with local file path, which
    968     // sub-resource's link need to be replaced with absolute URL which
    969     // point to its internet address because it got error when saving its data.
    970 
    971     // Start a new SaveItem job if we still have job in waiting queue.
    972     if (waiting_item_queue_.size()) {
    973       DCHECK(wait_state_ == NET_FILES);
    974       SaveItem* save_item = waiting_item_queue_.front();
    975       if (save_item->save_source() != SaveFileCreateInfo::SAVE_FILE_FROM_DOM) {
    976         SaveNextFile(false);
    977       } else if (!in_process_count()) {
    978         // If there is no in-process SaveItem, it means all sub-resources
    979         // have been processed. Now we need to start serializing HTML DOM
    980         // for the current page to get the generated HTML data.
    981         wait_state_ = HTML_DATA;
    982         // All non-HTML resources have been finished, start all remaining
    983         // HTML files.
    984         SaveNextFile(true);
    985       }
    986     } else if (in_process_count()) {
    987       // Continue asking for HTML data.
    988       DCHECK(wait_state_ == HTML_DATA);
    989     }
    990   } else {
    991     // Save as HTML only or MHTML.
    992     DCHECK(wait_state_ == NET_FILES);
    993     DCHECK((save_type_ == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
    994            (save_type_ == SAVE_PAGE_TYPE_AS_MHTML));
    995     if (waiting_item_queue_.size()) {
    996       DCHECK(all_save_items_count_ == waiting_item_queue_.size());
    997       SaveNextFile(false);
    998     }
    999   }
   1000 }
   1001 
   1002 bool SavePackage::OnMessageReceived(const IPC::Message& message) {
   1003   bool handled = true;
   1004   IPC_BEGIN_MESSAGE_MAP(SavePackage, message)
   1005     IPC_MESSAGE_HANDLER(ViewHostMsg_SendCurrentPageAllSavableResourceLinks,
   1006                         OnReceivedSavableResourceLinksForCurrentPage)
   1007     IPC_MESSAGE_HANDLER(ViewHostMsg_SendSerializedHtmlData,
   1008                         OnReceivedSerializedHtmlData)
   1009     IPC_MESSAGE_UNHANDLED(handled = false)
   1010   IPC_END_MESSAGE_MAP()
   1011   return handled;
   1012 }
   1013 
   1014 // After finishing all SaveItems which need to get data from net.
   1015 // We collect all URLs which have local storage and send the
   1016 // map:(originalURL:currentLocalPath) to render process (backend).
   1017 // Then render process will serialize DOM and send data to us.
   1018 void SavePackage::GetSerializedHtmlDataForCurrentPageWithLocalLinks() {
   1019   if (wait_state_ != HTML_DATA)
   1020     return;
   1021   std::vector<GURL> saved_links;
   1022   std::vector<base::FilePath> saved_file_paths;
   1023   int successful_started_items_count = 0;
   1024 
   1025   // Collect all saved items which have local storage.
   1026   // First collect the status of all the resource files and check whether they
   1027   // have created local files although they have not been completely saved.
   1028   // If yes, the file can be saved. Otherwise, there is a disk error, so we
   1029   // need to cancel the page saving job.
   1030   for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
   1031        it != in_progress_items_.end(); ++it) {
   1032     DCHECK(it->second->save_source() ==
   1033            SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
   1034     if (it->second->has_final_name())
   1035       successful_started_items_count++;
   1036     saved_links.push_back(it->second->url());
   1037     saved_file_paths.push_back(it->second->file_name());
   1038   }
   1039 
   1040   // If not all file of HTML resource have been started, then wait.
   1041   if (successful_started_items_count != in_process_count())
   1042     return;
   1043 
   1044   // Collect all saved success items.
   1045   for (SavedItemMap::iterator it = saved_success_items_.begin();
   1046        it != saved_success_items_.end(); ++it) {
   1047     DCHECK(it->second->has_final_name());
   1048     saved_links.push_back(it->second->url());
   1049     saved_file_paths.push_back(it->second->file_name());
   1050   }
   1051 
   1052   // Get the relative directory name.
   1053   base::FilePath relative_dir_name = saved_main_directory_path_.BaseName();
   1054 
   1055   Send(new ViewMsg_GetSerializedHtmlDataForCurrentPageWithLocalLinks(
   1056       routing_id(), saved_links, saved_file_paths, relative_dir_name));
   1057 }
   1058 
   1059 // Process the serialized HTML content data of a specified web page
   1060 // retrieved from render process.
   1061 void SavePackage::OnReceivedSerializedHtmlData(const GURL& frame_url,
   1062                                                const std::string& data,
   1063                                                int32 status) {
   1064   WebPageSerializerClient::PageSerializationStatus flag =
   1065       static_cast<WebPageSerializerClient::PageSerializationStatus>(status);
   1066   // Check current state.
   1067   if (wait_state_ != HTML_DATA)
   1068     return;
   1069 
   1070   int id = contents_id();
   1071   // If the all frames are finished saving, we need to close the
   1072   // remaining SaveItems.
   1073   if (flag == WebPageSerializerClient::AllFramesAreFinished) {
   1074     for (SaveUrlItemMap::iterator it = in_progress_items_.begin();
   1075          it != in_progress_items_.end(); ++it) {
   1076       VLOG(20) << " " << __FUNCTION__ << "()"
   1077                << " save_id = " << it->second->save_id()
   1078                << " url = \"" << it->second->url().spec() << "\"";
   1079       BrowserThread::PostTask(
   1080           BrowserThread::FILE, FROM_HERE,
   1081           base::Bind(&SaveFileManager::SaveFinished,
   1082                      file_manager_,
   1083                      it->second->save_id(),
   1084                      it->second->url(),
   1085                      id,
   1086                      true));
   1087     }
   1088     return;
   1089   }
   1090 
   1091   SaveUrlItemMap::iterator it = in_progress_items_.find(frame_url.spec());
   1092   if (it == in_progress_items_.end()) {
   1093     for (SavedItemMap::iterator saved_it = saved_success_items_.begin();
   1094       saved_it != saved_success_items_.end(); ++saved_it) {
   1095       if (saved_it->second->url() == frame_url) {
   1096         wrote_to_completed_file_ = true;
   1097         break;
   1098       }
   1099     }
   1100 
   1101     it = saved_failed_items_.find(frame_url.spec());
   1102     if (it != saved_failed_items_.end())
   1103       wrote_to_failed_file_ = true;
   1104 
   1105     return;
   1106   }
   1107 
   1108   SaveItem* save_item = it->second;
   1109   DCHECK(save_item->save_source() == SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
   1110 
   1111   if (!data.empty()) {
   1112     // Prepare buffer for saving HTML data.
   1113     scoped_refptr<net::IOBuffer> new_data(new net::IOBuffer(data.size()));
   1114     memcpy(new_data->data(), data.data(), data.size());
   1115 
   1116     // Call write file functionality in file thread.
   1117     BrowserThread::PostTask(
   1118         BrowserThread::FILE, FROM_HERE,
   1119         base::Bind(&SaveFileManager::UpdateSaveProgress,
   1120                    file_manager_,
   1121                    save_item->save_id(),
   1122                    new_data,
   1123                    static_cast<int>(data.size())));
   1124   }
   1125 
   1126   // Current frame is completed saving, call finish in file thread.
   1127   if (flag == WebPageSerializerClient::CurrentFrameIsFinished) {
   1128     VLOG(20) << " " << __FUNCTION__ << "()"
   1129              << " save_id = " << save_item->save_id()
   1130              << " url = \"" << save_item->url().spec() << "\"";
   1131     BrowserThread::PostTask(
   1132         BrowserThread::FILE, FROM_HERE,
   1133         base::Bind(&SaveFileManager::SaveFinished,
   1134                    file_manager_,
   1135                    save_item->save_id(),
   1136                    save_item->url(),
   1137                    id,
   1138                    true));
   1139   }
   1140 }
   1141 
   1142 // Ask for all savable resource links from backend, include main frame and
   1143 // sub-frame.
   1144 void SavePackage::GetAllSavableResourceLinksForCurrentPage() {
   1145   if (wait_state_ != START_PROCESS)
   1146     return;
   1147 
   1148   wait_state_ = RESOURCES_LIST;
   1149   Send(new ViewMsg_GetAllSavableResourceLinksForCurrentPage(routing_id(),
   1150                                                             page_url_));
   1151 }
   1152 
   1153 // Give backend the lists which contain all resource links that have local
   1154 // storage, after which, render process will serialize DOM for generating
   1155 // HTML data.
   1156 void SavePackage::OnReceivedSavableResourceLinksForCurrentPage(
   1157     const std::vector<GURL>& resources_list,
   1158     const std::vector<Referrer>& referrers_list,
   1159     const std::vector<GURL>& frames_list) {
   1160   if (wait_state_ != RESOURCES_LIST)
   1161     return;
   1162 
   1163   if (resources_list.size() != referrers_list.size())
   1164     return;
   1165 
   1166   all_save_items_count_ = static_cast<int>(resources_list.size()) +
   1167                            static_cast<int>(frames_list.size());
   1168 
   1169   // We use total bytes as the total number of files we want to save.
   1170   // Hack to avoid touching download_ after user cancel.
   1171   // TODO(rdsmith/benjhayden): Integrate canceling on DownloadItem
   1172   // with SavePackage flow.
   1173   if (download_ && (download_->GetState() == DownloadItem::IN_PROGRESS))
   1174     download_->SetTotalBytes(all_save_items_count_);
   1175 
   1176   if (all_save_items_count_) {
   1177     // Put all sub-resources to wait list.
   1178     for (int i = 0; i < static_cast<int>(resources_list.size()); ++i) {
   1179       const GURL& u = resources_list[i];
   1180       DCHECK(u.is_valid());
   1181       SaveFileCreateInfo::SaveFileSource save_source = u.SchemeIsFile() ?
   1182           SaveFileCreateInfo::SAVE_FILE_FROM_FILE :
   1183           SaveFileCreateInfo::SAVE_FILE_FROM_NET;
   1184       SaveItem* save_item = new SaveItem(u, referrers_list[i],
   1185                                          this, save_source);
   1186       waiting_item_queue_.push(save_item);
   1187     }
   1188     // Put all HTML resources to wait list.
   1189     for (int i = 0; i < static_cast<int>(frames_list.size()); ++i) {
   1190       const GURL& u = frames_list[i];
   1191       DCHECK(u.is_valid());
   1192       SaveItem* save_item = new SaveItem(
   1193           u, Referrer(), this, SaveFileCreateInfo::SAVE_FILE_FROM_DOM);
   1194       waiting_item_queue_.push(save_item);
   1195     }
   1196     wait_state_ = NET_FILES;
   1197     DoSavingProcess();
   1198   } else {
   1199     // No resource files need to be saved, treat it as user cancel.
   1200     Cancel(true);
   1201   }
   1202 }
   1203 
   1204 base::FilePath SavePackage::GetSuggestedNameForSaveAs(
   1205     bool can_save_as_complete,
   1206     const std::string& contents_mime_type,
   1207     const std::string& accept_langs) {
   1208   base::FilePath name_with_proper_ext = base::FilePath::FromUTF16Unsafe(title_);
   1209 
   1210   // If the page's title matches its URL, use the URL. Try to use the last path
   1211   // component or if there is none, the domain as the file name.
   1212   // Normally we want to base the filename on the page title, or if it doesn't
   1213   // exist, on the URL. It's not easy to tell if the page has no title, because
   1214   // if the page has no title, WebContents::GetTitle() will return the page's
   1215   // URL (adjusted for display purposes). Therefore, we convert the "title"
   1216   // back to a URL, and if it matches the original page URL, we know the page
   1217   // had no title (or had a title equal to its URL, which is fine to treat
   1218   // similarly).
   1219   if (title_ == net::FormatUrl(page_url_, accept_langs)) {
   1220     std::string url_path;
   1221     if (!page_url_.SchemeIs(url::kDataScheme)) {
   1222       std::vector<std::string> url_parts;
   1223       base::SplitString(page_url_.path(), '/', &url_parts);
   1224       if (!url_parts.empty()) {
   1225         for (int i = static_cast<int>(url_parts.size()) - 1; i >= 0; --i) {
   1226           url_path = url_parts[i];
   1227           if (!url_path.empty())
   1228             break;
   1229         }
   1230       }
   1231       if (url_path.empty())
   1232         url_path = page_url_.host();
   1233     } else {
   1234       url_path = "dataurl";
   1235     }
   1236     name_with_proper_ext = base::FilePath::FromUTF8Unsafe(url_path);
   1237   }
   1238 
   1239   // Ask user for getting final saving name.
   1240   name_with_proper_ext = EnsureMimeExtension(name_with_proper_ext,
   1241                                              contents_mime_type);
   1242   // Adjust extension for complete types.
   1243   if (can_save_as_complete)
   1244     name_with_proper_ext = EnsureHtmlExtension(name_with_proper_ext);
   1245 
   1246   base::FilePath::StringType file_name = name_with_proper_ext.value();
   1247   base::i18n::ReplaceIllegalCharactersInPath(&file_name, ' ');
   1248   return base::FilePath(file_name);
   1249 }
   1250 
   1251 base::FilePath SavePackage::EnsureHtmlExtension(const base::FilePath& name) {
   1252   // If the file name doesn't have an extension suitable for HTML files,
   1253   // append one.
   1254   base::FilePath::StringType ext = name.Extension();
   1255   if (!ext.empty())
   1256     ext.erase(ext.begin());  // Erase preceding '.'.
   1257   std::string mime_type;
   1258   if (!net::GetMimeTypeFromExtension(ext, &mime_type) ||
   1259       !CanSaveAsComplete(mime_type)) {
   1260     return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
   1261                           kDefaultHtmlExtension);
   1262   }
   1263   return name;
   1264 }
   1265 
   1266 base::FilePath SavePackage::EnsureMimeExtension(const base::FilePath& name,
   1267     const std::string& contents_mime_type) {
   1268   // Start extension at 1 to skip over period if non-empty.
   1269   base::FilePath::StringType ext = name.Extension().length() ?
   1270       name.Extension().substr(1) : name.Extension();
   1271   base::FilePath::StringType suggested_extension =
   1272       ExtensionForMimeType(contents_mime_type);
   1273   std::string mime_type;
   1274   if (!suggested_extension.empty() &&
   1275       !net::GetMimeTypeFromExtension(ext, &mime_type)) {
   1276     // Extension is absent or needs to be updated.
   1277     return base::FilePath(name.value() + FILE_PATH_LITERAL(".") +
   1278                     suggested_extension);
   1279   }
   1280   return name;
   1281 }
   1282 
   1283 const base::FilePath::CharType* SavePackage::ExtensionForMimeType(
   1284     const std::string& contents_mime_type) {
   1285   static const struct {
   1286     const base::FilePath::CharType *mime_type;
   1287     const base::FilePath::CharType *suggested_extension;
   1288   } extensions[] = {
   1289     { FILE_PATH_LITERAL("text/html"), kDefaultHtmlExtension },
   1290     { FILE_PATH_LITERAL("text/xml"), FILE_PATH_LITERAL("xml") },
   1291     { FILE_PATH_LITERAL("application/xhtml+xml"), FILE_PATH_LITERAL("xhtml") },
   1292     { FILE_PATH_LITERAL("text/plain"), FILE_PATH_LITERAL("txt") },
   1293     { FILE_PATH_LITERAL("text/css"), FILE_PATH_LITERAL("css") },
   1294   };
   1295 #if defined(OS_POSIX)
   1296   base::FilePath::StringType mime_type(contents_mime_type);
   1297 #elif defined(OS_WIN)
   1298   base::FilePath::StringType mime_type(base::UTF8ToWide(contents_mime_type));
   1299 #endif  // OS_WIN
   1300   for (uint32 i = 0; i < ARRAYSIZE_UNSAFE(extensions); ++i) {
   1301     if (mime_type == extensions[i].mime_type)
   1302       return extensions[i].suggested_extension;
   1303   }
   1304   return FILE_PATH_LITERAL("");
   1305 }
   1306 
   1307 void SavePackage::GetSaveInfo() {
   1308   // Can't use web_contents_ in the file thread, so get the data that we need
   1309   // before calling to it.
   1310   base::FilePath website_save_dir, download_save_dir;
   1311   bool skip_dir_check = false;
   1312   DCHECK(download_manager_);
   1313   if (download_manager_->GetDelegate()) {
   1314     download_manager_->GetDelegate()->GetSaveDir(
   1315         web_contents()->GetBrowserContext(), &website_save_dir,
   1316         &download_save_dir, &skip_dir_check);
   1317   }
   1318   std::string mime_type = web_contents()->GetContentsMimeType();
   1319   std::string accept_languages =
   1320       GetContentClient()->browser()->GetAcceptLangs(
   1321           web_contents()->GetBrowserContext());
   1322 
   1323   BrowserThread::PostTask(
   1324       BrowserThread::FILE, FROM_HERE,
   1325       base::Bind(&SavePackage::CreateDirectoryOnFileThread, this,
   1326           website_save_dir, download_save_dir, skip_dir_check,
   1327           mime_type, accept_languages));
   1328 }
   1329 
   1330 void SavePackage::CreateDirectoryOnFileThread(
   1331     const base::FilePath& website_save_dir,
   1332     const base::FilePath& download_save_dir,
   1333     bool skip_dir_check,
   1334     const std::string& mime_type,
   1335     const std::string& accept_langs) {
   1336   base::FilePath save_dir;
   1337   // If the default html/websites save folder doesn't exist...
   1338   // We skip the directory check for gdata directories on ChromeOS.
   1339   if (!skip_dir_check && !base::DirectoryExists(website_save_dir)) {
   1340     // If the default download dir doesn't exist, create it.
   1341     if (!base::DirectoryExists(download_save_dir)) {
   1342       bool res = base::CreateDirectory(download_save_dir);
   1343       DCHECK(res);
   1344     }
   1345     save_dir = download_save_dir;
   1346   } else {
   1347     // If it does exist, use the default save dir param.
   1348     save_dir = website_save_dir;
   1349   }
   1350 
   1351   bool can_save_as_complete = CanSaveAsComplete(mime_type);
   1352   base::FilePath suggested_filename = GetSuggestedNameForSaveAs(
   1353       can_save_as_complete, mime_type, accept_langs);
   1354   base::FilePath::StringType pure_file_name =
   1355       suggested_filename.RemoveExtension().BaseName().value();
   1356   base::FilePath::StringType file_name_ext = suggested_filename.Extension();
   1357 
   1358   // Need to make sure the suggested file name is not too long.
   1359   uint32 max_path = GetMaxPathLengthForDirectory(save_dir);
   1360 
   1361   if (GetSafePureFileName(save_dir, file_name_ext, max_path, &pure_file_name)) {
   1362     save_dir = save_dir.Append(pure_file_name + file_name_ext);
   1363   } else {
   1364     // Cannot create a shorter filename. This will cause the save as operation
   1365     // to fail unless the user pick a shorter name. Continuing even though it
   1366     // will fail because returning means no save as popup for the user, which
   1367     // is even more confusing. This case should be rare though.
   1368     save_dir = save_dir.Append(suggested_filename);
   1369   }
   1370 
   1371   BrowserThread::PostTask(
   1372       BrowserThread::UI, FROM_HERE,
   1373       base::Bind(&SavePackage::ContinueGetSaveInfo, this, save_dir,
   1374                  can_save_as_complete));
   1375 }
   1376 
   1377 void SavePackage::ContinueGetSaveInfo(const base::FilePath& suggested_path,
   1378                                       bool can_save_as_complete) {
   1379 
   1380   // The WebContents which owns this SavePackage may have disappeared during
   1381   // the UI->FILE->UI thread hop of
   1382   // GetSaveInfo->CreateDirectoryOnFileThread->ContinueGetSaveInfo.
   1383   if (!web_contents() || !download_manager_->GetDelegate())
   1384     return;
   1385 
   1386   base::FilePath::StringType default_extension;
   1387   if (can_save_as_complete)
   1388     default_extension = kDefaultHtmlExtension;
   1389 
   1390   download_manager_->GetDelegate()->ChooseSavePath(
   1391       web_contents(),
   1392       suggested_path,
   1393       default_extension,
   1394       can_save_as_complete,
   1395       base::Bind(&SavePackage::OnPathPicked, AsWeakPtr()));
   1396 }
   1397 
   1398 void SavePackage::OnPathPicked(
   1399     const base::FilePath& final_name,
   1400     SavePageType type,
   1401     const SavePackageDownloadCreatedCallback& download_created_callback) {
   1402   DCHECK((type == SAVE_PAGE_TYPE_AS_ONLY_HTML) ||
   1403          (type == SAVE_PAGE_TYPE_AS_MHTML) ||
   1404          (type == SAVE_PAGE_TYPE_AS_COMPLETE_HTML)) << type;
   1405   // Ensure the filename is safe.
   1406   saved_main_file_path_ = final_name;
   1407   // TODO(asanka): This call may block on IO and shouldn't be made
   1408   // from the UI thread.  See http://crbug.com/61827.
   1409   net::GenerateSafeFileName(web_contents()->GetContentsMimeType(), false,
   1410                             &saved_main_file_path_);
   1411 
   1412   saved_main_directory_path_ = saved_main_file_path_.DirName();
   1413   save_type_ = type;
   1414   if (save_type_ == SAVE_PAGE_TYPE_AS_COMPLETE_HTML) {
   1415     // Make new directory for saving complete file.
   1416     saved_main_directory_path_ = saved_main_directory_path_.Append(
   1417         saved_main_file_path_.RemoveExtension().BaseName().value() +
   1418         FILE_PATH_LITERAL("_files"));
   1419   }
   1420 
   1421   Init(download_created_callback);
   1422 }
   1423 
   1424 void SavePackage::StopObservation() {
   1425   DCHECK(download_);
   1426   DCHECK(download_manager_);
   1427 
   1428   download_->RemoveObserver(this);
   1429   download_ = NULL;
   1430   download_manager_ = NULL;
   1431 }
   1432 
   1433 void SavePackage::OnDownloadDestroyed(DownloadItem* download) {
   1434   StopObservation();
   1435 }
   1436 
   1437 void SavePackage::FinalizeDownloadEntry() {
   1438   DCHECK(download_);
   1439   DCHECK(download_manager_);
   1440 
   1441   download_manager_->OnSavePackageSuccessfullyFinished(download_);
   1442   StopObservation();
   1443 }
   1444 
   1445 }  // namespace content
   1446