Home | History | Annotate | Download | only in download
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
      6 #define CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
      7 
      8 #include <queue>
      9 #include <set>
     10 #include <string>
     11 #include <vector>
     12 
     13 #include "base/basictypes.h"
     14 #include "base/containers/hash_tables.h"
     15 #include "base/files/file_path.h"
     16 #include "base/gtest_prod_util.h"
     17 #include "base/memory/ref_counted.h"
     18 #include "base/memory/weak_ptr.h"
     19 #include "base/time/time.h"
     20 #include "content/common/content_export.h"
     21 #include "content/public/browser/download_item.h"
     22 #include "content/public/browser/download_manager_delegate.h"
     23 #include "content/public/browser/save_page_type.h"
     24 #include "content/public/browser/web_contents_observer.h"
     25 #include "content/public/common/referrer.h"
     26 #include "net/base/net_errors.h"
     27 #include "url/gurl.h"
     28 
     29 class GURL;
     30 
     31 namespace content {
     32 class DownloadItemImpl;     // Held below as DownloadItemImpl* (download_).
     33 class DownloadManagerImpl;  // Held below as DownloadManagerImpl*.
     34 class WebContents;          // Passed to the SavePackage constructors.
     35 class SaveFileManager;      // Held below as SaveFileManager* (file_manager_).
     36 class SaveItem;             // Element type of the SaveItem maps and queue.
     37 class SavePackage;          // Defined later in this header.
     38 struct SaveFileCreateInfo;  // Parameter type of SavePackage::StartSave().
     39 
     40 // The SavePackage object manages the process of saving a page as only-html or
     41 // complete-html or MHTML and providing the information for displaying saving
     42 // status.  Saving page as only-html means that we save the web page to a
     43 // single HTML file regardless of internal sub resources and sub frames.  Saving
     44 // page as complete-html page means we save not only the main html file the user
     45 // told it to save but also a directory for the auxiliary files such as all
     46 // sub-frame html files, image files, css files and js files.  Saving page as
     47 // MHTML means the same thing as complete-html, but it uses the MHTML format to
     48 // contain the html and all auxiliary files in a single text file.
     49 //
     50 // Each page saving job may include one or multiple files which need to be
     51 // saved. Each file is represented by a SaveItem, and all SaveItems are owned
     52 // by the SavePackage. SaveItems are created when a user initiates a page
     53 // saving job, and exist for the duration of one contents's lifetime.
        //
        // SavePackage is ref-counted through base::RefCountedThreadSafe, is both a
        // WebContentsObserver and a DownloadItem::Observer, and derives from
        // base::SupportsWeakPtr.
     54 class CONTENT_EXPORT SavePackage
     55     : public base::RefCountedThreadSafe<SavePackage>,
     56       public WebContentsObserver,
     57       public DownloadItem::Observer,
     58       public base::SupportsWeakPtr<SavePackage> {
     59  public:
          // Progress states of a saving job; the current one is kept in wait_state_.
     60   enum WaitState {
     61     // State when created but not initialized.
     62     INITIALIZE = 0,
     63     // State after initializing, but not yet saving.
     64     START_PROCESS,
     65     // Waiting on a list of savable resources from the backend.
     66     RESOURCES_LIST,
     67     // Waiting for data sent from net IO or from file system.
     68     NET_FILES,
     69     // Waiting for html DOM data sent from render process.
     70     HTML_DATA,
     71     // Saving page finished successfully.
     72     SUCCESSFUL,
     73     // Failed to save page.
     74     FAILED
     75   };
     76 
          // Default HTML file extension; the value is defined outside this header.
     77   static const base::FilePath::CharType kDefaultHtmlExtension[];
     78 
     79   // Constructor for user initiated page saving. This constructor results in a
     80   // SavePackage that will generate and sanitize a suggested name for the user
     81   // in the "Save As" dialog box.
     82   explicit SavePackage(WebContents* web_contents);
     83 
     84   // This constructor is used only for testing. We can bypass the file and
     85   // directory name generation / sanitization by providing well known paths
     86   // better suited for tests.
     87   SavePackage(WebContents* web_contents,
     88               SavePageType save_type,
     89               const base::FilePath& file_full_path,
     90               const base::FilePath& directory_full_path);
     91 
     92   // Initialize the SavePackage. Returns true if it initializes properly.  Need
     93   // to make sure that this method must be called in the UI thread because using
     94   // g_browser_process on a non-UI thread can cause crashes during shutdown.
     95   // |cb| will be called when the DownloadItem is created, before data is
     96   // written to disk.
     97   bool Init(const SavePackageDownloadCreatedCallback& cb);
     98 
     99   // Cancels all in-progress requests; might be called by user or internal
          // error.
          // NOTE(review): |user_action| presumably tells the two cases apart --
          // confirm against the implementation.
    100   void Cancel(bool user_action);
    101 
    102   void Finish();
    103 
    104   // Notifications sent from the file thread to the UI thread.
    105   void StartSave(const SaveFileCreateInfo* info);
    106   bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success);
    107   void SaveFinished(int32 save_id, int64 size, bool is_success);
    108   void SaveFailed(const GURL& save_url);
    109   void SaveCanceled(SaveItem* save_item);
    110 
    111   // Rough percent complete, -1 means we don't know (since we didn't receive a
    112   // total size).
    113   int PercentComplete();
    114 
          // True if the user canceled the job or a disk error occurred.
    115   bool canceled() const { return user_canceled_ || disk_error_occurred_; }
          // Trivial accessors returning the corresponding data members.
    116   bool finished() const { return finished_; }
    117   SavePageType save_type() const { return save_type_; }
    118   int contents_id() const { return contents_id_; }
    119   int id() const { return unique_id_; }
    120 
    121   void GetSaveInfo();
    122 
    123  private:
          // Gives the ref-counting base class access to the private destructor.
    124   friend class base::RefCountedThreadSafe<SavePackage>;
    125 
    126   void InitWithDownloadItem(
    127       const SavePackageDownloadCreatedCallback& download_created_callback,
    128       DownloadItemImpl* item);
    129 
    130   // Callback for WebContents::GenerateMHTML().
    131   void OnMHTMLGenerated(int64 size);
    132 
    133   // For testing only.
    134   SavePackage(WebContents* web_contents,
    135               const base::FilePath& file_full_path,
    136               const base::FilePath& directory_full_path);
    137 
    138   virtual ~SavePackage();
    139 
    140   // Notes from Init() above apply here as well.
    141   void InternalInit();
    142 
    143   void Stop();
    144   void CheckFinish();
    145   void SaveNextFile(bool process_all_remainder_items);
    146   void DoSavingProcess();
    147 
    148   // WebContentsObserver implementation.
    149   virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE;
    150 
    151   // DownloadItem::Observer implementation.
    152   virtual void OnDownloadDestroyed(DownloadItem* download) OVERRIDE;
    153 
    154   // Update the download history of this item upon completion.
    155   void FinalizeDownloadEntry();
    156 
    157   // Detach from DownloadManager.
    158   void StopObservation();
    159 
    160   // Return max length of a path for a specific base directory.
    161   // This is needed on POSIX, which restricts the length of file names in
    162   // addition to the restriction on the length of path names.
    163   // |base_dir| is assumed to be a directory name with no trailing slash.
    164   static uint32 GetMaxPathLengthForDirectory(const base::FilePath& base_dir);
    165 
    166   static bool GetSafePureFileName(
    167       const base::FilePath& dir_path,
    168       const base::FilePath::StringType& file_name_ext,
    169       uint32 max_file_path_len,
    170       base::FilePath::StringType* pure_file_name);
    171 
    172   // Create a file name based on the response from the server.
    173   bool GenerateFileName(const std::string& disposition,
    174                         const GURL& url,
    175                         bool need_html_ext,
    176                         base::FilePath::StringType* generated_name);
    177 
    178   // Get all savable resource links from the current web page, including the
    179   // main frame and sub-frames.
    180   void GetAllSavableResourceLinksForCurrentPage();
    181   // Get html data by serializing all frames of current page with lists
    182   // which contain all resource links that have local copy.
    183   void GetSerializedHtmlDataForCurrentPageWithLocalLinks();
    184 
    185   // Look up SaveItem by save id from in progress map.
    186   SaveItem* LookupItemInProcessBySaveId(int32 save_id);
    187 
    188   // Remove SaveItem from the in-progress map and put it into a saved map.
    189   void PutInProgressItemToSavedMap(SaveItem* save_item);
    190 
    191   // Retrieves the URL to be saved from the WebContents.
    192   GURL GetUrlToBeSaved();
    193 
    194   void CreateDirectoryOnFileThread(const base::FilePath& website_save_dir,
    195                                    const base::FilePath& download_save_dir,
    196                                    bool skip_dir_check,
    197                                    const std::string& mime_type,
    198                                    const std::string& accept_langs);
    199   void ContinueGetSaveInfo(const base::FilePath& suggested_path,
    200                            bool can_save_as_complete);
    201   void OnPathPicked(
    202       const base::FilePath& final_name,
    203       SavePageType type,
    204       const SavePackageDownloadCreatedCallback& cb);
    205   void OnReceivedSavableResourceLinksForCurrentPage(
    206       const std::vector<GURL>& resources_list,
    207       const std::vector<Referrer>& referrers_list,
    208       const std::vector<GURL>& frames_list);
    209 
    210   void OnReceivedSerializedHtmlData(const GURL& frame_url,
    211                                     const std::string& data,
    212                                     int32 status);
    213 
          // Maps a std::string key to a SaveItem.
          // NOTE(review): the key is presumably the item's URL, judging by the
          // typedef name -- confirm against the implementation.
    214   typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap;
    215   // in_progress_items_ is a map of all saving jobs in the in-progress state.
    216   SaveUrlItemMap in_progress_items_;
    217   // saved_failed_items_ is a map of all saving jobs which have failed.
    218   SaveUrlItemMap saved_failed_items_;
    219 
    220   // The number of in process SaveItems.
    221   int in_process_count() const {
    222     return static_cast<int>(in_progress_items_.size());
    223   }
    224 
    225   // The number of all SaveItems which have completed, including success items
    226   // and failed items.
    227   int completed_count() const {
    228     return static_cast<int>(saved_success_items_.size() +
    229                             saved_failed_items_.size());
    230   }
    231 
    232   // The current speed in files per second. This is used to update the
    233   // DownloadItem associated to this SavePackage. The files per second is
    234   // presented by the DownloadItem to the UI as bytes per second, which is
    235   // not correct but matches the way the total and received number of files is
    236   // presented as the total and received bytes.
    237   int64 CurrentSpeed() const;
    238 
    239   // Helper function for preparing suggested name for the SaveAs Dialog. The
    240   // suggested name is determined by the web document's title.
    241   base::FilePath GetSuggestedNameForSaveAs(
    242       bool can_save_as_complete,
    243       const std::string& contents_mime_type,
    244       const std::string& accept_langs);
    245 
    246   // Ensures that the file name has a proper extension for HTML by adding ".htm"
    247   // if necessary.
    248   static base::FilePath EnsureHtmlExtension(const base::FilePath& name);
    249 
    250   // Ensures that the file name has a proper extension for supported formats
    251   // if necessary.
    252   static base::FilePath EnsureMimeExtension(const base::FilePath& name,
    253       const std::string& contents_mime_type);
    254 
    255   // Returns extension for supported MIME types (for example, for "text/plain"
    256   // it returns "txt").
    257   static const base::FilePath::CharType* ExtensionForMimeType(
    258       const std::string& contents_mime_type);
    259 
    260   typedef std::queue<SaveItem*> SaveItemQueue;
    261   // A queue for items we are about to start saving.
    262   SaveItemQueue waiting_item_queue_;
    263 
          // Maps an int32 key to a SaveItem.
          // NOTE(review): the key is presumably the save id -- confirm.
    264   typedef base::hash_map<int32, SaveItem*> SavedItemMap;
    265   // saved_success_items_ is a map of all saving jobs which were saved
          // successfully.
    266   SavedItemMap saved_success_items_;
    267 
    268   // Non-owning pointer for handling file writing on the file thread.
    269   SaveFileManager* file_manager_;
    270 
    271   // DownloadManager owns the DownloadItem and handles history and UI.
    272   DownloadManagerImpl* download_manager_;
    273   DownloadItemImpl* download_;
    274 
    275   // The URL of the page the user wants to save.
    276   GURL page_url_;
    277   base::FilePath saved_main_file_path_;
    278   base::FilePath saved_main_directory_path_;
    279 
    280   // The title of the page the user wants to save.
    281   base::string16 title_;
    282 
    283   // Used to calculate package download speed (in files per second).
    284   base::TimeTicks start_tick_;
    285 
    286   // Indicates whether the actual saving job is finishing or not.
    287   bool finished_;
    288 
    289   // Indicates whether a call to Finish() has been scheduled.
    290   bool mhtml_finishing_;
    291 
    292   // Indicates whether the user canceled the saving job.
    293   bool user_canceled_;
    294 
    295   // Indicates whether a disk error occurred.
    296   bool disk_error_occurred_;
    297 
    298   // Type of saving: page as only-html or complete-html.
    299   SavePageType save_type_;
    300 
    301   // Total number of resources that need to be saved.
    302   size_t all_save_items_count_;
    303 
          // A set of file names ordered by a comparison function that is supplied
          // as a function pointer rather than by the default std::less.
    304   typedef std::set<base::FilePath::StringType,
    305                    bool (*)(const base::FilePath::StringType&,
    306                             const base::FilePath::StringType&)> FileNameSet;
    307   // This set is used to eliminate duplicated file names in saving directory.
    308   FileNameSet file_name_set_;
    309 
    310   typedef base::hash_map<base::FilePath::StringType, uint32> FileNameCountMap;
    311   // This map is used to track serial number for specified filename.
    312   FileNameCountMap file_name_count_map_;
    313 
    314   // Indicates the current waiting state when SavePackage tries to get
    315   // something from outside.
    316   WaitState wait_state_;
    317 
    318   // One contents can only have one SavePackage at a time.
    319   // Now we actually use render_process_id as the contents's unique id.
    320   const int contents_id_;
    321 
    322   // Unique ID for this SavePackage.
    323   const int unique_id_;
    324 
    325   // Variables to record errors that happened so we can record them via
    326   // UMA statistics.
    327   bool wrote_to_completed_file_;
    328   bool wrote_to_failed_file_;
    329 
          // Test fixture and test cases that are granted access to private members.
    330   friend class SavePackageTest;
    331   FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames);
    332   FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename);
    333 
    334   DISALLOW_COPY_AND_ASSIGN(SavePackage);
    335 };
    336 
    337 }  // namespace content
    338 
    339 #endif  // CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_
    340