1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_ 6 #define CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_ 7 8 #include <queue> 9 #include <set> 10 #include <string> 11 #include <vector> 12 13 #include "base/basictypes.h" 14 #include "base/containers/hash_tables.h" 15 #include "base/files/file_path.h" 16 #include "base/gtest_prod_util.h" 17 #include "base/memory/ref_counted.h" 18 #include "base/memory/weak_ptr.h" 19 #include "base/time/time.h" 20 #include "content/common/content_export.h" 21 #include "content/public/browser/download_item.h" 22 #include "content/public/browser/download_manager_delegate.h" 23 #include "content/public/browser/save_page_type.h" 24 #include "content/public/browser/web_contents_observer.h" 25 #include "content/public/common/referrer.h" 26 #include "net/base/net_errors.h" 27 #include "url/gurl.h" 28 29 class GURL; 30 31 namespace content { 32 class DownloadItemImpl; 33 class DownloadManagerImpl; 34 class WebContents; 35 class SaveFileManager; 36 class SaveItem; 37 class SavePackage; 38 struct SaveFileCreateInfo; 39 40 // The SavePackage object manages the process of saving a page as only-html or 41 // complete-html or MHTML and providing the information for displaying saving 42 // status. Saving page as only-html means means that we save web page to a 43 // single HTML file regardless internal sub resources and sub frames. Saving 44 // page as complete-html page means we save not only the main html file the user 45 // told it to save but also a directory for the auxiliary files such as all 46 // sub-frame html files, image files, css files and js files. Saving page as 47 // MHTML means the same thing as complete-html, but it uses the MHTML format to 48 // contain the html and all auxiliary files in a single text file. 49 // 50 // Each page saving job may include one or multiple files which need to be 51 // saved. Each file is represented by a SaveItem, and all SaveItems are owned 52 // by the SavePackage. SaveItems are created when a user initiates a page 53 // saving job, and exist for the duration of one contents's life time. 54 class CONTENT_EXPORT SavePackage 55 : public base::RefCountedThreadSafe<SavePackage>, 56 public WebContentsObserver, 57 public DownloadItem::Observer, 58 public base::SupportsWeakPtr<SavePackage> { 59 public: 60 enum WaitState { 61 // State when created but not initialized. 62 INITIALIZE = 0, 63 // State when after initializing, but not yet saving. 64 START_PROCESS, 65 // Waiting on a list of savable resources from the backend. 66 RESOURCES_LIST, 67 // Waiting for data sent from net IO or from file system. 68 NET_FILES, 69 // Waiting for html DOM data sent from render process. 70 HTML_DATA, 71 // Saving page finished successfully. 72 SUCCESSFUL, 73 // Failed to save page. 74 FAILED 75 }; 76 77 static const base::FilePath::CharType kDefaultHtmlExtension[]; 78 79 // Constructor for user initiated page saving. This constructor results in a 80 // SavePackage that will generate and sanitize a suggested name for the user 81 // in the "Save As" dialog box. 82 explicit SavePackage(WebContents* web_contents); 83 84 // This contructor is used only for testing. We can bypass the file and 85 // directory name generation / sanitization by providing well known paths 86 // better suited for tests. 87 SavePackage(WebContents* web_contents, 88 SavePageType save_type, 89 const base::FilePath& file_full_path, 90 const base::FilePath& directory_full_path); 91 92 // Initialize the SavePackage. Returns true if it initializes properly. Need 93 // to make sure that this method must be called in the UI thread because using 94 // g_browser_process on a non-UI thread can cause crashes during shutdown. 95 // |cb| will be called when the DownloadItem is created, before data is 96 // written to disk. 97 bool Init(const SavePackageDownloadCreatedCallback& cb); 98 99 // Cancel all in progress request, might be called by user or internal error. 100 void Cancel(bool user_action); 101 102 void Finish(); 103 104 // Notifications sent from the file thread to the UI thread. 105 void StartSave(const SaveFileCreateInfo* info); 106 bool UpdateSaveProgress(int32 save_id, int64 size, bool write_success); 107 void SaveFinished(int32 save_id, int64 size, bool is_success); 108 void SaveFailed(const GURL& save_url); 109 void SaveCanceled(SaveItem* save_item); 110 111 // Rough percent complete, -1 means we don't know (since we didn't receive a 112 // total size). 113 int PercentComplete(); 114 115 bool canceled() const { return user_canceled_ || disk_error_occurred_; } 116 bool finished() const { return finished_; } 117 SavePageType save_type() const { return save_type_; } 118 int contents_id() const { return contents_id_; } 119 int id() const { return unique_id_; } 120 121 void GetSaveInfo(); 122 123 private: 124 friend class base::RefCountedThreadSafe<SavePackage>; 125 126 void InitWithDownloadItem( 127 const SavePackageDownloadCreatedCallback& download_created_callback, 128 DownloadItemImpl* item); 129 130 // Callback for WebContents::GenerateMHTML(). 131 void OnMHTMLGenerated(int64 size); 132 133 // For testing only. 134 SavePackage(WebContents* web_contents, 135 const base::FilePath& file_full_path, 136 const base::FilePath& directory_full_path); 137 138 virtual ~SavePackage(); 139 140 // Notes from Init() above applies here as well. 141 void InternalInit(); 142 143 void Stop(); 144 void CheckFinish(); 145 void SaveNextFile(bool process_all_remainder_items); 146 void DoSavingProcess(); 147 148 // WebContentsObserver implementation. 149 virtual bool OnMessageReceived(const IPC::Message& message) OVERRIDE; 150 151 // DownloadItem::Observer implementation. 152 virtual void OnDownloadDestroyed(DownloadItem* download) OVERRIDE; 153 154 // Update the download history of this item upon completion. 155 void FinalizeDownloadEntry(); 156 157 // Detach from DownloadManager. 158 void StopObservation(); 159 160 // Return max length of a path for a specific base directory. 161 // This is needed on POSIX, which restrict the length of file names in 162 // addition to the restriction on the length of path names. 163 // |base_dir| is assumed to be a directory name with no trailing slash. 164 static uint32 GetMaxPathLengthForDirectory(const base::FilePath& base_dir); 165 166 static bool GetSafePureFileName( 167 const base::FilePath& dir_path, 168 const base::FilePath::StringType& file_name_ext, 169 uint32 max_file_path_len, 170 base::FilePath::StringType* pure_file_name); 171 172 // Create a file name based on the response from the server. 173 bool GenerateFileName(const std::string& disposition, 174 const GURL& url, 175 bool need_html_ext, 176 base::FilePath::StringType* generated_name); 177 178 // Get all savable resource links from current web page, include main 179 // frame and sub-frame. 180 void GetAllSavableResourceLinksForCurrentPage(); 181 // Get html data by serializing all frames of current page with lists 182 // which contain all resource links that have local copy. 183 void GetSerializedHtmlDataForCurrentPageWithLocalLinks(); 184 185 // Look up SaveItem by save id from in progress map. 186 SaveItem* LookupItemInProcessBySaveId(int32 save_id); 187 188 // Remove SaveItem from in progress map and put it to saved map. 189 void PutInProgressItemToSavedMap(SaveItem* save_item); 190 191 // Retrieves the URL to be saved from the WebContents. 192 GURL GetUrlToBeSaved(); 193 194 void CreateDirectoryOnFileThread(const base::FilePath& website_save_dir, 195 const base::FilePath& download_save_dir, 196 bool skip_dir_check, 197 const std::string& mime_type, 198 const std::string& accept_langs); 199 void ContinueGetSaveInfo(const base::FilePath& suggested_path, 200 bool can_save_as_complete); 201 void OnPathPicked( 202 const base::FilePath& final_name, 203 SavePageType type, 204 const SavePackageDownloadCreatedCallback& cb); 205 void OnReceivedSavableResourceLinksForCurrentPage( 206 const std::vector<GURL>& resources_list, 207 const std::vector<Referrer>& referrers_list, 208 const std::vector<GURL>& frames_list); 209 210 void OnReceivedSerializedHtmlData(const GURL& frame_url, 211 const std::string& data, 212 int32 status); 213 214 typedef base::hash_map<std::string, SaveItem*> SaveUrlItemMap; 215 // in_progress_items_ is map of all saving job in in-progress state. 216 SaveUrlItemMap in_progress_items_; 217 // saved_failed_items_ is map of all saving job which are failed. 218 SaveUrlItemMap saved_failed_items_; 219 220 // The number of in process SaveItems. 221 int in_process_count() const { 222 return static_cast<int>(in_progress_items_.size()); 223 } 224 225 // The number of all SaveItems which have completed, including success items 226 // and failed items. 227 int completed_count() const { 228 return static_cast<int>(saved_success_items_.size() + 229 saved_failed_items_.size()); 230 } 231 232 // The current speed in files per second. This is used to update the 233 // DownloadItem associated to this SavePackage. The files per second is 234 // presented by the DownloadItem to the UI as bytes per second, which is 235 // not correct but matches the way the total and received number of files is 236 // presented as the total and received bytes. 237 int64 CurrentSpeed() const; 238 239 // Helper function for preparing suggested name for the SaveAs Dialog. The 240 // suggested name is determined by the web document's title. 241 base::FilePath GetSuggestedNameForSaveAs( 242 bool can_save_as_complete, 243 const std::string& contents_mime_type, 244 const std::string& accept_langs); 245 246 // Ensures that the file name has a proper extension for HTML by adding ".htm" 247 // if necessary. 248 static base::FilePath EnsureHtmlExtension(const base::FilePath& name); 249 250 // Ensures that the file name has a proper extension for supported formats 251 // if necessary. 252 static base::FilePath EnsureMimeExtension(const base::FilePath& name, 253 const std::string& contents_mime_type); 254 255 // Returns extension for supported MIME types (for example, for "text/plain" 256 // it returns "txt"). 257 static const base::FilePath::CharType* ExtensionForMimeType( 258 const std::string& contents_mime_type); 259 260 typedef std::queue<SaveItem*> SaveItemQueue; 261 // A queue for items we are about to start saving. 262 SaveItemQueue waiting_item_queue_; 263 264 typedef base::hash_map<int32, SaveItem*> SavedItemMap; 265 // saved_success_items_ is map of all saving job which are successfully saved. 266 SavedItemMap saved_success_items_; 267 268 // Non-owning pointer for handling file writing on the file thread. 269 SaveFileManager* file_manager_; 270 271 // DownloadManager owns the DownloadItem and handles history and UI. 272 DownloadManagerImpl* download_manager_; 273 DownloadItemImpl* download_; 274 275 // The URL of the page the user wants to save. 276 GURL page_url_; 277 base::FilePath saved_main_file_path_; 278 base::FilePath saved_main_directory_path_; 279 280 // The title of the page the user wants to save. 281 base::string16 title_; 282 283 // Used to calculate package download speed (in files per second). 284 base::TimeTicks start_tick_; 285 286 // Indicates whether the actual saving job is finishing or not. 287 bool finished_; 288 289 // Indicates whether a call to Finish() has been scheduled. 290 bool mhtml_finishing_; 291 292 // Indicates whether user canceled the saving job. 293 bool user_canceled_; 294 295 // Indicates whether user get disk error. 296 bool disk_error_occurred_; 297 298 // Type about saving page as only-html or complete-html. 299 SavePageType save_type_; 300 301 // Number of all need to be saved resources. 302 size_t all_save_items_count_; 303 304 typedef std::set<base::FilePath::StringType, 305 bool (*)(const base::FilePath::StringType&, 306 const base::FilePath::StringType&)> FileNameSet; 307 // This set is used to eliminate duplicated file names in saving directory. 308 FileNameSet file_name_set_; 309 310 typedef base::hash_map<base::FilePath::StringType, uint32> FileNameCountMap; 311 // This map is used to track serial number for specified filename. 312 FileNameCountMap file_name_count_map_; 313 314 // Indicates current waiting state when SavePackage try to get something 315 // from outside. 316 WaitState wait_state_; 317 318 // Since for one contents, it can only have one SavePackage in same time. 319 // Now we actually use render_process_id as the contents's unique id. 320 const int contents_id_; 321 322 // Unique ID for this SavePackage. 323 const int unique_id_; 324 325 // Variables to record errors that happened so we can record them via 326 // UMA statistics. 327 bool wrote_to_completed_file_; 328 bool wrote_to_failed_file_; 329 330 friend class SavePackageTest; 331 FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestSuggestedSaveNames); 332 FRIEND_TEST_ALL_PREFIXES(SavePackageTest, TestLongSafePureFilename); 333 334 DISALLOW_COPY_AND_ASSIGN(SavePackage); 335 }; 336 337 } // namespace content 338 339 #endif // CONTENT_BROWSER_DOWNLOAD_SAVE_PACKAGE_H_ 340