1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "chrome/utility/importer/firefox_importer.h" 6 7 #include <set> 8 9 #include "base/file_util.h" 10 #include "base/files/file_enumerator.h" 11 #include "base/memory/scoped_ptr.h" 12 #include "base/message_loop/message_loop.h" 13 #include "base/stl_util.h" 14 #include "base/strings/string_util.h" 15 #include "base/strings/utf_string_conversions.h" 16 #include "chrome/common/importer/firefox_importer_utils.h" 17 #include "chrome/common/importer/firefox_importer_utils.h" 18 #include "chrome/common/importer/imported_bookmark_entry.h" 19 #include "chrome/common/importer/imported_favicon_usage.h" 20 #include "chrome/common/importer/importer_bridge.h" 21 #include "chrome/common/importer/importer_url_row.h" 22 #include "chrome/utility/importer/bookmark_html_reader.h" 23 #include "chrome/utility/importer/favicon_reencode.h" 24 #include "chrome/utility/importer/nss_decryptor.h" 25 #include "components/autofill/core/common/password_form.h" 26 #include "grit/generated_resources.h" 27 #include "sql/connection.h" 28 #include "sql/statement.h" 29 #include "url/gurl.h" 30 31 namespace { 32 33 // Original definition is in http://mxr.mozilla.org/firefox/source/toolkit/ 34 // components/places/public/nsINavBookmarksService.idl 35 enum BookmarkItemType { 36 TYPE_BOOKMARK = 1, 37 TYPE_FOLDER = 2, 38 TYPE_SEPARATOR = 3, 39 TYPE_DYNAMIC_CONTAINER = 4 40 }; 41 42 // Loads the default bookmarks in the Firefox installed at |app_path|, 43 // and stores their locations in |urls|. 44 void LoadDefaultBookmarks(const base::FilePath& app_path, 45 std::set<GURL>* urls) { 46 base::FilePath file = app_path.AppendASCII("defaults") 47 .AppendASCII("profile") 48 .AppendASCII("bookmarks.html"); 49 urls->clear(); 50 51 std::vector<ImportedBookmarkEntry> bookmarks; 52 bookmark_html_reader::ImportBookmarksFile(base::Callback<bool(void)>(), 53 base::Callback<bool(const GURL&)>(), 54 file, 55 &bookmarks, 56 NULL); 57 for (size_t i = 0; i < bookmarks.size(); ++i) 58 urls->insert(bookmarks[i].url); 59 } 60 61 // Returns true if |url| has a valid scheme that we allow to import. We 62 // filter out the URL with a unsupported scheme. 63 bool CanImportURL(const GURL& url) { 64 // The URL is not valid. 65 if (!url.is_valid()) 66 return false; 67 68 // Filter out the URLs with unsupported schemes. 69 const char* const kInvalidSchemes[] = {"wyciwyg", "place", "about", "chrome"}; 70 for (size_t i = 0; i < arraysize(kInvalidSchemes); ++i) { 71 if (url.SchemeIs(kInvalidSchemes[i])) 72 return false; 73 } 74 75 return true; 76 } 77 78 } // namespace 79 80 struct FirefoxImporter::BookmarkItem { 81 int parent; 82 int id; 83 GURL url; 84 base::string16 title; 85 BookmarkItemType type; 86 std::string keyword; 87 base::Time date_added; 88 int64 favicon; 89 bool empty_folder; 90 }; 91 92 FirefoxImporter::FirefoxImporter() { 93 } 94 95 FirefoxImporter::~FirefoxImporter() { 96 } 97 98 void FirefoxImporter::StartImport( 99 const importer::SourceProfile& source_profile, 100 uint16 items, 101 ImporterBridge* bridge) { 102 bridge_ = bridge; 103 source_path_ = source_profile.source_path; 104 app_path_ = source_profile.app_path; 105 106 #if defined(OS_POSIX) 107 locale_ = source_profile.locale; 108 #endif 109 110 // The order here is important! 111 bridge_->NotifyStarted(); 112 if ((items & importer::HOME_PAGE) && !cancelled()) { 113 bridge_->NotifyItemStarted(importer::HOME_PAGE); 114 ImportHomepage(); // Doesn't have a UI item. 115 bridge_->NotifyItemEnded(importer::HOME_PAGE); 116 } 117 118 // Note history should be imported before bookmarks because bookmark import 119 // will also import favicons and we store favicon for a URL only if the URL 120 // exist in history or bookmarks. 121 if ((items & importer::HISTORY) && !cancelled()) { 122 bridge_->NotifyItemStarted(importer::HISTORY); 123 ImportHistory(); 124 bridge_->NotifyItemEnded(importer::HISTORY); 125 } 126 127 if ((items & importer::FAVORITES) && !cancelled()) { 128 bridge_->NotifyItemStarted(importer::FAVORITES); 129 ImportBookmarks(); 130 bridge_->NotifyItemEnded(importer::FAVORITES); 131 } 132 if ((items & importer::SEARCH_ENGINES) && !cancelled()) { 133 bridge_->NotifyItemStarted(importer::SEARCH_ENGINES); 134 ImportSearchEngines(); 135 bridge_->NotifyItemEnded(importer::SEARCH_ENGINES); 136 } 137 if ((items & importer::PASSWORDS) && !cancelled()) { 138 bridge_->NotifyItemStarted(importer::PASSWORDS); 139 ImportPasswords(); 140 bridge_->NotifyItemEnded(importer::PASSWORDS); 141 } 142 bridge_->NotifyEnded(); 143 } 144 145 void FirefoxImporter::ImportHistory() { 146 base::FilePath file = source_path_.AppendASCII("places.sqlite"); 147 if (!base::PathExists(file)) 148 return; 149 150 sql::Connection db; 151 if (!db.Open(file)) 152 return; 153 154 // |visit_type| represent the transition type of URLs (typed, click, 155 // redirect, bookmark, etc.) We eliminate some URLs like sub-frames and 156 // redirects, since we don't want them to appear in history. 157 // Firefox transition types are defined in: 158 // toolkit/components/places/public/nsINavHistoryService.idl 159 const char* query = "SELECT h.url, h.title, h.visit_count, " 160 "h.hidden, h.typed, v.visit_date " 161 "FROM moz_places h JOIN moz_historyvisits v " 162 "ON h.id = v.place_id " 163 "WHERE v.visit_type <= 3"; 164 165 sql::Statement s(db.GetUniqueStatement(query)); 166 167 std::vector<ImporterURLRow> rows; 168 while (s.Step() && !cancelled()) { 169 GURL url(s.ColumnString(0)); 170 171 // Filter out unwanted URLs. 172 if (!CanImportURL(url)) 173 continue; 174 175 ImporterURLRow row(url); 176 row.title = s.ColumnString16(1); 177 row.visit_count = s.ColumnInt(2); 178 row.hidden = s.ColumnInt(3) == 1; 179 row.typed_count = s.ColumnInt(4); 180 row.last_visit = base::Time::FromTimeT(s.ColumnInt64(5)/1000000); 181 182 rows.push_back(row); 183 } 184 185 if (!rows.empty() && !cancelled()) 186 bridge_->SetHistoryItems(rows, importer::VISIT_SOURCE_FIREFOX_IMPORTED); 187 } 188 189 void FirefoxImporter::ImportBookmarks() { 190 base::FilePath file = source_path_.AppendASCII("places.sqlite"); 191 if (!base::PathExists(file)) 192 return; 193 194 sql::Connection db; 195 if (!db.Open(file)) 196 return; 197 198 // Get the bookmark folders that we are interested in. 199 int toolbar_folder_id = -1; 200 int menu_folder_id = -1; 201 int unsorted_folder_id = -1; 202 LoadRootNodeID(&db, &toolbar_folder_id, &menu_folder_id, &unsorted_folder_id); 203 204 // Load livemark IDs. 205 std::set<int> livemark_id; 206 LoadLivemarkIDs(&db, &livemark_id); 207 208 // Load the default bookmarks. 209 std::set<GURL> default_urls; 210 LoadDefaultBookmarks(app_path_, &default_urls); 211 212 BookmarkList list; 213 GetTopBookmarkFolder(&db, toolbar_folder_id, &list); 214 GetTopBookmarkFolder(&db, menu_folder_id, &list); 215 GetTopBookmarkFolder(&db, unsorted_folder_id, &list); 216 size_t count = list.size(); 217 for (size_t i = 0; i < count; ++i) 218 GetWholeBookmarkFolder(&db, &list, i, NULL); 219 220 std::vector<ImportedBookmarkEntry> bookmarks; 221 std::vector<importer::URLKeywordInfo> url_keywords; 222 FaviconMap favicon_map; 223 224 // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based 225 // keywords yet. We won't include them in the list. 226 std::set<int> post_keyword_ids; 227 const char* query = "SELECT b.id FROM moz_bookmarks b " 228 "INNER JOIN moz_items_annos ia ON ia.item_id = b.id " 229 "INNER JOIN moz_anno_attributes aa ON ia.anno_attribute_id = aa.id " 230 "WHERE aa.name = 'bookmarkProperties/POSTData'"; 231 sql::Statement s(db.GetUniqueStatement(query)); 232 233 if (!s.is_valid()) 234 return; 235 236 while (s.Step() && !cancelled()) 237 post_keyword_ids.insert(s.ColumnInt(0)); 238 239 for (size_t i = 0; i < list.size(); ++i) { 240 BookmarkItem* item = list[i]; 241 242 if (item->type == TYPE_FOLDER) { 243 // Folders are added implicitly on adding children, so we only explicitly 244 // add empty folders. 245 if (!item->empty_folder) 246 continue; 247 } else if (item->type == TYPE_BOOKMARK) { 248 // Import only valid bookmarks 249 if (!CanImportURL(item->url)) 250 continue; 251 } else { 252 continue; 253 } 254 255 // Skip the default bookmarks and unwanted URLs. 256 if (default_urls.find(item->url) != default_urls.end() || 257 post_keyword_ids.find(item->id) != post_keyword_ids.end()) 258 continue; 259 260 // Find the bookmark path by tracing their links to parent folders. 261 std::vector<base::string16> path; 262 BookmarkItem* child = item; 263 bool found_path = false; 264 bool is_in_toolbar = false; 265 while (child->parent >= 0) { 266 BookmarkItem* parent = list[child->parent]; 267 if (livemark_id.find(parent->id) != livemark_id.end()) { 268 // Don't import live bookmarks. 269 break; 270 } 271 272 if (parent->id != menu_folder_id) { 273 // To avoid excessive nesting, omit the name for the bookmarks menu 274 // folder. 275 path.insert(path.begin(), parent->title); 276 } 277 278 if (parent->id == toolbar_folder_id) 279 is_in_toolbar = true; 280 281 if (parent->id == toolbar_folder_id || 282 parent->id == menu_folder_id || 283 parent->id == unsorted_folder_id) { 284 // We've reached a root node, hooray! 285 found_path = true; 286 break; 287 } 288 289 child = parent; 290 } 291 292 if (!found_path) 293 continue; 294 295 ImportedBookmarkEntry entry; 296 entry.creation_time = item->date_added; 297 entry.title = item->title; 298 entry.url = item->url; 299 entry.path = path; 300 entry.in_toolbar = is_in_toolbar; 301 entry.is_folder = item->type == TYPE_FOLDER; 302 303 bookmarks.push_back(entry); 304 305 if (item->type == TYPE_BOOKMARK) { 306 if (item->favicon) 307 favicon_map[item->favicon].insert(item->url); 308 309 // This bookmark has a keyword, we should import it. 310 if (!item->keyword.empty() && item->url.is_valid()) { 311 importer::URLKeywordInfo url_keyword_info; 312 url_keyword_info.url = item->url; 313 url_keyword_info.keyword.assign(base::UTF8ToUTF16(item->keyword)); 314 url_keyword_info.display_name = item->title; 315 url_keywords.push_back(url_keyword_info); 316 } 317 } 318 } 319 320 STLDeleteElements(&list); 321 322 // Write into profile. 323 if (!bookmarks.empty() && !cancelled()) { 324 const base::string16& first_folder_name = 325 bridge_->GetLocalizedString(IDS_BOOKMARK_GROUP_FROM_FIREFOX); 326 bridge_->AddBookmarks(bookmarks, first_folder_name); 327 } 328 if (!url_keywords.empty() && !cancelled()) { 329 bridge_->SetKeywords(url_keywords, false); 330 } 331 if (!favicon_map.empty() && !cancelled()) { 332 std::vector<ImportedFaviconUsage> favicons; 333 LoadFavicons(&db, favicon_map, &favicons); 334 bridge_->SetFavicons(favicons); 335 } 336 } 337 338 void FirefoxImporter::ImportPasswords() { 339 // Initializes NSS3. 340 NSSDecryptor decryptor; 341 if (!decryptor.Init(source_path_, source_path_) && 342 !decryptor.Init(app_path_, source_path_)) { 343 return; 344 } 345 346 std::vector<autofill::PasswordForm> forms; 347 base::FilePath source_path = source_path_; 348 base::FilePath file = source_path.AppendASCII("signons.sqlite"); 349 if (base::PathExists(file)) { 350 // Since Firefox 3.1, passwords are in signons.sqlite db. 351 decryptor.ReadAndParseSignons(file, &forms); 352 } else { 353 // Firefox 3.0 uses signons3.txt to store the passwords. 354 file = source_path.AppendASCII("signons3.txt"); 355 if (!base::PathExists(file)) 356 file = source_path.AppendASCII("signons2.txt"); 357 358 std::string content; 359 base::ReadFileToString(file, &content); 360 decryptor.ParseSignons(content, &forms); 361 } 362 363 if (!cancelled()) { 364 for (size_t i = 0; i < forms.size(); ++i) { 365 bridge_->SetPasswordForm(forms[i]); 366 } 367 } 368 } 369 370 void FirefoxImporter::ImportSearchEngines() { 371 std::vector<std::string> search_engine_data; 372 GetSearchEnginesXMLData(&search_engine_data); 373 374 bridge_->SetFirefoxSearchEnginesXMLData(search_engine_data); 375 } 376 377 void FirefoxImporter::ImportHomepage() { 378 GURL home_page = GetHomepage(source_path_); 379 if (home_page.is_valid() && !IsDefaultHomepage(home_page, app_path_)) { 380 bridge_->AddHomePage(home_page); 381 } 382 } 383 384 void FirefoxImporter::GetSearchEnginesXMLData( 385 std::vector<std::string>* search_engine_data) { 386 // TODO(mpawlowski): This may no longer work, search engines are stored in 387 // search.json since Firefox 3.5, not in search.sqlite. XML definitions are 388 // still necessary. http://crbug.com/329175 389 base::FilePath file = source_path_.AppendASCII("search.sqlite"); 390 if (!base::PathExists(file)) 391 return; 392 393 sql::Connection db; 394 if (!db.Open(file)) 395 return; 396 397 const char* query = "SELECT engineid FROM engine_data " 398 "WHERE engineid NOT IN " 399 "(SELECT engineid FROM engine_data " 400 "WHERE name='hidden') " 401 "ORDER BY value ASC"; 402 403 sql::Statement s(db.GetUniqueStatement(query)); 404 if (!s.is_valid()) 405 return; 406 407 const base::FilePath searchplugins_path(FILE_PATH_LITERAL("searchplugins")); 408 // Search engine definitions are XMLs stored in two directories. Default 409 // engines are in the app directory (app_path_) and custom engines are 410 // in the profile directory (source_path_). 411 412 // Since Firefox 21, app_path_ engines are in 'browser' subdirectory: 413 base::FilePath app_path = 414 app_path_.AppendASCII("browser").Append(searchplugins_path); 415 if (!base::PathExists(app_path)) { 416 // This might be an older Firefox, try old location without the 'browser' 417 // path component: 418 app_path = app_path_.Append(searchplugins_path); 419 } 420 421 base::FilePath profile_path = source_path_.Append(searchplugins_path); 422 423 // Firefox doesn't store a search engine in its sqlite database unless the 424 // user has added a engine. So we get search engines from sqlite db as well 425 // as from the file system. 426 if (s.Step()) { 427 const std::string kAppPrefix("[app]/"); 428 const std::string kProfilePrefix("[profile]/"); 429 do { 430 base::FilePath file; 431 std::string engine(s.ColumnString(0)); 432 433 // The string contains [app]/<name>.xml or [profile]/<name>.xml where 434 // the [app] and [profile] need to be replaced with the actual app or 435 // profile path. 436 size_t index = engine.find(kAppPrefix); 437 if (index != std::string::npos) { 438 // Remove '[app]/'. 439 file = app_path.AppendASCII(engine.substr(index + kAppPrefix.length())); 440 } else if ((index = engine.find(kProfilePrefix)) != std::string::npos) { 441 // Remove '[profile]/'. 442 file = profile_path.AppendASCII( 443 engine.substr(index + kProfilePrefix.length())); 444 } else { 445 // Looks like absolute path to the file. 446 file = base::FilePath::FromUTF8Unsafe(engine); 447 } 448 std::string file_data; 449 base::ReadFileToString(file, &file_data); 450 search_engine_data->push_back(file_data); 451 } while (s.Step() && !cancelled()); 452 } 453 454 #if defined(OS_POSIX) 455 // Ubuntu-flavored Firefox supports locale-specific search engines via 456 // locale-named subdirectories. They fall back to en-US. 457 // See http://crbug.com/53899 458 // TODO(jshin): we need to make sure our locale code matches that of 459 // Firefox. 460 DCHECK(!locale_.empty()); 461 base::FilePath locale_app_path = app_path.AppendASCII(locale_); 462 base::FilePath default_locale_app_path = app_path.AppendASCII("en-US"); 463 if (base::DirectoryExists(locale_app_path)) 464 app_path = locale_app_path; 465 else if (base::DirectoryExists(default_locale_app_path)) 466 app_path = default_locale_app_path; 467 #endif 468 469 // Get search engine definition from file system. 470 base::FileEnumerator engines(app_path, false, base::FileEnumerator::FILES); 471 for (base::FilePath engine_path = engines.Next(); 472 !engine_path.value().empty(); engine_path = engines.Next()) { 473 std::string file_data; 474 base::ReadFileToString(file, &file_data); 475 search_engine_data->push_back(file_data); 476 } 477 } 478 479 void FirefoxImporter::LoadRootNodeID(sql::Connection* db, 480 int* toolbar_folder_id, 481 int* menu_folder_id, 482 int* unsorted_folder_id) { 483 static const char* kToolbarFolderName = "toolbar"; 484 static const char* kMenuFolderName = "menu"; 485 static const char* kUnsortedFolderName = "unfiled"; 486 487 const char* query = "SELECT root_name, folder_id FROM moz_bookmarks_roots"; 488 sql::Statement s(db->GetUniqueStatement(query)); 489 490 while (s.Step()) { 491 std::string folder = s.ColumnString(0); 492 int id = s.ColumnInt(1); 493 if (folder == kToolbarFolderName) 494 *toolbar_folder_id = id; 495 else if (folder == kMenuFolderName) 496 *menu_folder_id = id; 497 else if (folder == kUnsortedFolderName) 498 *unsorted_folder_id = id; 499 } 500 } 501 502 void FirefoxImporter::LoadLivemarkIDs(sql::Connection* db, 503 std::set<int>* livemark) { 504 static const char* kFeedAnnotation = "livemark/feedURI"; 505 livemark->clear(); 506 507 const char* query = "SELECT b.item_id " 508 "FROM moz_anno_attributes a " 509 "JOIN moz_items_annos b ON a.id = b.anno_attribute_id " 510 "WHERE a.name = ? "; 511 sql::Statement s(db->GetUniqueStatement(query)); 512 s.BindString(0, kFeedAnnotation); 513 514 while (s.Step() && !cancelled()) 515 livemark->insert(s.ColumnInt(0)); 516 } 517 518 void FirefoxImporter::GetTopBookmarkFolder(sql::Connection* db, 519 int folder_id, 520 BookmarkList* list) { 521 const char* query = "SELECT b.title " 522 "FROM moz_bookmarks b " 523 "WHERE b.type = 2 AND b.id = ? " 524 "ORDER BY b.position"; 525 sql::Statement s(db->GetUniqueStatement(query)); 526 s.BindInt(0, folder_id); 527 528 if (s.Step()) { 529 BookmarkItem* item = new BookmarkItem; 530 item->parent = -1; // The top level folder has no parent. 531 item->id = folder_id; 532 item->title = s.ColumnString16(0); 533 item->type = TYPE_FOLDER; 534 item->favicon = 0; 535 item->empty_folder = true; 536 list->push_back(item); 537 } 538 } 539 540 void FirefoxImporter::GetWholeBookmarkFolder(sql::Connection* db, 541 BookmarkList* list, 542 size_t position, 543 bool* empty_folder) { 544 if (position >= list->size()) { 545 NOTREACHED(); 546 return; 547 } 548 549 const char* query = "SELECT b.id, h.url, COALESCE(b.title, h.title), " 550 "b.type, k.keyword, b.dateAdded, h.favicon_id " 551 "FROM moz_bookmarks b " 552 "LEFT JOIN moz_places h ON b.fk = h.id " 553 "LEFT JOIN moz_keywords k ON k.id = b.keyword_id " 554 "WHERE b.type IN (1,2) AND b.parent = ? " 555 "ORDER BY b.position"; 556 sql::Statement s(db->GetUniqueStatement(query)); 557 s.BindInt(0, (*list)[position]->id); 558 559 BookmarkList temp_list; 560 while (s.Step()) { 561 BookmarkItem* item = new BookmarkItem; 562 item->parent = static_cast<int>(position); 563 item->id = s.ColumnInt(0); 564 item->url = GURL(s.ColumnString(1)); 565 item->title = s.ColumnString16(2); 566 item->type = static_cast<BookmarkItemType>(s.ColumnInt(3)); 567 item->keyword = s.ColumnString(4); 568 item->date_added = base::Time::FromTimeT(s.ColumnInt64(5)/1000000); 569 item->favicon = s.ColumnInt64(6); 570 item->empty_folder = true; 571 572 temp_list.push_back(item); 573 if (empty_folder != NULL) 574 *empty_folder = false; 575 } 576 577 // Appends all items to the list. 578 for (BookmarkList::iterator i = temp_list.begin(); 579 i != temp_list.end(); ++i) { 580 list->push_back(*i); 581 // Recursive add bookmarks in sub-folders. 582 if ((*i)->type == TYPE_FOLDER) 583 GetWholeBookmarkFolder(db, list, list->size() - 1, &(*i)->empty_folder); 584 } 585 } 586 587 void FirefoxImporter::LoadFavicons( 588 sql::Connection* db, 589 const FaviconMap& favicon_map, 590 std::vector<ImportedFaviconUsage>* favicons) { 591 const char* query = "SELECT url, data FROM moz_favicons WHERE id=?"; 592 sql::Statement s(db->GetUniqueStatement(query)); 593 594 if (!s.is_valid()) 595 return; 596 597 for (FaviconMap::const_iterator i = favicon_map.begin(); 598 i != favicon_map.end(); ++i) { 599 s.BindInt64(0, i->first); 600 if (s.Step()) { 601 ImportedFaviconUsage usage; 602 603 usage.favicon_url = GURL(s.ColumnString(0)); 604 if (!usage.favicon_url.is_valid()) 605 continue; // Don't bother importing favicons with invalid URLs. 606 607 std::vector<unsigned char> data; 608 s.ColumnBlobAsVector(1, &data); 609 if (data.empty()) 610 continue; // Data definitely invalid. 611 612 if (!importer::ReencodeFavicon(&data[0], data.size(), &usage.png_data)) 613 continue; // Unable to decode. 614 615 usage.urls = i->second; 616 favicons->push_back(usage); 617 } 618 s.Reset(true); 619 } 620 } 621