Home | History | Annotate | Download | only in importer
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/utility/importer/firefox_importer.h"
      6 
      7 #include <set>
      8 
      9 #include "base/file_util.h"
     10 #include "base/files/file_enumerator.h"
     11 #include "base/memory/scoped_ptr.h"
     12 #include "base/message_loop/message_loop.h"
     13 #include "base/stl_util.h"
     14 #include "base/strings/string_util.h"
     15 #include "base/strings/utf_string_conversions.h"
     16 #include "chrome/common/importer/firefox_importer_utils.h"
     17 #include "chrome/common/importer/firefox_importer_utils.h"
     18 #include "chrome/common/importer/imported_bookmark_entry.h"
     19 #include "chrome/common/importer/imported_favicon_usage.h"
     20 #include "chrome/common/importer/importer_bridge.h"
     21 #include "chrome/common/importer/importer_url_row.h"
     22 #include "chrome/utility/importer/bookmark_html_reader.h"
     23 #include "chrome/utility/importer/favicon_reencode.h"
     24 #include "chrome/utility/importer/nss_decryptor.h"
     25 #include "components/autofill/core/common/password_form.h"
     26 #include "grit/generated_resources.h"
     27 #include "sql/connection.h"
     28 #include "sql/statement.h"
     29 #include "url/gurl.h"
     30 
     31 namespace {
     32 
     33 // Original definition is in http://mxr.mozilla.org/firefox/source/toolkit/
     34 //  components/places/public/nsINavBookmarksService.idl
     35 enum BookmarkItemType {
     36   TYPE_BOOKMARK = 1,
     37   TYPE_FOLDER = 2,
     38   TYPE_SEPARATOR = 3,
     39   TYPE_DYNAMIC_CONTAINER = 4
     40 };
     41 
     42 // Loads the default bookmarks in the Firefox installed at |app_path|,
     43 // and stores their locations in |urls|.
     44 void LoadDefaultBookmarks(const base::FilePath& app_path,
     45                           std::set<GURL>* urls) {
     46   base::FilePath file = app_path.AppendASCII("defaults")
     47       .AppendASCII("profile")
     48       .AppendASCII("bookmarks.html");
     49   urls->clear();
     50 
     51   std::vector<ImportedBookmarkEntry> bookmarks;
     52   bookmark_html_reader::ImportBookmarksFile(base::Callback<bool(void)>(),
     53                                             base::Callback<bool(const GURL&)>(),
     54                                             file,
     55                                             &bookmarks,
     56                                             NULL);
     57   for (size_t i = 0; i < bookmarks.size(); ++i)
     58     urls->insert(bookmarks[i].url);
     59 }
     60 
     61 // Returns true if |url| has a valid scheme that we allow to import. We
     62 // filter out the URL with a unsupported scheme.
     63 bool CanImportURL(const GURL& url) {
     64   // The URL is not valid.
     65   if (!url.is_valid())
     66     return false;
     67 
     68   // Filter out the URLs with unsupported schemes.
     69   const char* const kInvalidSchemes[] = {"wyciwyg", "place", "about", "chrome"};
     70   for (size_t i = 0; i < arraysize(kInvalidSchemes); ++i) {
     71     if (url.SchemeIs(kInvalidSchemes[i]))
     72       return false;
     73   }
     74 
     75   return true;
     76 }
     77 
     78 }  // namespace
     79 
     80 struct FirefoxImporter::BookmarkItem {
     81   int parent;
     82   int id;
     83   GURL url;
     84   string16 title;
     85   BookmarkItemType type;
     86   std::string keyword;
     87   base::Time date_added;
     88   int64 favicon;
     89   bool empty_folder;
     90 };
     91 
     92 FirefoxImporter::FirefoxImporter() {
     93 }
     94 
     95 FirefoxImporter::~FirefoxImporter() {
     96 }
     97 
     98 void FirefoxImporter::StartImport(
     99     const importer::SourceProfile& source_profile,
    100     uint16 items,
    101     ImporterBridge* bridge) {
    102   bridge_ = bridge;
    103   source_path_ = source_profile.source_path;
    104   app_path_ = source_profile.app_path;
    105 
    106 #if defined(OS_POSIX)
    107   locale_ = source_profile.locale;
    108 #endif
    109 
    110   // The order here is important!
    111   bridge_->NotifyStarted();
    112   if ((items & importer::HOME_PAGE) && !cancelled()) {
    113     bridge_->NotifyItemStarted(importer::HOME_PAGE);
    114     ImportHomepage();  // Doesn't have a UI item.
    115     bridge_->NotifyItemEnded(importer::HOME_PAGE);
    116   }
    117 
    118   // Note history should be imported before bookmarks because bookmark import
    119   // will also import favicons and we store favicon for a URL only if the URL
    120   // exist in history or bookmarks.
    121   if ((items & importer::HISTORY) && !cancelled()) {
    122     bridge_->NotifyItemStarted(importer::HISTORY);
    123     ImportHistory();
    124     bridge_->NotifyItemEnded(importer::HISTORY);
    125   }
    126 
    127   if ((items & importer::FAVORITES) && !cancelled()) {
    128     bridge_->NotifyItemStarted(importer::FAVORITES);
    129     ImportBookmarks();
    130     bridge_->NotifyItemEnded(importer::FAVORITES);
    131   }
    132   if ((items & importer::SEARCH_ENGINES) && !cancelled()) {
    133     bridge_->NotifyItemStarted(importer::SEARCH_ENGINES);
    134     ImportSearchEngines();
    135     bridge_->NotifyItemEnded(importer::SEARCH_ENGINES);
    136   }
    137   if ((items & importer::PASSWORDS) && !cancelled()) {
    138     bridge_->NotifyItemStarted(importer::PASSWORDS);
    139     ImportPasswords();
    140     bridge_->NotifyItemEnded(importer::PASSWORDS);
    141   }
    142   bridge_->NotifyEnded();
    143 }
    144 
    145 void FirefoxImporter::ImportHistory() {
    146   base::FilePath file = source_path_.AppendASCII("places.sqlite");
    147   if (!base::PathExists(file))
    148     return;
    149 
    150   sql::Connection db;
    151   if (!db.Open(file))
    152     return;
    153 
    154   // |visit_type| represent the transition type of URLs (typed, click,
    155   // redirect, bookmark, etc.) We eliminate some URLs like sub-frames and
    156   // redirects, since we don't want them to appear in history.
    157   // Firefox transition types are defined in:
    158   //   toolkit/components/places/public/nsINavHistoryService.idl
    159   const char* query = "SELECT h.url, h.title, h.visit_count, "
    160                       "h.hidden, h.typed, v.visit_date "
    161                       "FROM moz_places h JOIN moz_historyvisits v "
    162                       "ON h.id = v.place_id "
    163                       "WHERE v.visit_type <= 3";
    164 
    165   sql::Statement s(db.GetUniqueStatement(query));
    166 
    167   std::vector<ImporterURLRow> rows;
    168   while (s.Step() && !cancelled()) {
    169     GURL url(s.ColumnString(0));
    170 
    171     // Filter out unwanted URLs.
    172     if (!CanImportURL(url))
    173       continue;
    174 
    175     ImporterURLRow row(url);
    176     row.title = s.ColumnString16(1);
    177     row.visit_count = s.ColumnInt(2);
    178     row.hidden = s.ColumnInt(3) == 1;
    179     row.typed_count = s.ColumnInt(4);
    180     row.last_visit = base::Time::FromTimeT(s.ColumnInt64(5)/1000000);
    181 
    182     rows.push_back(row);
    183   }
    184 
    185   if (!rows.empty() && !cancelled())
    186     bridge_->SetHistoryItems(rows, importer::VISIT_SOURCE_FIREFOX_IMPORTED);
    187 }
    188 
    189 void FirefoxImporter::ImportBookmarks() {
    190   base::FilePath file = source_path_.AppendASCII("places.sqlite");
    191   if (!base::PathExists(file))
    192     return;
    193 
    194   sql::Connection db;
    195   if (!db.Open(file))
    196     return;
    197 
    198   // Get the bookmark folders that we are interested in.
    199   int toolbar_folder_id = -1;
    200   int menu_folder_id = -1;
    201   int unsorted_folder_id = -1;
    202   LoadRootNodeID(&db, &toolbar_folder_id, &menu_folder_id, &unsorted_folder_id);
    203 
    204   // Load livemark IDs.
    205   std::set<int> livemark_id;
    206   LoadLivemarkIDs(&db, &livemark_id);
    207 
    208   // Load the default bookmarks.
    209   std::set<GURL> default_urls;
    210   LoadDefaultBookmarks(app_path_, &default_urls);
    211 
    212   BookmarkList list;
    213   GetTopBookmarkFolder(&db, toolbar_folder_id, &list);
    214   GetTopBookmarkFolder(&db, menu_folder_id, &list);
    215   GetTopBookmarkFolder(&db, unsorted_folder_id, &list);
    216   size_t count = list.size();
    217   for (size_t i = 0; i < count; ++i)
    218     GetWholeBookmarkFolder(&db, &list, i, NULL);
    219 
    220   std::vector<ImportedBookmarkEntry> bookmarks;
    221   std::vector<importer::URLKeywordInfo> url_keywords;
    222   FaviconMap favicon_map;
    223 
    224   // TODO(jcampan): http://b/issue?id=1196285 we do not support POST based
    225   //                keywords yet.  We won't include them in the list.
    226   std::set<int> post_keyword_ids;
    227   const char* query = "SELECT b.id FROM moz_bookmarks b "
    228       "INNER JOIN moz_items_annos ia ON ia.item_id = b.id "
    229       "INNER JOIN moz_anno_attributes aa ON ia.anno_attribute_id = aa.id "
    230       "WHERE aa.name = 'bookmarkProperties/POSTData'";
    231   sql::Statement s(db.GetUniqueStatement(query));
    232 
    233   if (!s.is_valid())
    234     return;
    235 
    236   while (s.Step() && !cancelled())
    237     post_keyword_ids.insert(s.ColumnInt(0));
    238 
    239   for (size_t i = 0; i < list.size(); ++i) {
    240     BookmarkItem* item = list[i];
    241 
    242     if (item->type == TYPE_FOLDER) {
    243       // Folders are added implicitly on adding children, so we only explicitly
    244       // add empty folders.
    245       if (!item->empty_folder)
    246         continue;
    247     } else if (item->type == TYPE_BOOKMARK) {
    248       // Import only valid bookmarks
    249       if (!CanImportURL(item->url))
    250         continue;
    251     } else {
    252       continue;
    253     }
    254 
    255     // Skip the default bookmarks and unwanted URLs.
    256     if (default_urls.find(item->url) != default_urls.end() ||
    257         post_keyword_ids.find(item->id) != post_keyword_ids.end())
    258       continue;
    259 
    260     // Find the bookmark path by tracing their links to parent folders.
    261     std::vector<string16> path;
    262     BookmarkItem* child = item;
    263     bool found_path = false;
    264     bool is_in_toolbar = false;
    265     while (child->parent >= 0) {
    266       BookmarkItem* parent = list[child->parent];
    267       if (livemark_id.find(parent->id) != livemark_id.end()) {
    268         // Don't import live bookmarks.
    269         break;
    270       }
    271 
    272       if (parent->id != menu_folder_id) {
    273         // To avoid excessive nesting, omit the name for the bookmarks menu
    274         // folder.
    275         path.insert(path.begin(), parent->title);
    276       }
    277 
    278       if (parent->id == toolbar_folder_id)
    279         is_in_toolbar = true;
    280 
    281       if (parent->id == toolbar_folder_id ||
    282           parent->id == menu_folder_id ||
    283           parent->id == unsorted_folder_id) {
    284         // We've reached a root node, hooray!
    285         found_path = true;
    286         break;
    287       }
    288 
    289       child = parent;
    290     }
    291 
    292     if (!found_path)
    293       continue;
    294 
    295     ImportedBookmarkEntry entry;
    296     entry.creation_time = item->date_added;
    297     entry.title = item->title;
    298     entry.url = item->url;
    299     entry.path = path;
    300     entry.in_toolbar = is_in_toolbar;
    301     entry.is_folder = item->type == TYPE_FOLDER;
    302 
    303     bookmarks.push_back(entry);
    304 
    305     if (item->type == TYPE_BOOKMARK) {
    306       if (item->favicon)
    307         favicon_map[item->favicon].insert(item->url);
    308 
    309       // This bookmark has a keyword, we should import it.
    310       if (!item->keyword.empty() && item->url.is_valid()) {
    311         importer::URLKeywordInfo url_keyword_info;
    312         url_keyword_info.url = item->url;
    313         url_keyword_info.keyword.assign(UTF8ToUTF16(item->keyword));
    314         url_keyword_info.display_name = item->title;
    315         url_keywords.push_back(url_keyword_info);
    316       }
    317     }
    318   }
    319 
    320   STLDeleteElements(&list);
    321 
    322   // Write into profile.
    323   if (!bookmarks.empty() && !cancelled()) {
    324     const string16& first_folder_name =
    325         bridge_->GetLocalizedString(IDS_BOOKMARK_GROUP_FROM_FIREFOX);
    326     bridge_->AddBookmarks(bookmarks, first_folder_name);
    327   }
    328   if (!url_keywords.empty() && !cancelled()) {
    329     bridge_->SetKeywords(url_keywords, false);
    330   }
    331   if (!favicon_map.empty() && !cancelled()) {
    332     std::vector<ImportedFaviconUsage> favicons;
    333     LoadFavicons(&db, favicon_map, &favicons);
    334     bridge_->SetFavicons(favicons);
    335   }
    336 }
    337 
    338 void FirefoxImporter::ImportPasswords() {
    339   // Initializes NSS3.
    340   NSSDecryptor decryptor;
    341   if (!decryptor.Init(source_path_, source_path_) &&
    342       !decryptor.Init(app_path_, source_path_)) {
    343     return;
    344   }
    345 
    346   std::vector<autofill::PasswordForm> forms;
    347   base::FilePath source_path = source_path_;
    348   base::FilePath file = source_path.AppendASCII("signons.sqlite");
    349   if (base::PathExists(file)) {
    350     // Since Firefox 3.1, passwords are in signons.sqlite db.
    351     decryptor.ReadAndParseSignons(file, &forms);
    352   } else {
    353     // Firefox 3.0 uses signons3.txt to store the passwords.
    354     file = source_path.AppendASCII("signons3.txt");
    355     if (!base::PathExists(file))
    356       file = source_path.AppendASCII("signons2.txt");
    357 
    358     std::string content;
    359     base::ReadFileToString(file, &content);
    360     decryptor.ParseSignons(content, &forms);
    361   }
    362 
    363   if (!cancelled()) {
    364     for (size_t i = 0; i < forms.size(); ++i) {
    365       bridge_->SetPasswordForm(forms[i]);
    366     }
    367   }
    368 }
    369 
    370 void FirefoxImporter::ImportSearchEngines() {
    371   std::vector<std::string> search_engine_data;
    372   GetSearchEnginesXMLData(&search_engine_data);
    373 
    374   bridge_->SetFirefoxSearchEnginesXMLData(search_engine_data);
    375 }
    376 
    377 void FirefoxImporter::ImportHomepage() {
    378   GURL home_page = GetHomepage(source_path_);
    379   if (home_page.is_valid() && !IsDefaultHomepage(home_page, app_path_)) {
    380     bridge_->AddHomePage(home_page);
    381   }
    382 }
    383 
    384 void FirefoxImporter::GetSearchEnginesXMLData(
    385     std::vector<std::string>* search_engine_data) {
    386   base::FilePath file = source_path_.AppendASCII("search.sqlite");
    387   if (!base::PathExists(file))
    388     return;
    389 
    390   sql::Connection db;
    391   if (!db.Open(file))
    392     return;
    393 
    394   const char* query = "SELECT engineid FROM engine_data "
    395                       "WHERE engineid NOT IN "
    396                       "(SELECT engineid FROM engine_data "
    397                       "WHERE name='hidden') "
    398                       "ORDER BY value ASC";
    399 
    400   sql::Statement s(db.GetUniqueStatement(query));
    401   if (!s.is_valid())
    402     return;
    403 
    404   base::FilePath app_path = app_path_.AppendASCII("searchplugins");
    405   base::FilePath profile_path = source_path_.AppendASCII("searchplugins");
    406 
    407   // Firefox doesn't store a search engine in its sqlite database unless the
    408   // user has added a engine. So we get search engines from sqlite db as well
    409   // as from the file system.
    410   if (s.Step()) {
    411     const std::string kAppPrefix("[app]/");
    412     const std::string kProfilePrefix("[profile]/");
    413     do {
    414       base::FilePath file;
    415       std::string engine(s.ColumnString(0));
    416 
    417       // The string contains [app]/<name>.xml or [profile]/<name>.xml where
    418       // the [app] and [profile] need to be replaced with the actual app or
    419       // profile path.
    420       size_t index = engine.find(kAppPrefix);
    421       if (index != std::string::npos) {
    422         // Remove '[app]/'.
    423         file = app_path.AppendASCII(engine.substr(index + kAppPrefix.length()));
    424       } else if ((index = engine.find(kProfilePrefix)) != std::string::npos) {
    425         // Remove '[profile]/'.
    426           file = profile_path.AppendASCII(
    427               engine.substr(index + kProfilePrefix.length()));
    428       } else {
    429         // Looks like absolute path to the file.
    430         file = base::FilePath::FromUTF8Unsafe(engine);
    431       }
    432       std::string file_data;
    433       base::ReadFileToString(file, &file_data);
    434       search_engine_data->push_back(file_data);
    435     } while (s.Step() && !cancelled());
    436   }
    437 
    438 #if defined(OS_POSIX)
    439   // Ubuntu-flavored Firefox supports locale-specific search engines via
    440   // locale-named subdirectories. They fall back to en-US.
    441   // See http://crbug.com/53899
    442   // TODO(jshin): we need to make sure our locale code matches that of
    443   // Firefox.
    444   DCHECK(!locale_.empty());
    445   base::FilePath locale_app_path = app_path.AppendASCII(locale_);
    446   base::FilePath default_locale_app_path = app_path.AppendASCII("en-US");
    447   if (base::DirectoryExists(locale_app_path))
    448     app_path = locale_app_path;
    449   else if (base::DirectoryExists(default_locale_app_path))
    450     app_path = default_locale_app_path;
    451 #endif
    452 
    453   // Get search engine definition from file system.
    454   base::FileEnumerator engines(app_path, false, base::FileEnumerator::FILES);
    455   for (base::FilePath engine_path = engines.Next();
    456        !engine_path.value().empty(); engine_path = engines.Next()) {
    457     std::string file_data;
    458     base::ReadFileToString(file, &file_data);
    459     search_engine_data->push_back(file_data);
    460   }
    461 }
    462 
    463 void FirefoxImporter::LoadRootNodeID(sql::Connection* db,
    464                                       int* toolbar_folder_id,
    465                                       int* menu_folder_id,
    466                                       int* unsorted_folder_id) {
    467   static const char* kToolbarFolderName = "toolbar";
    468   static const char* kMenuFolderName = "menu";
    469   static const char* kUnsortedFolderName = "unfiled";
    470 
    471   const char* query = "SELECT root_name, folder_id FROM moz_bookmarks_roots";
    472   sql::Statement s(db->GetUniqueStatement(query));
    473 
    474   while (s.Step()) {
    475     std::string folder = s.ColumnString(0);
    476     int id = s.ColumnInt(1);
    477     if (folder == kToolbarFolderName)
    478       *toolbar_folder_id = id;
    479     else if (folder == kMenuFolderName)
    480       *menu_folder_id = id;
    481     else if (folder == kUnsortedFolderName)
    482       *unsorted_folder_id = id;
    483   }
    484 }
    485 
    486 void FirefoxImporter::LoadLivemarkIDs(sql::Connection* db,
    487                                        std::set<int>* livemark) {
    488   static const char* kFeedAnnotation = "livemark/feedURI";
    489   livemark->clear();
    490 
    491   const char* query = "SELECT b.item_id "
    492                       "FROM moz_anno_attributes a "
    493                       "JOIN moz_items_annos b ON a.id = b.anno_attribute_id "
    494                       "WHERE a.name = ? ";
    495   sql::Statement s(db->GetUniqueStatement(query));
    496   s.BindString(0, kFeedAnnotation);
    497 
    498   while (s.Step() && !cancelled())
    499     livemark->insert(s.ColumnInt(0));
    500 }
    501 
    502 void FirefoxImporter::GetTopBookmarkFolder(sql::Connection* db,
    503                                             int folder_id,
    504                                             BookmarkList* list) {
    505   const char* query = "SELECT b.title "
    506                      "FROM moz_bookmarks b "
    507                      "WHERE b.type = 2 AND b.id = ? "
    508                      "ORDER BY b.position";
    509   sql::Statement s(db->GetUniqueStatement(query));
    510   s.BindInt(0, folder_id);
    511 
    512   if (s.Step()) {
    513     BookmarkItem* item = new BookmarkItem;
    514     item->parent = -1;  // The top level folder has no parent.
    515     item->id = folder_id;
    516     item->title = s.ColumnString16(0);
    517     item->type = TYPE_FOLDER;
    518     item->favicon = 0;
    519     item->empty_folder = true;
    520     list->push_back(item);
    521   }
    522 }
    523 
    524 void FirefoxImporter::GetWholeBookmarkFolder(sql::Connection* db,
    525                                               BookmarkList* list,
    526                                               size_t position,
    527                                               bool* empty_folder) {
    528   if (position >= list->size()) {
    529     NOTREACHED();
    530     return;
    531   }
    532 
    533   const char* query = "SELECT b.id, h.url, COALESCE(b.title, h.title), "
    534          "b.type, k.keyword, b.dateAdded, h.favicon_id "
    535          "FROM moz_bookmarks b "
    536          "LEFT JOIN moz_places h ON b.fk = h.id "
    537          "LEFT JOIN moz_keywords k ON k.id = b.keyword_id "
    538          "WHERE b.type IN (1,2) AND b.parent = ? "
    539          "ORDER BY b.position";
    540   sql::Statement s(db->GetUniqueStatement(query));
    541   s.BindInt(0, (*list)[position]->id);
    542 
    543   BookmarkList temp_list;
    544   while (s.Step()) {
    545     BookmarkItem* item = new BookmarkItem;
    546     item->parent = static_cast<int>(position);
    547     item->id = s.ColumnInt(0);
    548     item->url = GURL(s.ColumnString(1));
    549     item->title = s.ColumnString16(2);
    550     item->type = static_cast<BookmarkItemType>(s.ColumnInt(3));
    551     item->keyword = s.ColumnString(4);
    552     item->date_added = base::Time::FromTimeT(s.ColumnInt64(5)/1000000);
    553     item->favicon = s.ColumnInt64(6);
    554     item->empty_folder = true;
    555 
    556     temp_list.push_back(item);
    557     if (empty_folder != NULL)
    558       *empty_folder = false;
    559   }
    560 
    561   // Appends all items to the list.
    562   for (BookmarkList::iterator i = temp_list.begin();
    563        i != temp_list.end(); ++i) {
    564     list->push_back(*i);
    565     // Recursive add bookmarks in sub-folders.
    566     if ((*i)->type == TYPE_FOLDER)
    567       GetWholeBookmarkFolder(db, list, list->size() - 1, &(*i)->empty_folder);
    568   }
    569 }
    570 
    571 void FirefoxImporter::LoadFavicons(
    572     sql::Connection* db,
    573     const FaviconMap& favicon_map,
    574     std::vector<ImportedFaviconUsage>* favicons) {
    575   const char* query = "SELECT url, data FROM moz_favicons WHERE id=?";
    576   sql::Statement s(db->GetUniqueStatement(query));
    577 
    578   if (!s.is_valid())
    579     return;
    580 
    581   for (FaviconMap::const_iterator i = favicon_map.begin();
    582        i != favicon_map.end(); ++i) {
    583     s.BindInt64(0, i->first);
    584     if (s.Step()) {
    585       ImportedFaviconUsage usage;
    586 
    587       usage.favicon_url = GURL(s.ColumnString(0));
    588       if (!usage.favicon_url.is_valid())
    589         continue;  // Don't bother importing favicons with invalid URLs.
    590 
    591       std::vector<unsigned char> data;
    592       s.ColumnBlobAsVector(1, &data);
    593       if (data.empty())
    594         continue;  // Data definitely invalid.
    595 
    596       if (!importer::ReencodeFavicon(&data[0], data.size(), &usage.png_data))
    597         continue;  // Unable to decode.
    598 
    599       usage.urls = i->second;
    600       favicons->push_back(usage);
    601     }
    602     s.Reset(true);
    603   }
    604 }
    605