Home | History | Annotate | Download | only in importer
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/utility/importer/bookmark_html_reader.h"
      6 
      7 #include "base/bind.h"
      8 #include "base/bind_helpers.h"
      9 #include "base/callback.h"
     10 #include "base/files/file_path.h"
     11 #include "base/path_service.h"
     12 #include "base/strings/string16.h"
     13 #include "base/strings/string_util.h"
     14 #include "base/strings/utf_string_conversions.h"
     15 #include "chrome/common/chrome_paths.h"
     16 #include "chrome/common/importer/imported_bookmark_entry.h"
     17 #include "testing/gtest/include/gtest/gtest.h"
     18 
     19 using base::ASCIIToUTF16;
     20 using base::UTF16ToWide;
     21 
     22 namespace bookmark_html_reader {
     23 
     24 TEST(BookmarkHTMLReaderTest, ParseTests) {
     25   bool result;
     26 
     27   // Tests charset.
     28   std::string charset;
     29   result = internal::ParseCharsetFromLine(
     30       "<META HTTP-EQUIV=\"Content-Type\" "
     31       "CONTENT=\"text/html; charset=UTF-8\">",
     32       &charset);
     33   EXPECT_TRUE(result);
     34   EXPECT_EQ("UTF-8", charset);
     35 
     36   // Escaped characters in name.
     37   base::string16 folder_name;
     38   bool is_toolbar_folder;
     39   base::Time folder_add_date;
     40   result = internal::ParseFolderNameFromLine(
     41       "<DT><H3 ADD_DATE=\"1207558707\" >&lt; &gt;"
     42       " &amp; &quot; &#39; \\ /</H3>",
     43       charset, &folder_name, &is_toolbar_folder, &folder_add_date);
     44   EXPECT_TRUE(result);
     45   EXPECT_EQ(ASCIIToUTF16("< > & \" ' \\ /"), folder_name);
     46   EXPECT_FALSE(is_toolbar_folder);
     47   EXPECT_TRUE(base::Time::FromTimeT(1207558707) == folder_add_date);
     48 
     49   // Empty name and toolbar folder attribute.
     50   result = internal::ParseFolderNameFromLine(
     51       "<DT><H3 PERSONAL_TOOLBAR_FOLDER=\"true\"></H3>",
     52       charset, &folder_name, &is_toolbar_folder, &folder_add_date);
     53   EXPECT_TRUE(result);
     54   EXPECT_EQ(base::string16(), folder_name);
     55   EXPECT_TRUE(is_toolbar_folder);
     56 
     57   // Unicode characters in title and shortcut.
     58   base::string16 title;
     59   GURL url, favicon;
     60   base::string16 shortcut;
     61   base::string16 post_data;
     62   base::Time add_date;
     63   result = internal::ParseBookmarkFromLine(
     64       "<DT><A HREF=\"http://chinese.site.cn/path?query=1#ref\" "
     65       "SHORTCUTURL=\"\xE4\xB8\xAD\">\xE4\xB8\xAD\xE6\x96\x87</A>",
     66       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
     67   EXPECT_TRUE(result);
     68   EXPECT_EQ(L"\x4E2D\x6587", UTF16ToWide(title));
     69   EXPECT_EQ("http://chinese.site.cn/path?query=1#ref", url.spec());
     70   EXPECT_EQ(L"\x4E2D", UTF16ToWide(shortcut));
     71   EXPECT_EQ(base::string16(), post_data);
     72   EXPECT_TRUE(base::Time() == add_date);
     73 
     74   // No shortcut, and url contains %22 ('"' character).
     75   result = internal::ParseBookmarkFromLine(
     76       "<DT><A HREF=\"http://domain.com/?q=%22<>%22\">name</A>",
     77       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
     78   EXPECT_TRUE(result);
     79   EXPECT_EQ(ASCIIToUTF16("name"), title);
     80   EXPECT_EQ("http://domain.com/?q=%22%3C%3E%22", url.spec());
     81   EXPECT_EQ(base::string16(), shortcut);
     82   EXPECT_EQ(base::string16(), post_data);
     83   EXPECT_TRUE(base::Time() == add_date);
     84 
     85   result = internal::ParseBookmarkFromLine(
     86       "<DT><A HREF=\"http://domain.com/?g=";\"\">name</A>",
     87       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
     88   EXPECT_TRUE(result);
     89   EXPECT_EQ(ASCIIToUTF16("name"), title);
     90   EXPECT_EQ("http://domain.com/?g=%22", url.spec());
     91   EXPECT_EQ(base::string16(), shortcut);
     92   EXPECT_EQ(base::string16(), post_data);
     93   EXPECT_TRUE(base::Time() == add_date);
     94 
     95   // Creation date.
     96   result = internal::ParseBookmarkFromLine(
     97       "<DT><A HREF=\"http://site/\" ADD_DATE=\"1121301154\">name</A>",
     98       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
     99   EXPECT_TRUE(result);
    100   EXPECT_EQ(ASCIIToUTF16("name"), title);
    101   EXPECT_EQ(GURL("http://site/"), url);
    102   EXPECT_EQ(base::string16(), shortcut);
    103   EXPECT_EQ(base::string16(), post_data);
    104   EXPECT_TRUE(base::Time::FromTimeT(1121301154) == add_date);
    105 
    106   // Post-data
    107   result = internal::ParseBookmarkFromLine(
    108       "<DT><A HREF=\"http://localhost:8080/test/hello.html\" ADD_DATE=\""
    109       "1212447159\" LAST_VISIT=\"1212447251\" LAST_MODIFIED=\"1212447248\""
    110       "SHORTCUTURL=\"post\" ICON=\"data:\" POST_DATA=\"lname%3D%25s\""
    111       "LAST_CHARSET=\"UTF-8\" ID=\"rdf:#$weKaR3\">Test Post keyword</A>",
    112       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
    113   EXPECT_TRUE(result);
    114   EXPECT_EQ(ASCIIToUTF16("Test Post keyword"), title);
    115   EXPECT_EQ("http://localhost:8080/test/hello.html", url.spec());
    116   EXPECT_EQ(ASCIIToUTF16("post"), shortcut);
    117   EXPECT_EQ(ASCIIToUTF16("lname%3D%25s"), post_data);
    118   EXPECT_TRUE(base::Time::FromTimeT(1212447159) == add_date);
    119 
    120   // Invalid case.
    121   result = internal::ParseBookmarkFromLine(
    122       "<DT><A HREF=\"http://domain.com/?q=%22",
    123       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
    124   EXPECT_FALSE(result);
    125   EXPECT_EQ(base::string16(), title);
    126   EXPECT_EQ("", url.spec());
    127   EXPECT_EQ(base::string16(), shortcut);
    128   EXPECT_EQ(base::string16(), post_data);
    129   EXPECT_TRUE(base::Time() == add_date);
    130 
    131   // Epiphany format.
    132   result = internal::ParseMinimumBookmarkFromLine(
    133       "<dt><a href=\"http://www.google.com/\">Google</a></dt>",
    134       charset, &title, &url);
    135   EXPECT_TRUE(result);
    136   EXPECT_EQ(ASCIIToUTF16("Google"), title);
    137   EXPECT_EQ("http://www.google.com/", url.spec());
    138 }
    139 
    140 namespace {
    141 
    142 class BookmarkHTMLReaderTestWithData : public testing::Test {
    143  public:
    144   virtual void SetUp() OVERRIDE;
    145 
    146  protected:
    147   void ExpectFirstFirefox2Bookmark(const ImportedBookmarkEntry& entry);
    148   void ExpectSecondFirefox2Bookmark(const ImportedBookmarkEntry& entry);
    149   void ExpectThirdFirefox2Bookmark(const ImportedBookmarkEntry& entry);
    150   void ExpectFirstEpiphanyBookmark(const ImportedBookmarkEntry& entry);
    151   void ExpectSecondEpiphanyBookmark(const ImportedBookmarkEntry& entry);
    152   void ExpectFirstFirefox23Bookmark(const ImportedBookmarkEntry& entry);
    153   void ExpectSecondFirefox23Bookmark(const ImportedBookmarkEntry& entry);
    154   void ExpectThirdFirefox23Bookmark(const ImportedBookmarkEntry& entry);
    155 
    156   base::FilePath test_data_path_;
    157 };
    158 
    159 void BookmarkHTMLReaderTestWithData::SetUp() {
    160   ASSERT_TRUE(PathService::Get(chrome::DIR_TEST_DATA, &test_data_path_));
    161   test_data_path_ = test_data_path_.AppendASCII("bookmark_html_reader");
    162 }
    163 
    164 void BookmarkHTMLReaderTestWithData::ExpectFirstFirefox2Bookmark(
    165     const ImportedBookmarkEntry& entry) {
    166   EXPECT_EQ(ASCIIToUTF16("Empty"), entry.title);
    167   EXPECT_TRUE(entry.is_folder);
    168   EXPECT_EQ(base::Time::FromTimeT(1295938143), entry.creation_time);
    169   EXPECT_EQ(1U, entry.path.size());
    170   if (entry.path.size() == 1)
    171     EXPECT_EQ(ASCIIToUTF16("Empty's Parent"), entry.path.front());
    172 }
    173 
    174 void BookmarkHTMLReaderTestWithData::ExpectSecondFirefox2Bookmark(
    175     const ImportedBookmarkEntry& entry) {
    176   EXPECT_EQ(ASCIIToUTF16("[Tamura Yukari.com]"), entry.title);
    177   EXPECT_FALSE(entry.is_folder);
    178   EXPECT_EQ(base::Time::FromTimeT(1234567890), entry.creation_time);
    179   EXPECT_EQ(1U, entry.path.size());
    180   if (entry.path.size() == 1)
    181     EXPECT_EQ(ASCIIToUTF16("Not Empty"), entry.path.front());
    182   EXPECT_EQ("http://www.tamurayukari.com/", entry.url.spec());
    183 }
    184 
    185 void BookmarkHTMLReaderTestWithData::ExpectThirdFirefox2Bookmark(
    186     const ImportedBookmarkEntry& entry) {
    187   EXPECT_EQ(ASCIIToUTF16("Google"), entry.title);
    188   EXPECT_FALSE(entry.is_folder);
    189   EXPECT_EQ(base::Time::FromTimeT(0000000000), entry.creation_time);
    190   EXPECT_EQ(1U, entry.path.size());
    191   if (entry.path.size() == 1)
    192     EXPECT_EQ(ASCIIToUTF16("Not Empty But Default"), entry.path.front());
    193   EXPECT_EQ("http://www.google.com/", entry.url.spec());
    194 }
    195 
    196 void BookmarkHTMLReaderTestWithData::ExpectFirstEpiphanyBookmark(
    197     const ImportedBookmarkEntry& entry) {
    198   EXPECT_EQ(ASCIIToUTF16("[Tamura Yukari.com]"), entry.title);
    199   EXPECT_EQ("http://www.tamurayukari.com/", entry.url.spec());
    200   EXPECT_EQ(0U, entry.path.size());
    201 }
    202 
    203 void BookmarkHTMLReaderTestWithData::ExpectSecondEpiphanyBookmark(
    204     const ImportedBookmarkEntry& entry) {
    205   EXPECT_EQ(ASCIIToUTF16("Google"), entry.title);
    206   EXPECT_EQ("http://www.google.com/", entry.url.spec());
    207   EXPECT_EQ(0U, entry.path.size());
    208 }
    209 
    210 void BookmarkHTMLReaderTestWithData::ExpectFirstFirefox23Bookmark(
    211     const ImportedBookmarkEntry& entry) {
    212   EXPECT_EQ(ASCIIToUTF16("Google"), entry.title);
    213   EXPECT_FALSE(entry.is_folder);
    214   EXPECT_EQ(base::Time::FromTimeT(1376102167), entry.creation_time);
    215   EXPECT_EQ(0U, entry.path.size());
    216   EXPECT_EQ("https://www.google.com/", entry.url.spec());
    217 }
    218 
    219 void BookmarkHTMLReaderTestWithData::ExpectSecondFirefox23Bookmark(
    220     const ImportedBookmarkEntry& entry) {
    221   EXPECT_EQ(ASCIIToUTF16("Issues"), entry.title);
    222   EXPECT_FALSE(entry.is_folder);
    223   EXPECT_EQ(base::Time::FromTimeT(1376102304), entry.creation_time);
    224   EXPECT_EQ(1U, entry.path.size());
    225   EXPECT_EQ(ASCIIToUTF16("Chromium"), entry.path.front());
    226   EXPECT_EQ("https://code.google.com/p/chromium/issues/list", entry.url.spec());
    227 }
    228 
    229 void BookmarkHTMLReaderTestWithData::ExpectThirdFirefox23Bookmark(
    230     const ImportedBookmarkEntry& entry) {
    231   EXPECT_EQ(ASCIIToUTF16("CodeSearch"), entry.title);
    232   EXPECT_FALSE(entry.is_folder);
    233   EXPECT_EQ(base::Time::FromTimeT(1376102224), entry.creation_time);
    234   EXPECT_EQ(1U, entry.path.size());
    235   EXPECT_EQ(ASCIIToUTF16("Chromium"), entry.path.front());
    236   EXPECT_EQ("http://code.google.com/p/chromium/codesearch", entry.url.spec());
    237 }
    238 
    239 }  // namespace
    240 
    241 TEST_F(BookmarkHTMLReaderTestWithData, Firefox2BookmarkFileImport) {
    242   base::FilePath path = test_data_path_.AppendASCII("firefox2.html");
    243 
    244   std::vector<ImportedBookmarkEntry> bookmarks;
    245   ImportBookmarksFile(base::Callback<bool(void)>(),
    246                       base::Callback<bool(const GURL&)>(),
    247                       path, &bookmarks, NULL);
    248 
    249   ASSERT_EQ(3U, bookmarks.size());
    250   ExpectFirstFirefox2Bookmark(bookmarks[0]);
    251   ExpectSecondFirefox2Bookmark(bookmarks[1]);
    252   ExpectThirdFirefox2Bookmark(bookmarks[2]);
    253 }
    254 
    255 TEST_F(BookmarkHTMLReaderTestWithData, BookmarkFileWithHrTagImport) {
    256   base::FilePath path = test_data_path_.AppendASCII("firefox23.html");
    257 
    258   std::vector<ImportedBookmarkEntry> bookmarks;
    259   ImportBookmarksFile(base::Callback<bool(void)>(),
    260                       base::Callback<bool(const GURL&)>(),
    261                       path, &bookmarks, NULL);
    262 
    263   ASSERT_EQ(3U, bookmarks.size());
    264   ExpectFirstFirefox23Bookmark(bookmarks[0]);
    265   ExpectSecondFirefox23Bookmark(bookmarks[1]);
    266   ExpectThirdFirefox23Bookmark(bookmarks[2]);
    267 }
    268 
    269 TEST_F(BookmarkHTMLReaderTestWithData, EpiphanyBookmarkFileImport) {
    270   base::FilePath path = test_data_path_.AppendASCII("epiphany.html");
    271 
    272   std::vector<ImportedBookmarkEntry> bookmarks;
    273   ImportBookmarksFile(base::Callback<bool(void)>(),
    274                       base::Callback<bool(const GURL&)>(),
    275                       path, &bookmarks, NULL);
    276 
    277   ASSERT_EQ(2U, bookmarks.size());
    278   ExpectFirstEpiphanyBookmark(bookmarks[0]);
    279   ExpectSecondEpiphanyBookmark(bookmarks[1]);
    280 }
    281 
    282 namespace {
    283 
    284 class CancelAfterFifteenCalls {
    285   int count;
    286  public:
    287   CancelAfterFifteenCalls() : count(0) { }
    288   bool ShouldCancel() {
    289     return ++count > 16;
    290   }
    291 };
    292 
    293 }  // namespace
    294 
    295 TEST_F(BookmarkHTMLReaderTestWithData, CancellationCallback) {
    296   // Use a file for testing that has multiple bookmarks.
    297   base::FilePath path = test_data_path_.AppendASCII("firefox2.html");
    298 
    299   std::vector<ImportedBookmarkEntry> bookmarks;
    300   CancelAfterFifteenCalls cancel_fifteen;
    301   ImportBookmarksFile(base::Bind(&CancelAfterFifteenCalls::ShouldCancel,
    302                                  base::Unretained(&cancel_fifteen)),
    303                       base::Callback<bool(const GURL&)>(),
    304                       path, &bookmarks, NULL);
    305 
    306   // The cancellation callback is checked before each line is read, so fifteen
    307   // lines are imported. The first fifteen lines of firefox2.html include only
    308   // one bookmark.
    309   ASSERT_EQ(1U, bookmarks.size());
    310   ExpectFirstFirefox2Bookmark(bookmarks[0]);
    311 }
    312 
    313 namespace {
    314 
    315 bool IsURLValid(const GURL& url) {
    316   // No offense to whomever owns this domain...
    317   return !url.DomainIs("tamurayukari.com");
    318 }
    319 
    320 }  // namespace
    321 
    322 TEST_F(BookmarkHTMLReaderTestWithData, ValidURLCallback) {
    323   // Use a file for testing that has multiple bookmarks.
    324   base::FilePath path = test_data_path_.AppendASCII("firefox2.html");
    325 
    326   std::vector<ImportedBookmarkEntry> bookmarks;
    327   ImportBookmarksFile(base::Callback<bool(void)>(),
    328                       base::Bind(&IsURLValid),
    329                       path, &bookmarks, NULL);
    330 
    331   ASSERT_EQ(2U, bookmarks.size());
    332   ExpectFirstFirefox2Bookmark(bookmarks[0]);
    333   ExpectThirdFirefox2Bookmark(bookmarks[1]);
    334 }
    335 
    336 }  // namespace bookmark_html_reader
    337