Home | History | Annotate | Download | only in importer
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "chrome/utility/importer/bookmark_html_reader.h"
      6 
      7 #include "base/bind.h"
      8 #include "base/bind_helpers.h"
      9 #include "base/callback.h"
     10 #include "base/files/file_path.h"
     11 #include "base/path_service.h"
     12 #include "base/strings/string16.h"
     13 #include "base/strings/string_util.h"
     14 #include "base/strings/utf_string_conversions.h"
     15 #include "chrome/common/chrome_paths.h"
     16 #include "chrome/common/importer/imported_bookmark_entry.h"
     17 #include "testing/gtest/include/gtest/gtest.h"
     18 
     19 namespace bookmark_html_reader {
     20 
     21 TEST(BookmarkHTMLReaderTest, ParseTests) {
     22   bool result;
     23 
     24   // Tests charset.
     25   std::string charset;
     26   result = internal::ParseCharsetFromLine(
     27       "<META HTTP-EQUIV=\"Content-Type\" "
     28       "CONTENT=\"text/html; charset=UTF-8\">",
     29       &charset);
     30   EXPECT_TRUE(result);
     31   EXPECT_EQ("UTF-8", charset);
     32 
     33   // Escaped characters in name.
     34   base::string16 folder_name;
     35   bool is_toolbar_folder;
     36   base::Time folder_add_date;
     37   result = internal::ParseFolderNameFromLine(
     38       "<DT><H3 ADD_DATE=\"1207558707\" >&lt; &gt;"
     39       " &amp; &quot; &#39; \\ /</H3>",
     40       charset, &folder_name, &is_toolbar_folder, &folder_add_date);
     41   EXPECT_TRUE(result);
     42   EXPECT_EQ(ASCIIToUTF16("< > & \" ' \\ /"), folder_name);
     43   EXPECT_FALSE(is_toolbar_folder);
     44   EXPECT_TRUE(base::Time::FromTimeT(1207558707) == folder_add_date);
     45 
     46   // Empty name and toolbar folder attribute.
     47   result = internal::ParseFolderNameFromLine(
     48       "<DT><H3 PERSONAL_TOOLBAR_FOLDER=\"true\"></H3>",
     49       charset, &folder_name, &is_toolbar_folder, &folder_add_date);
     50   EXPECT_TRUE(result);
     51   EXPECT_EQ(base::string16(), folder_name);
     52   EXPECT_TRUE(is_toolbar_folder);
     53 
     54   // Unicode characters in title and shortcut.
     55   base::string16 title;
     56   GURL url, favicon;
     57   base::string16 shortcut;
     58   base::string16 post_data;
     59   base::Time add_date;
     60   result = internal::ParseBookmarkFromLine(
     61       "<DT><A HREF=\"http://chinese.site.cn/path?query=1#ref\" "
     62       "SHORTCUTURL=\"\xE4\xB8\xAD\">\xE4\xB8\xAD\xE6\x96\x87</A>",
     63       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
     64   EXPECT_TRUE(result);
     65   EXPECT_EQ(L"\x4E2D\x6587", UTF16ToWide(title));
     66   EXPECT_EQ("http://chinese.site.cn/path?query=1#ref", url.spec());
     67   EXPECT_EQ(L"\x4E2D", UTF16ToWide(shortcut));
     68   EXPECT_EQ(base::string16(), post_data);
     69   EXPECT_TRUE(base::Time() == add_date);
     70 
     71   // No shortcut, and url contains %22 ('"' character).
     72   result = internal::ParseBookmarkFromLine(
     73       "<DT><A HREF=\"http://domain.com/?q=%22<>%22\">name</A>",
     74       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
     75   EXPECT_TRUE(result);
     76   EXPECT_EQ(ASCIIToUTF16("name"), title);
     77   EXPECT_EQ("http://domain.com/?q=%22%3C%3E%22", url.spec());
     78   EXPECT_EQ(base::string16(), shortcut);
     79   EXPECT_EQ(base::string16(), post_data);
     80   EXPECT_TRUE(base::Time() == add_date);
     81 
     82   result = internal::ParseBookmarkFromLine(
     83       "<DT><A HREF=\"http://domain.com/?g=";\"\">name</A>",
     84       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
     85   EXPECT_TRUE(result);
     86   EXPECT_EQ(ASCIIToUTF16("name"), title);
     87   EXPECT_EQ("http://domain.com/?g=%22", url.spec());
     88   EXPECT_EQ(base::string16(), shortcut);
     89   EXPECT_EQ(base::string16(), post_data);
     90   EXPECT_TRUE(base::Time() == add_date);
     91 
     92   // Creation date.
     93   result = internal::ParseBookmarkFromLine(
     94       "<DT><A HREF=\"http://site/\" ADD_DATE=\"1121301154\">name</A>",
     95       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
     96   EXPECT_TRUE(result);
     97   EXPECT_EQ(ASCIIToUTF16("name"), title);
     98   EXPECT_EQ(GURL("http://site/"), url);
     99   EXPECT_EQ(base::string16(), shortcut);
    100   EXPECT_EQ(base::string16(), post_data);
    101   EXPECT_TRUE(base::Time::FromTimeT(1121301154) == add_date);
    102 
    103   // Post-data
    104   result = internal::ParseBookmarkFromLine(
    105       "<DT><A HREF=\"http://localhost:8080/test/hello.html\" ADD_DATE=\""
    106       "1212447159\" LAST_VISIT=\"1212447251\" LAST_MODIFIED=\"1212447248\""
    107       "SHORTCUTURL=\"post\" ICON=\"data:\" POST_DATA=\"lname%3D%25s\""
    108       "LAST_CHARSET=\"UTF-8\" ID=\"rdf:#$weKaR3\">Test Post keyword</A>",
    109       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
    110   EXPECT_TRUE(result);
    111   EXPECT_EQ(ASCIIToUTF16("Test Post keyword"), title);
    112   EXPECT_EQ("http://localhost:8080/test/hello.html", url.spec());
    113   EXPECT_EQ(ASCIIToUTF16("post"), shortcut);
    114   EXPECT_EQ(ASCIIToUTF16("lname%3D%25s"), post_data);
    115   EXPECT_TRUE(base::Time::FromTimeT(1212447159) == add_date);
    116 
    117   // Invalid case.
    118   result = internal::ParseBookmarkFromLine(
    119       "<DT><A HREF=\"http://domain.com/?q=%22",
    120       charset, &title, &url, &favicon, &shortcut, &add_date, &post_data);
    121   EXPECT_FALSE(result);
    122   EXPECT_EQ(base::string16(), title);
    123   EXPECT_EQ("", url.spec());
    124   EXPECT_EQ(base::string16(), shortcut);
    125   EXPECT_EQ(base::string16(), post_data);
    126   EXPECT_TRUE(base::Time() == add_date);
    127 
    128   // Epiphany format.
    129   result = internal::ParseMinimumBookmarkFromLine(
    130       "<dt><a href=\"http://www.google.com/\">Google</a></dt>",
    131       charset, &title, &url);
    132   EXPECT_TRUE(result);
    133   EXPECT_EQ(ASCIIToUTF16("Google"), title);
    134   EXPECT_EQ("http://www.google.com/", url.spec());
    135 }
    136 
    137 namespace {
    138 
    139 void ExpectFirstFirefox2Bookmark(const ImportedBookmarkEntry& entry) {
    140   EXPECT_EQ(ASCIIToUTF16("Empty"), entry.title);
    141   EXPECT_TRUE(entry.is_folder);
    142   EXPECT_EQ(base::Time::FromTimeT(1295938143), entry.creation_time);
    143   EXPECT_EQ(1U, entry.path.size());
    144   if (entry.path.size() == 1)
    145     EXPECT_EQ(ASCIIToUTF16("Empty's Parent"), entry.path.front());
    146 }
    147 
    148 void ExpectSecondFirefox2Bookmark(const ImportedBookmarkEntry& entry) {
    149   EXPECT_EQ(ASCIIToUTF16("[Tamura Yukari.com]"), entry.title);
    150   EXPECT_FALSE(entry.is_folder);
    151   EXPECT_EQ(base::Time::FromTimeT(1234567890), entry.creation_time);
    152   EXPECT_EQ(1U, entry.path.size());
    153   if (entry.path.size() == 1)
    154     EXPECT_EQ(ASCIIToUTF16("Not Empty"), entry.path.front());
    155   EXPECT_EQ("http://www.tamurayukari.com/", entry.url.spec());
    156 }
    157 
    158 void ExpectThirdFirefox2Bookmark(const ImportedBookmarkEntry& entry) {
    159   EXPECT_EQ(ASCIIToUTF16("Google"), entry.title);
    160   EXPECT_FALSE(entry.is_folder);
    161   EXPECT_EQ(base::Time::FromTimeT(0000000000), entry.creation_time);
    162   EXPECT_EQ(1U, entry.path.size());
    163   if (entry.path.size() == 1)
    164     EXPECT_EQ(ASCIIToUTF16("Not Empty But Default"), entry.path.front());
    165   EXPECT_EQ("http://www.google.com/", entry.url.spec());
    166 }
    167 
    168 void ExpectFirstEpiphanyBookmark(const ImportedBookmarkEntry& entry) {
    169   EXPECT_EQ(ASCIIToUTF16("[Tamura Yukari.com]"), entry.title);
    170   EXPECT_EQ("http://www.tamurayukari.com/", entry.url.spec());
    171   EXPECT_EQ(0U, entry.path.size());
    172 }
    173 
    174 void ExpectSecondEpiphanyBookmark(const ImportedBookmarkEntry& entry) {
    175   EXPECT_EQ(ASCIIToUTF16("Google"), entry.title);
    176   EXPECT_EQ("http://www.google.com/", entry.url.spec());
    177   EXPECT_EQ(0U, entry.path.size());
    178 }
    179 
    180 }  // namespace
    181 
    182 TEST(BookmarkHTMLReaderTest, Firefox2BookmarkFileImport) {
    183   base::FilePath path;
    184   ASSERT_TRUE(PathService::Get(chrome::DIR_TEST_DATA, &path));
    185   path = path.AppendASCII("bookmark_html_reader");
    186   path = path.AppendASCII("firefox2.html");
    187 
    188   std::vector<ImportedBookmarkEntry> bookmarks;
    189   ImportBookmarksFile(base::Callback<bool(void)>(),
    190                       base::Callback<bool(const GURL&)>(),
    191                       path, &bookmarks, NULL);
    192 
    193   ASSERT_EQ(3U, bookmarks.size());
    194   ExpectFirstFirefox2Bookmark(bookmarks[0]);
    195   ExpectSecondFirefox2Bookmark(bookmarks[1]);
    196   ExpectThirdFirefox2Bookmark(bookmarks[2]);
    197 }
    198 
    199 TEST(BookmarkHTMLReaderTest, EpiphanyBookmarkFileImport) {
    200   base::FilePath path;
    201   ASSERT_TRUE(PathService::Get(chrome::DIR_TEST_DATA, &path));
    202   path = path.AppendASCII("bookmark_html_reader");
    203   path = path.AppendASCII("epiphany.html");
    204 
    205   std::vector<ImportedBookmarkEntry> bookmarks;
    206   ImportBookmarksFile(base::Callback<bool(void)>(),
    207                       base::Callback<bool(const GURL&)>(),
    208                       path, &bookmarks, NULL);
    209 
    210   ASSERT_EQ(2U, bookmarks.size());
    211   ExpectFirstEpiphanyBookmark(bookmarks[0]);
    212   ExpectSecondEpiphanyBookmark(bookmarks[1]);
    213 }
    214 
    215 namespace {
    216 
    217 class CancelAfterFifteenCalls {
    218   int count;
    219  public:
    220   CancelAfterFifteenCalls() : count(0) { }
    221   bool ShouldCancel() {
    222     return ++count > 16;
    223   }
    224 };
    225 
    226 }  // namespace
    227 
    228 TEST(BookmarkHTMLReaderTest, CancellationCallback) {
    229   base::FilePath path;
    230   ASSERT_TRUE(PathService::Get(chrome::DIR_TEST_DATA, &path));
    231   path = path.AppendASCII("bookmark_html_reader");
    232   // Use a file for testing that has multiple bookmarks.
    233   path = path.AppendASCII("firefox2.html");
    234 
    235   std::vector<ImportedBookmarkEntry> bookmarks;
    236   CancelAfterFifteenCalls cancel_fifteen;
    237   ImportBookmarksFile(base::Bind(&CancelAfterFifteenCalls::ShouldCancel,
    238                                  base::Unretained(&cancel_fifteen)),
    239                       base::Callback<bool(const GURL&)>(),
    240                       path, &bookmarks, NULL);
    241 
    242   // The cancellation callback is checked before each line is read, so fifteen
    243   // lines are imported. The first fifteen lines of firefox2.html include only
    244   // one bookmark.
    245   ASSERT_EQ(1U, bookmarks.size());
    246   ExpectFirstFirefox2Bookmark(bookmarks[0]);
    247 }
    248 
    249 namespace {
    250 
    251 bool IsURLValid(const GURL& url) {
    252   // No offense to whomever owns this domain...
    253   return !url.DomainIs("tamurayukari.com");
    254 }
    255 
    256 }  // namespace
    257 
    258 TEST(BookmarkHTMLReaderTest, ValidURLCallback) {
    259   base::FilePath path;
    260   ASSERT_TRUE(PathService::Get(chrome::DIR_TEST_DATA, &path));
    261   path = path.AppendASCII("bookmark_html_reader");
    262   // Use a file for testing that has multiple bookmarks.
    263   path = path.AppendASCII("firefox2.html");
    264 
    265   std::vector<ImportedBookmarkEntry> bookmarks;
    266   ImportBookmarksFile(base::Callback<bool(void)>(),
    267                       base::Bind(&IsURLValid),
    268                       path, &bookmarks, NULL);
    269 
    270   ASSERT_EQ(2U, bookmarks.size());
    271   ExpectFirstFirefox2Bookmark(bookmarks[0]);
    272   ExpectThirdFirefox2Bookmark(bookmarks[1]);
    273 }
    274 
    275 }  // namespace bookmark_html_reader
    276