Home | History | Annotate | Download | only in browser
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/bind.h"
      6 #include "base/files/file_util.h"
      7 #include "base/files/scoped_temp_dir.h"
      8 #include "base/prefs/pref_service.h"
      9 #include "chrome/browser/character_encoding.h"
     10 #include "chrome/browser/net/url_request_mock_util.h"
     11 #include "chrome/browser/profiles/profile.h"
     12 #include "chrome/browser/ui/browser.h"
     13 #include "chrome/browser/ui/browser_commands.h"
     14 #include "chrome/browser/ui/tabs/tab_strip_model.h"
     15 #include "chrome/common/pref_names.h"
     16 #include "chrome/test/base/in_process_browser_test.h"
     17 #include "chrome/test/base/ui_test_utils.h"
     18 #include "content/public/browser/browser_thread.h"
     19 #include "content/public/browser/download_manager.h"
     20 #include "content/public/browser/navigation_controller.h"
     21 #include "content/public/browser/notification_service.h"
     22 #include "content/public/browser/notification_source.h"
     23 #include "content/public/browser/notification_types.h"
     24 #include "content/public/browser/web_contents.h"
     25 #include "content/public/test/test_navigation_observer.h"
     26 #include "net/test/url_request/url_request_mock_http_job.h"
     27 
     28 namespace {
     29 
     30 struct EncodingTestData {
     31   const char* file_name;
     32   const char* encoding_name;
     33 };
     34 
     35 const EncodingTestData kEncodingTestDatas[] = {
     36   { "Big5.html", "Big5" },
     37   { "EUC-JP.html", "EUC-JP" },
     38   { "gb18030.html", "gb18030" },
     39   { "iso-8859-1.html", "ISO-8859-1" },
     40   { "ISO-8859-2.html", "ISO-8859-2" },
     41   { "ISO-8859-4.html", "ISO-8859-4" },
     42   { "ISO-8859-5.html", "ISO-8859-5" },
     43   { "ISO-8859-6.html", "ISO-8859-6" },
     44   { "ISO-8859-7.html", "ISO-8859-7" },
     45   { "ISO-8859-8.html", "ISO-8859-8" },
     46   { "ISO-8859-13.html", "ISO-8859-13" },
     47   { "ISO-8859-15.html", "ISO-8859-15" },
     48   { "KOI8-R.html", "KOI8-R" },
     49   { "KOI8-U.html", "KOI8-U" },
     50   { "macintosh.html", "macintosh" },
     51   { "Shift-JIS.html", "Shift_JIS" },
     52   { "US-ASCII.html", "ISO-8859-1" },  // http://crbug.com/15801
     53   { "UTF-8.html", "UTF-8" },
     54   { "UTF-16LE.html", "UTF-16LE" },
     55   { "windows-874.html", "windows-874" },
     56   { "EUC-KR.html", "EUC-KR" },
     57   { "windows-1250.html", "windows-1250" },
     58   { "windows-1251.html", "windows-1251" },
     59   { "windows-1252.html", "windows-1252" },
     60   { "windows-1253.html", "windows-1253" },
     61   { "windows-1254.html", "windows-1254" },
     62   { "windows-1255.html", "windows-1255" },
     63   { "windows-1256.html", "windows-1256" },
     64   { "windows-1257.html", "windows-1257" },
     65   { "windows-1258.html", "windows-1258" }
     66 };
     67 
     68 class SavePackageFinishedObserver : public content::DownloadManager::Observer {
     69  public:
     70   SavePackageFinishedObserver(content::DownloadManager* manager,
     71                               const base::Closure& callback)
     72       : download_manager_(manager),
     73         callback_(callback) {
     74     download_manager_->AddObserver(this);
     75   }
     76 
     77   virtual ~SavePackageFinishedObserver() {
     78     if (download_manager_)
     79       download_manager_->RemoveObserver(this);
     80   }
     81 
     82   // DownloadManager::Observer:
     83   virtual void OnSavePackageSuccessfullyFinished(
     84       content::DownloadManager* manager, content::DownloadItem* item) OVERRIDE {
     85     callback_.Run();
     86   }
     87   virtual void ManagerGoingDown(content::DownloadManager* manager) OVERRIDE {
     88     download_manager_->RemoveObserver(this);
     89     download_manager_ = NULL;
     90   }
     91 
     92  private:
     93   content::DownloadManager* download_manager_;
     94   base::Closure callback_;
     95 
     96   DISALLOW_COPY_AND_ASSIGN(SavePackageFinishedObserver);
     97 };
     98 
     99 }  // namespace
    100 
    101 using content::BrowserThread;
    102 
    103 static const base::FilePath::CharType* kTestDir =
    104     FILE_PATH_LITERAL("encoding_tests");
    105 
    106 class BrowserEncodingTest
    107     : public InProcessBrowserTest,
    108       public testing::WithParamInterface<EncodingTestData> {
    109  protected:
    110   BrowserEncodingTest() {}
    111 
    112   // Saves the current page and verifies that the output matches the expected
    113   // result.
    114   void SaveAndCompare(const char* filename_to_write,
    115                       const base::FilePath& expected) {
    116     // Dump the page, the content of dump page should be identical to the
    117     // expected result file.
    118     base::FilePath full_file_name = save_dir_.AppendASCII(filename_to_write);
    119     // We save the page as way of complete HTML file, which requires a directory
    120     // name to save sub resources in it. Although this test file does not have
    121     // sub resources, but the directory name is still required.
    122     scoped_refptr<content::MessageLoopRunner> loop_runner(
    123         new content::MessageLoopRunner);
    124     SavePackageFinishedObserver observer(
    125         content::BrowserContext::GetDownloadManager(browser()->profile()),
    126         loop_runner->QuitClosure());
    127     browser()->tab_strip_model()->GetActiveWebContents()->SavePage(
    128         full_file_name, temp_sub_resource_dir_,
    129         content::SAVE_PAGE_TYPE_AS_COMPLETE_HTML);
    130     loop_runner->Run();
    131 
    132     base::FilePath expected_file_name = ui_test_utils::GetTestFilePath(
    133         base::FilePath(kTestDir), expected);
    134 
    135     EXPECT_TRUE(base::ContentsEqual(full_file_name, expected_file_name));
    136   }
    137 
    138   virtual void SetUpOnMainThread() OVERRIDE {
    139     ASSERT_TRUE(temp_dir_.CreateUniqueTempDir());
    140     save_dir_ = temp_dir_.path();
    141     temp_sub_resource_dir_ = save_dir_.AppendASCII("sub_resource_files");
    142 
    143     BrowserThread::PostTask(
    144         BrowserThread::IO, FROM_HERE,
    145         base::Bind(&chrome_browser_net::SetUrlRequestMocksEnabled, true));
    146   }
    147 
    148   base::ScopedTempDir temp_dir_;
    149   base::FilePath save_dir_;
    150   base::FilePath temp_sub_resource_dir_;
    151 };
    152 
// TODO(jnd):
// 1. Some encodings are missing here. They will be added later. See
//    http://crbug.com/13306.
// 2. Add more files with multiple encoding name variants for each canonical
//    encoding name. WebKit layout tests cover some of them, but testing in
//    the UI test is also necessary.
    158 IN_PROC_BROWSER_TEST_P(BrowserEncodingTest, TestEncodingAliasMapping) {
    159   const char* const kAliasTestDir = "alias_mapping";
    160 
    161   base::FilePath test_dir_path = base::FilePath(kTestDir).AppendASCII(
    162       kAliasTestDir);
    163   base::FilePath test_file_path(test_dir_path);
    164   test_file_path = test_file_path.AppendASCII(
    165       GetParam().file_name);
    166 
    167   GURL url = net::URLRequestMockHTTPJob::GetMockUrl(test_file_path);
    168   ui_test_utils::NavigateToURL(browser(), url);
    169   EXPECT_EQ(GetParam().encoding_name,
    170             browser()->tab_strip_model()->GetActiveWebContents()->
    171                 GetEncoding());
    172 }
    173 
    174 INSTANTIATE_TEST_CASE_P(EncodingAliases,
    175                         BrowserEncodingTest,
    176                         testing::ValuesIn(kEncodingTestDatas));
    177 
// NOTE: http://crbug.com/44668 tracks flakiness of this test. The test is
// currently enabled (its name carries no FLAKY_ or DISABLED_ prefix).
    179 IN_PROC_BROWSER_TEST_F(BrowserEncodingTest, TestOverrideEncoding) {
    180   const char* const kTestFileName = "gb18030_with_iso88591_meta.html";
    181   const char* const kExpectedFileName =
    182       "expected_gb18030_saved_from_iso88591_meta.html";
    183   const char* const kOverrideTestDir = "user_override";
    184 
    185   base::FilePath test_dir_path =
    186       base::FilePath(kTestDir).AppendASCII(kOverrideTestDir);
    187   test_dir_path = test_dir_path.AppendASCII(kTestFileName);
    188   GURL url = net::URLRequestMockHTTPJob::GetMockUrl(test_dir_path);
    189   ui_test_utils::NavigateToURL(browser(), url);
    190   content::WebContents* web_contents =
    191       browser()->tab_strip_model()->GetActiveWebContents();
    192   EXPECT_EQ("ISO-8859-1", web_contents->GetEncoding());
    193 
    194   // Override the encoding to "gb18030".
    195   const std::string selected_encoding =
    196       CharacterEncoding::GetCanonicalEncodingNameByAliasName("gb18030");
    197   content::TestNavigationObserver navigation_observer(web_contents);
    198   web_contents->SetOverrideEncoding(selected_encoding);
    199   navigation_observer.Wait();
    200   EXPECT_EQ("gb18030", web_contents->GetEncoding());
    201 
    202   base::FilePath expected_filename =
    203       base::FilePath().AppendASCII(kOverrideTestDir).AppendASCII(
    204           kExpectedFileName);
    205   SaveAndCompare(kTestFileName, expected_filename);
    206 }
    207 
    208 // The following encodings are excluded from the auto-detection test because
    209 // it's a known issue that the current encoding detector does not detect them:
    210 // ISO-8859-4
    211 // ISO-8859-13
    212 // KOI8-U
    213 // macintosh
    214 // windows-874
    215 // windows-1252
    216 // windows-1253
    217 // windows-1257
    218 // windows-1258
    219 
    220 // For Hebrew, the expected encoding value is ISO-8859-8-I. See
    221 // http://crbug.com/2927 for more details.
    222 //
    223 // This test fails frequently on the win_rel trybot. See http://crbug.com/122053
    224 // It also times out frequently on Mac dbg. See http://crbug.com/351325
    225 #if defined(OS_WIN) || defined(OS_MACOSX)
    226 #define MAYBE_TestEncodingAutoDetect DISABLED_TestEncodingAutoDetect
    227 #else
    228 #define MAYBE_TestEncodingAutoDetect TestEncodingAutoDetect
    229 #endif
    230 // TODO(phajdan.jr): See if fix for http://crbug.com/122053 would help here.
    231 IN_PROC_BROWSER_TEST_F(BrowserEncodingTest, MAYBE_TestEncodingAutoDetect) {
    232   struct EncodingAutoDetectTestData {
    233     const char* test_file_name;   // File name of test data.
    234     const char* expected_result;  // File name of expected results.
    235     const char* expected_encoding;   // expected encoding.
    236   };
    237   const EncodingAutoDetectTestData kTestDatas[] = {
    238       { "Big5_with_no_encoding_specified.html",
    239         "expected_Big5_saved_from_no_encoding_specified.html",
    240         "Big5" },
    241       { "gb18030_with_no_encoding_specified.html",
    242         "expected_gb18030_saved_from_no_encoding_specified.html",
    243         "gb18030" },
    244       { "iso-8859-1_with_no_encoding_specified.html",
    245         "expected_iso-8859-1_saved_from_no_encoding_specified.html",
    246         "ISO-8859-1" },
    247       { "ISO-8859-5_with_no_encoding_specified.html",
    248         "expected_ISO-8859-5_saved_from_no_encoding_specified.html",
    249         "ISO-8859-5" },
    250       { "ISO-8859-6_with_no_encoding_specified.html",
    251         "expected_ISO-8859-6_saved_from_no_encoding_specified.html",
    252         "ISO-8859-6" },
    253       { "ISO-8859-7_with_no_encoding_specified.html",
    254         "expected_ISO-8859-7_saved_from_no_encoding_specified.html",
    255         "ISO-8859-7" },
    256       { "ISO-8859-8_with_no_encoding_specified.html",
    257         "expected_ISO-8859-8_saved_from_no_encoding_specified.html",
    258         "ISO-8859-8-I" },
    259       { "KOI8-R_with_no_encoding_specified.html",
    260         "expected_KOI8-R_saved_from_no_encoding_specified.html",
    261         "KOI8-R" },
    262       { "Shift-JIS_with_no_encoding_specified.html",
    263         "expected_Shift-JIS_saved_from_no_encoding_specified.html",
    264         "Shift_JIS" },
    265       { "UTF-8_with_no_encoding_specified.html",
    266         "expected_UTF-8_saved_from_no_encoding_specified.html",
    267         "UTF-8" },
    268       { "EUC-KR_with_no_encoding_specified.html",
    269         "expected_EUC-KR_saved_from_no_encoding_specified.html",
    270         "EUC-KR" },
    271       { "windows-1251_with_no_encoding_specified.html",
    272         "expected_windows-1251_saved_from_no_encoding_specified.html",
    273         "windows-1251" },
    274       { "windows-1254_with_no_encoding_specified.html",
    275         "expected_windows-1254_saved_from_no_encoding_specified.html",
    276         "windows-1254" },
    277       { "windows-1255_with_no_encoding_specified.html",
    278         "expected_windows-1255_saved_from_no_encoding_specified.html",
    279         "windows-1255" },
    280       { "windows-1256_with_no_encoding_specified.html",
    281         "expected_windows-1256_saved_from_no_encoding_specified.html",
    282         "windows-1256" }
    283     };
    284   const char* const kAutoDetectDir = "auto_detect";
    285   // Directory of the files of expected results.
    286   const char* const kExpectedResultDir = "expected_results";
    287 
    288   base::FilePath test_dir_path =
    289       base::FilePath(kTestDir).AppendASCII(kAutoDetectDir);
    290 
    291   // Set the default charset to one of encodings not supported by the current
    292   // auto-detector (Please refer to the above comments) to make sure we
    293   // incorrectly decode the page. Now we use ISO-8859-4.
    294   browser()->profile()->GetPrefs()->SetString(prefs::kDefaultCharset,
    295                                               "ISO-8859-4");
    296 
    297   content::WebContents* web_contents =
    298       browser()->tab_strip_model()->GetActiveWebContents();
    299   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestDatas); ++i) {
    300     // Disable auto detect if it is on.
    301     browser()->profile()->GetPrefs()->SetBoolean(
    302         prefs::kWebKitUsesUniversalDetector, false);
    303 
    304     base::FilePath test_file_path(test_dir_path);
    305     test_file_path = test_file_path.AppendASCII(kTestDatas[i].test_file_name);
    306     GURL url = net::URLRequestMockHTTPJob::GetMockUrl(test_file_path);
    307     ui_test_utils::NavigateToURL(browser(), url);
    308 
    309     // Get the encoding used for the page, it must be the default charset we
    310     // just set.
    311     EXPECT_EQ("ISO-8859-4", web_contents->GetEncoding());
    312 
    313     // Enable the encoding auto detection.
    314     browser()->profile()->GetPrefs()->SetBoolean(
    315         prefs::kWebKitUsesUniversalDetector, true);
    316 
    317     content::TestNavigationObserver observer(web_contents);
    318     chrome::Reload(browser(), CURRENT_TAB);
    319     observer.Wait();
    320 
    321     // Re-get the encoding of page. It should return the real encoding now.
    322     EXPECT_EQ(kTestDatas[i].expected_encoding, web_contents->GetEncoding());
    323 
    324     // Dump the page, the content of dump page should be equal with our expect
    325     // result file.
    326     base::FilePath expected_result_file_name =
    327         base::FilePath().AppendASCII(kAutoDetectDir).
    328         AppendASCII(kExpectedResultDir).
    329         AppendASCII(kTestDatas[i].expected_result);
    330     SaveAndCompare(kTestDatas[i].test_file_name, expected_result_file_name);
    331   }
    332 }
    333