1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/bind.h" 6 #include "base/file_util.h" 7 #include "base/files/scoped_temp_dir.h" 8 #include "base/prefs/pref_service.h" 9 #include "chrome/browser/character_encoding.h" 10 #include "chrome/browser/net/url_request_mock_util.h" 11 #include "chrome/browser/profiles/profile.h" 12 #include "chrome/browser/ui/browser.h" 13 #include "chrome/browser/ui/browser_commands.h" 14 #include "chrome/browser/ui/tabs/tab_strip_model.h" 15 #include "chrome/common/pref_names.h" 16 #include "chrome/test/base/in_process_browser_test.h" 17 #include "chrome/test/base/ui_test_utils.h" 18 #include "content/public/browser/browser_thread.h" 19 #include "content/public/browser/download_manager.h" 20 #include "content/public/browser/navigation_controller.h" 21 #include "content/public/browser/notification_service.h" 22 #include "content/public/browser/notification_source.h" 23 #include "content/public/browser/notification_types.h" 24 #include "content/public/browser/web_contents.h" 25 #include "content/public/test/test_navigation_observer.h" 26 #include "content/test/net/url_request_mock_http_job.h" 27 28 namespace { 29 30 struct EncodingTestData { 31 const char* file_name; 32 const char* encoding_name; 33 }; 34 35 const EncodingTestData kEncodingTestDatas[] = { 36 { "Big5.html", "Big5" }, 37 { "EUC-JP.html", "EUC-JP" }, 38 { "gb18030.html", "gb18030" }, 39 { "iso-8859-1.html", "ISO-8859-1" }, 40 { "ISO-8859-2.html", "ISO-8859-2" }, 41 { "ISO-8859-4.html", "ISO-8859-4" }, 42 { "ISO-8859-5.html", "ISO-8859-5" }, 43 { "ISO-8859-6.html", "ISO-8859-6" }, 44 { "ISO-8859-7.html", "ISO-8859-7" }, 45 { "ISO-8859-8.html", "ISO-8859-8" }, 46 { "ISO-8859-13.html", "ISO-8859-13" }, 47 { "ISO-8859-15.html", "ISO-8859-15" }, 48 { "KOI8-R.html", "KOI8-R" }, 49 { "KOI8-U.html", "KOI8-U" }, 50 { "macintosh.html", "macintosh" }, 51 { "Shift-JIS.html", "Shift_JIS" }, 52 { "US-ASCII.html", "ISO-8859-1" }, // http://crbug.com/15801 53 { "UTF-8.html", "UTF-8" }, 54 { "UTF-16LE.html", "UTF-16LE" }, 55 { "windows-874.html", "windows-874" }, 56 // http://crbug.com/95963 57 // { "windows-949.html", "windows-949" }, 58 { "windows-1250.html", "windows-1250" }, 59 { "windows-1251.html", "windows-1251" }, 60 { "windows-1252.html", "windows-1252" }, 61 { "windows-1253.html", "windows-1253" }, 62 { "windows-1254.html", "windows-1254" }, 63 { "windows-1255.html", "windows-1255" }, 64 { "windows-1256.html", "windows-1256" }, 65 { "windows-1257.html", "windows-1257" }, 66 { "windows-1258.html", "windows-1258" } 67 }; 68 69 class SavePackageFinishedObserver : public content::DownloadManager::Observer { 70 public: 71 SavePackageFinishedObserver(content::DownloadManager* manager, 72 const base::Closure& callback) 73 : download_manager_(manager), 74 callback_(callback) { 75 download_manager_->AddObserver(this); 76 } 77 78 virtual ~SavePackageFinishedObserver() { 79 if (download_manager_) 80 download_manager_->RemoveObserver(this); 81 } 82 83 // DownloadManager::Observer: 84 virtual void OnSavePackageSuccessfullyFinished( 85 content::DownloadManager* manager, content::DownloadItem* item) OVERRIDE { 86 callback_.Run(); 87 } 88 virtual void ManagerGoingDown(content::DownloadManager* manager) OVERRIDE { 89 download_manager_->RemoveObserver(this); 90 download_manager_ = NULL; 91 } 92 93 private: 94 content::DownloadManager* download_manager_; 95 base::Closure callback_; 96 97 DISALLOW_COPY_AND_ASSIGN(SavePackageFinishedObserver); 98 }; 99 100 } // namespace 101 102 using content::BrowserThread; 103 104 static const base::FilePath::CharType* kTestDir = 105 FILE_PATH_LITERAL("encoding_tests"); 106 107 class BrowserEncodingTest 108 : public InProcessBrowserTest, 109 public testing::WithParamInterface<EncodingTestData> { 110 protected: 111 BrowserEncodingTest() {} 112 113 // Saves the current page and verifies that the output matches the expected 114 // result. 115 void SaveAndCompare(const char* filename_to_write, 116 const base::FilePath& expected) { 117 // Dump the page, the content of dump page should be identical to the 118 // expected result file. 119 base::FilePath full_file_name = save_dir_.AppendASCII(filename_to_write); 120 // We save the page as way of complete HTML file, which requires a directory 121 // name to save sub resources in it. Although this test file does not have 122 // sub resources, but the directory name is still required. 123 scoped_refptr<content::MessageLoopRunner> loop_runner( 124 new content::MessageLoopRunner); 125 SavePackageFinishedObserver observer( 126 content::BrowserContext::GetDownloadManager(browser()->profile()), 127 loop_runner->QuitClosure()); 128 browser()->tab_strip_model()->GetActiveWebContents()->SavePage( 129 full_file_name, temp_sub_resource_dir_, 130 content::SAVE_PAGE_TYPE_AS_COMPLETE_HTML); 131 loop_runner->Run(); 132 133 base::FilePath expected_file_name = ui_test_utils::GetTestFilePath( 134 base::FilePath(kTestDir), expected); 135 136 EXPECT_TRUE(base::ContentsEqual(full_file_name, expected_file_name)); 137 } 138 139 virtual void SetUpOnMainThread() OVERRIDE { 140 ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); 141 save_dir_ = temp_dir_.path(); 142 temp_sub_resource_dir_ = save_dir_.AppendASCII("sub_resource_files"); 143 144 BrowserThread::PostTask( 145 BrowserThread::IO, FROM_HERE, 146 base::Bind(&chrome_browser_net::SetUrlRequestMocksEnabled, true)); 147 } 148 149 base::ScopedTempDir temp_dir_; 150 base::FilePath save_dir_; 151 base::FilePath temp_sub_resource_dir_; 152 }; 153 154 // TODO(jnd): 1. Some encodings are missing here. It'll be added later. See 155 // http://crbug.com/13306. 156 // 2. Add more files with multiple encoding name variants for each canonical 157 // encoding name). Webkit layout tests cover some, but testing in the UI test is 158 // also necessary. 159 IN_PROC_BROWSER_TEST_P(BrowserEncodingTest, TestEncodingAliasMapping) { 160 const char* const kAliasTestDir = "alias_mapping"; 161 162 base::FilePath test_dir_path = base::FilePath(kTestDir).AppendASCII( 163 kAliasTestDir); 164 base::FilePath test_file_path(test_dir_path); 165 test_file_path = test_file_path.AppendASCII( 166 GetParam().file_name); 167 168 GURL url = content::URLRequestMockHTTPJob::GetMockUrl(test_file_path); 169 ui_test_utils::NavigateToURL(browser(), url); 170 EXPECT_EQ(GetParam().encoding_name, 171 browser()->tab_strip_model()->GetActiveWebContents()-> 172 GetEncoding()); 173 } 174 175 INSTANTIATE_TEST_CASE_P(EncodingAliases, 176 BrowserEncodingTest, 177 testing::ValuesIn(kEncodingTestDatas)); 178 179 // Marked as flaky: see http://crbug.com/44668 180 IN_PROC_BROWSER_TEST_F(BrowserEncodingTest, TestOverrideEncoding) { 181 const char* const kTestFileName = "gb18030_with_iso88591_meta.html"; 182 const char* const kExpectedFileName = 183 "expected_gb18030_saved_from_iso88591_meta.html"; 184 const char* const kOverrideTestDir = "user_override"; 185 186 base::FilePath test_dir_path = 187 base::FilePath(kTestDir).AppendASCII(kOverrideTestDir); 188 test_dir_path = test_dir_path.AppendASCII(kTestFileName); 189 GURL url = content::URLRequestMockHTTPJob::GetMockUrl(test_dir_path); 190 ui_test_utils::NavigateToURL(browser(), url); 191 content::WebContents* web_contents = 192 browser()->tab_strip_model()->GetActiveWebContents(); 193 EXPECT_EQ("ISO-8859-1", web_contents->GetEncoding()); 194 195 // Override the encoding to "gb18030". 196 const std::string selected_encoding = 197 CharacterEncoding::GetCanonicalEncodingNameByAliasName("gb18030"); 198 content::TestNavigationObserver navigation_observer(web_contents); 199 web_contents->SetOverrideEncoding(selected_encoding); 200 navigation_observer.Wait(); 201 EXPECT_EQ("gb18030", web_contents->GetEncoding()); 202 203 base::FilePath expected_filename = 204 base::FilePath().AppendASCII(kOverrideTestDir).AppendASCII( 205 kExpectedFileName); 206 SaveAndCompare(kTestFileName, expected_filename); 207 } 208 209 // The following encodings are excluded from the auto-detection test because 210 // it's a known issue that the current encoding detector does not detect them: 211 // ISO-8859-4 212 // ISO-8859-13 213 // KOI8-U 214 // macintosh 215 // windows-874 216 // windows-1252 217 // windows-1253 218 // windows-1257 219 // windows-1258 220 221 // For Hebrew, the expected encoding value is ISO-8859-8-I. See 222 // http://crbug.com/2927 for more details. 223 // 224 // This test fails frequently on the win_rel trybot. See http://crbug.com/122053 225 #if defined(OS_WIN) || defined(OS_MACOSX) 226 #define MAYBE_TestEncodingAutoDetect DISABLED_TestEncodingAutoDetect 227 #else 228 #define MAYBE_TestEncodingAutoDetect TestEncodingAutoDetect 229 #endif 230 // TODO(phajdan.jr): See if fix for http://crbug.com/122053 would help here. 231 IN_PROC_BROWSER_TEST_F(BrowserEncodingTest, MAYBE_TestEncodingAutoDetect) { 232 struct EncodingAutoDetectTestData { 233 const char* test_file_name; // File name of test data. 234 const char* expected_result; // File name of expected results. 235 const char* expected_encoding; // expected encoding. 236 }; 237 const EncodingAutoDetectTestData kTestDatas[] = { 238 { "Big5_with_no_encoding_specified.html", 239 "expected_Big5_saved_from_no_encoding_specified.html", 240 "Big5" }, 241 { "gb18030_with_no_encoding_specified.html", 242 "expected_gb18030_saved_from_no_encoding_specified.html", 243 "gb18030" }, 244 { "iso-8859-1_with_no_encoding_specified.html", 245 "expected_iso-8859-1_saved_from_no_encoding_specified.html", 246 "ISO-8859-1" }, 247 { "ISO-8859-5_with_no_encoding_specified.html", 248 "expected_ISO-8859-5_saved_from_no_encoding_specified.html", 249 "ISO-8859-5" }, 250 { "ISO-8859-6_with_no_encoding_specified.html", 251 "expected_ISO-8859-6_saved_from_no_encoding_specified.html", 252 "ISO-8859-6" }, 253 { "ISO-8859-7_with_no_encoding_specified.html", 254 "expected_ISO-8859-7_saved_from_no_encoding_specified.html", 255 "ISO-8859-7" }, 256 { "ISO-8859-8_with_no_encoding_specified.html", 257 "expected_ISO-8859-8_saved_from_no_encoding_specified.html", 258 "ISO-8859-8-I" }, 259 { "KOI8-R_with_no_encoding_specified.html", 260 "expected_KOI8-R_saved_from_no_encoding_specified.html", 261 "KOI8-R" }, 262 { "Shift-JIS_with_no_encoding_specified.html", 263 "expected_Shift-JIS_saved_from_no_encoding_specified.html", 264 "Shift_JIS" }, 265 { "UTF-8_with_no_encoding_specified.html", 266 "expected_UTF-8_saved_from_no_encoding_specified.html", 267 "UTF-8" }, 268 { "windows-949_with_no_encoding_specified.html", 269 "expected_windows-949_saved_from_no_encoding_specified.html", 270 "windows-949-2000" }, 271 { "windows-1251_with_no_encoding_specified.html", 272 "expected_windows-1251_saved_from_no_encoding_specified.html", 273 "windows-1251" }, 274 { "windows-1254_with_no_encoding_specified.html", 275 "expected_windows-1254_saved_from_no_encoding_specified.html", 276 "windows-1254" }, 277 { "windows-1255_with_no_encoding_specified.html", 278 "expected_windows-1255_saved_from_no_encoding_specified.html", 279 "windows-1255" }, 280 { "windows-1256_with_no_encoding_specified.html", 281 "expected_windows-1256_saved_from_no_encoding_specified.html", 282 "windows-1256" } 283 }; 284 const char* const kAutoDetectDir = "auto_detect"; 285 // Directory of the files of expected results. 286 const char* const kExpectedResultDir = "expected_results"; 287 288 base::FilePath test_dir_path = 289 base::FilePath(kTestDir).AppendASCII(kAutoDetectDir); 290 291 // Set the default charset to one of encodings not supported by the current 292 // auto-detector (Please refer to the above comments) to make sure we 293 // incorrectly decode the page. Now we use ISO-8859-4. 294 browser()->profile()->GetPrefs()->SetString(prefs::kDefaultCharset, 295 "ISO-8859-4"); 296 297 content::WebContents* web_contents = 298 browser()->tab_strip_model()->GetActiveWebContents(); 299 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestDatas); ++i) { 300 // Disable auto detect if it is on. 301 browser()->profile()->GetPrefs()->SetBoolean( 302 prefs::kWebKitUsesUniversalDetector, false); 303 304 base::FilePath test_file_path(test_dir_path); 305 test_file_path = test_file_path.AppendASCII(kTestDatas[i].test_file_name); 306 GURL url = content::URLRequestMockHTTPJob::GetMockUrl(test_file_path); 307 ui_test_utils::NavigateToURL(browser(), url); 308 309 // Get the encoding used for the page, it must be the default charset we 310 // just set. 311 EXPECT_EQ("ISO-8859-4", web_contents->GetEncoding()); 312 313 // Enable the encoding auto detection. 314 browser()->profile()->GetPrefs()->SetBoolean( 315 prefs::kWebKitUsesUniversalDetector, true); 316 317 content::TestNavigationObserver observer(web_contents); 318 chrome::Reload(browser(), CURRENT_TAB); 319 observer.Wait(); 320 321 // Re-get the encoding of page. It should return the real encoding now. 322 EXPECT_EQ(kTestDatas[i].expected_encoding, web_contents->GetEncoding()); 323 324 // Dump the page, the content of dump page should be equal with our expect 325 // result file. 326 base::FilePath expected_result_file_name = 327 base::FilePath().AppendASCII(kAutoDetectDir). 328 AppendASCII(kExpectedResultDir). 329 AppendASCII(kTestDatas[i].expected_result); 330 SaveAndCompare(kTestDatas[i].test_file_name, expected_result_file_name); 331 } 332 } 333