1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 #include <string> 5 6 #include "base/file_util.h" 7 #include "base/memory/scoped_temp_dir.h" 8 #include "chrome/browser/net/url_request_mock_http_job.h" 9 #include "chrome/browser/download/save_package.h" 10 #include "chrome/common/pref_names.h" 11 #include "chrome/test/automation/browser_proxy.h" 12 #include "chrome/test/automation/tab_proxy.h" 13 #include "chrome/test/ui/ui_test.h" 14 #include "chrome/test/ui_test_utils.h" 15 16 static const FilePath::CharType* kTestDir = FILE_PATH_LITERAL("encoding_tests"); 17 18 class BrowserEncodingTest : public UITest { 19 protected: 20 BrowserEncodingTest() : UITest() {} 21 22 // Make sure the content of the page are as expected 23 // after override or auto-detect 24 void CheckFile(const FilePath& generated_file, 25 const FilePath& expected_result_file, 26 bool check_equal) { 27 FilePath expected_result_filepath = ui_test_utils::GetTestFilePath( 28 FilePath(kTestDir), expected_result_file); 29 30 ASSERT_TRUE(file_util::PathExists(expected_result_filepath)); 31 WaitForGeneratedFileAndCheck(generated_file, 32 expected_result_filepath, 33 true, // We do care whether they are equal. 34 check_equal, 35 true); // Delete the generated file when done. 36 } 37 38 virtual void SetUp() { 39 UITest::SetUp(); 40 ASSERT_TRUE(temp_dir_.CreateUniqueTempDir()); 41 save_dir_ = temp_dir_.path(); 42 temp_sub_resource_dir_ = save_dir_.AppendASCII("sub_resource_files"); 43 } 44 45 ScopedTempDir temp_dir_; 46 FilePath save_dir_; 47 FilePath temp_sub_resource_dir_; 48 }; 49 50 // TODO(jnd): 1. Some encodings are missing here. It'll be added later. See 51 // http://crbug.com/13306. 52 // 2. Add more files with multiple encoding name variants for each canonical 53 // encoding name). Webkit layout tests cover some, but testing in the UI test is 54 // also necessary. 55 TEST_F(BrowserEncodingTest, TestEncodingAliasMapping) { 56 struct EncodingTestData { 57 const char* file_name; 58 const char* encoding_name; 59 }; 60 61 const EncodingTestData kEncodingTestDatas[] = { 62 { "Big5.html", "Big5" }, 63 { "EUC-JP.html", "EUC-JP" }, 64 { "gb18030.html", "gb18030" }, 65 { "iso-8859-1.html", "ISO-8859-1" }, 66 { "ISO-8859-2.html", "ISO-8859-2" }, 67 { "ISO-8859-4.html", "ISO-8859-4" }, 68 { "ISO-8859-5.html", "ISO-8859-5" }, 69 { "ISO-8859-6.html", "ISO-8859-6" }, 70 { "ISO-8859-7.html", "ISO-8859-7" }, 71 { "ISO-8859-8.html", "ISO-8859-8" }, 72 { "ISO-8859-13.html", "ISO-8859-13" }, 73 { "ISO-8859-15.html", "ISO-8859-15" }, 74 { "KOI8-R.html", "KOI8-R" }, 75 { "KOI8-U.html", "KOI8-U" }, 76 { "macintosh.html", "macintosh" }, 77 { "Shift-JIS.html", "Shift_JIS" }, 78 { "US-ASCII.html", "ISO-8859-1" }, // http://crbug.com/15801 79 { "UTF-8.html", "UTF-8" }, 80 { "UTF-16LE.html", "UTF-16LE" }, 81 { "windows-874.html", "windows-874" }, 82 { "windows-949.html", "windows-949" }, 83 { "windows-1250.html", "windows-1250" }, 84 { "windows-1251.html", "windows-1251" }, 85 { "windows-1252.html", "windows-1252" }, 86 { "windows-1253.html", "windows-1253" }, 87 { "windows-1254.html", "windows-1254" }, 88 { "windows-1255.html", "windows-1255" }, 89 { "windows-1256.html", "windows-1256" }, 90 { "windows-1257.html", "windows-1257" }, 91 { "windows-1258.html", "windows-1258" } 92 }; 93 const char* const kAliasTestDir = "alias_mapping"; 94 95 scoped_refptr<TabProxy> tab_proxy(GetActiveTab()); 96 ASSERT_TRUE(tab_proxy.get()); 97 98 FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kAliasTestDir); 99 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kEncodingTestDatas); ++i) { 100 FilePath test_file_path(test_dir_path); 101 test_file_path = test_file_path.AppendASCII( 102 kEncodingTestDatas[i].file_name); 103 104 NavigateToURL(URLRequestMockHTTPJob::GetMockUrl(test_file_path)); 105 106 std::string encoding; 107 EXPECT_TRUE(tab_proxy->GetPageCurrentEncoding(&encoding)); 108 EXPECT_EQ(encoding, kEncodingTestDatas[i].encoding_name); 109 } 110 } 111 112 // Marked as flaky: see http://crbug.com/44668 113 TEST_F(BrowserEncodingTest, FLAKY_TestOverrideEncoding) { 114 const char* const kTestFileName = "gb18030_with_iso88591_meta.html"; 115 const char* const kExpectedFileName = 116 "expected_gb18030_saved_from_iso88591_meta.html"; 117 const char* const kOverrideTestDir = "user_override"; 118 119 FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kOverrideTestDir); 120 test_dir_path = test_dir_path.AppendASCII(kTestFileName); 121 GURL url = URLRequestMockHTTPJob::GetMockUrl(test_dir_path); 122 scoped_refptr<TabProxy> tab_proxy(GetActiveTab()); 123 ASSERT_TRUE(tab_proxy.get()); 124 ASSERT_TRUE(tab_proxy->NavigateToURL(url)); 125 WaitUntilTabCount(1); 126 127 // Get the encoding declared in the page. 128 std::string encoding; 129 EXPECT_TRUE(tab_proxy->GetPageCurrentEncoding(&encoding)); 130 EXPECT_EQ(encoding, "ISO-8859-1"); 131 132 // Override the encoding to "gb18030". 133 int64 last_nav_time = 0; 134 EXPECT_TRUE(tab_proxy->GetLastNavigationTime(&last_nav_time)); 135 EXPECT_TRUE(tab_proxy->OverrideEncoding("gb18030")); 136 EXPECT_TRUE(tab_proxy->WaitForNavigation(last_nav_time)); 137 138 // Re-get the encoding of page. It should be gb18030. 139 EXPECT_TRUE(tab_proxy->GetPageCurrentEncoding(&encoding)); 140 EXPECT_EQ(encoding, "gb18030"); 141 142 // Dump the page, the content of dump page should be identical to the 143 // expected result file. 144 FilePath full_file_name = save_dir_.AppendASCII(kTestFileName); 145 // We save the page as way of complete HTML file, which requires a directory 146 // name to save sub resources in it. Although this test file does not have 147 // sub resources, but the directory name is still required. 148 EXPECT_TRUE(tab_proxy->SavePage(full_file_name, temp_sub_resource_dir_, 149 SavePackage::SAVE_AS_COMPLETE_HTML)); 150 scoped_refptr<BrowserProxy> browser(automation()->GetBrowserWindow(0)); 151 ASSERT_TRUE(browser.get()); 152 EXPECT_TRUE(WaitForDownloadShelfVisible(browser.get())); 153 FilePath expected_file_name = FilePath().AppendASCII(kOverrideTestDir); 154 expected_file_name = expected_file_name.AppendASCII(kExpectedFileName); 155 CheckFile(full_file_name, expected_file_name, true); 156 } 157 158 // The following encodings are excluded from the auto-detection test because 159 // it's a known issue that the current encoding detector does not detect them: 160 // ISO-8859-4 161 // ISO-8859-13 162 // KOI8-U 163 // macintosh 164 // windows-874 165 // windows-1252 166 // windows-1253 167 // windows-1257 168 // windows-1258 169 170 // For Hebrew, the expected encoding value is ISO-8859-8-I. See 171 // http://crbug.com/2927 for more details. 172 // FLAKY / Disabled on CrOS: see http://crbug.com/44666 173 #if defined(OS_CHROMEOS) 174 #define MAYBE_TestEncodingAutoDetect DISABLED_TestEncodingAutoDetect 175 #else 176 #define MAYBE_TestEncodingAutoDetect FLAKY_TestEncodingAutoDetect 177 #endif 178 179 TEST_F(BrowserEncodingTest, MAYBE_TestEncodingAutoDetect) { 180 struct EncodingAutoDetectTestData { 181 const char* test_file_name; // File name of test data. 182 const char* expected_result; // File name of expected results. 183 const char* expected_encoding; // expected encoding. 184 }; 185 const EncodingAutoDetectTestData kTestDatas[] = { 186 { "Big5_with_no_encoding_specified.html", 187 "expected_Big5_saved_from_no_encoding_specified.html", 188 "Big5" }, 189 { "gb18030_with_no_encoding_specified.html", 190 "expected_gb18030_saved_from_no_encoding_specified.html", 191 "gb18030" }, 192 { "iso-8859-1_with_no_encoding_specified.html", 193 "expected_iso-8859-1_saved_from_no_encoding_specified.html", 194 "ISO-8859-1" }, 195 { "ISO-8859-5_with_no_encoding_specified.html", 196 "expected_ISO-8859-5_saved_from_no_encoding_specified.html", 197 "ISO-8859-5" }, 198 { "ISO-8859-6_with_no_encoding_specified.html", 199 "expected_ISO-8859-6_saved_from_no_encoding_specified.html", 200 "ISO-8859-6" }, 201 { "ISO-8859-7_with_no_encoding_specified.html", 202 "expected_ISO-8859-7_saved_from_no_encoding_specified.html", 203 "ISO-8859-7" }, 204 { "ISO-8859-8_with_no_encoding_specified.html", 205 "expected_ISO-8859-8_saved_from_no_encoding_specified.html", 206 "ISO-8859-8-I" }, 207 { "KOI8-R_with_no_encoding_specified.html", 208 "expected_KOI8-R_saved_from_no_encoding_specified.html", 209 "KOI8-R" }, 210 { "Shift-JIS_with_no_encoding_specified.html", 211 "expected_Shift-JIS_saved_from_no_encoding_specified.html", 212 "Shift_JIS" }, 213 { "UTF-8_with_no_encoding_specified.html", 214 "expected_UTF-8_saved_from_no_encoding_specified.html", 215 "UTF-8" }, 216 { "windows-949_with_no_encoding_specified.html", 217 "expected_windows-949_saved_from_no_encoding_specified.html", 218 "windows-949" }, 219 { "windows-1251_with_no_encoding_specified.html", 220 "expected_windows-1251_saved_from_no_encoding_specified.html", 221 "windows-1251" }, 222 { "windows-1254_with_no_encoding_specified.html", 223 "expected_windows-1254_saved_from_no_encoding_specified.html", 224 "windows-1254" }, 225 { "windows-1255_with_no_encoding_specified.html", 226 "expected_windows-1255_saved_from_no_encoding_specified.html", 227 "windows-1255" }, 228 { "windows-1256_with_no_encoding_specified.html", 229 "expected_windows-1256_saved_from_no_encoding_specified.html", 230 "windows-1256" } 231 }; 232 const char* const kAutoDetectDir = "auto_detect"; 233 // Directory of the files of expected results. 234 const char* const kExpectedResultDir = "expected_results"; 235 236 // Full path of saved file. full_file_name = save_dir_ + file_name[i]; 237 FilePath full_saved_file_name; 238 239 FilePath test_dir_path = FilePath(kTestDir).AppendASCII(kAutoDetectDir); 240 241 scoped_refptr<BrowserProxy> browser(automation()->GetBrowserWindow(0)); 242 ASSERT_TRUE(browser.get()); 243 // Set the default charset to one of encodings not supported by the current 244 // auto-detector (Please refer to the above comments) to make sure we 245 // incorrectly decode the page. Now we use ISO-8859-4. 246 ASSERT_TRUE(browser->SetStringPreference(prefs::kDefaultCharset, 247 "ISO-8859-4")); 248 scoped_refptr<TabProxy> tab(GetActiveTab()); 249 ASSERT_TRUE(tab.get()); 250 251 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestDatas);i++) { 252 FilePath test_file_path(test_dir_path); 253 test_file_path = test_file_path.AppendASCII(kTestDatas[i].test_file_name); 254 GURL url = 255 URLRequestMockHTTPJob::GetMockUrl(test_file_path); 256 ASSERT_TRUE(tab->NavigateToURL(url)); 257 258 // Disable auto detect if it is on. 259 EXPECT_TRUE( 260 browser->SetBooleanPreference(prefs::kWebKitUsesUniversalDetector, 261 false)); 262 EXPECT_TRUE(tab->Reload()); 263 264 // Get the encoding used for the page, it must be the default charset we 265 // just set. 266 std::string encoding; 267 EXPECT_TRUE(tab->GetPageCurrentEncoding(&encoding)); 268 EXPECT_EQ(encoding, "ISO-8859-4"); 269 270 // Enable the encoding auto detection. 271 EXPECT_TRUE(browser->SetBooleanPreference( 272 prefs::kWebKitUsesUniversalDetector, true)); 273 EXPECT_TRUE(tab->Reload()); 274 275 // Re-get the encoding of page. It should return the real encoding now. 276 bool encoding_auto_detect = false; 277 EXPECT_TRUE( 278 browser->GetBooleanPreference(prefs::kWebKitUsesUniversalDetector, 279 &encoding_auto_detect)); 280 EXPECT_TRUE(encoding_auto_detect); 281 EXPECT_TRUE(tab->GetPageCurrentEncoding(&encoding)); 282 EXPECT_EQ(encoding, kTestDatas[i].expected_encoding); 283 284 // Dump the page, the content of dump page should be equal with our expect 285 // result file. 286 full_saved_file_name = save_dir_.AppendASCII(kTestDatas[i].test_file_name); 287 // Full path of expect result file. 288 FilePath expected_result_file_name = FilePath().AppendASCII(kAutoDetectDir); 289 expected_result_file_name = expected_result_file_name.AppendASCII( 290 kExpectedResultDir); 291 expected_result_file_name = expected_result_file_name.AppendASCII( 292 kTestDatas[i].expected_result); 293 EXPECT_TRUE(tab->SavePage(full_saved_file_name, temp_sub_resource_dir_, 294 SavePackage::SAVE_AS_COMPLETE_HTML)); 295 EXPECT_TRUE(WaitForDownloadShelfVisible(browser.get())); 296 CheckFile(full_saved_file_name, expected_result_file_name, true); 297 } 298 } 299