1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <string> 6 7 #include "net/base/escape.h" 8 9 #include "base/basictypes.h" 10 #include "base/i18n/icu_string_conversions.h" 11 #include "base/string_util.h" 12 #include "testing/gtest/include/gtest/gtest.h" 13 14 namespace { 15 16 struct EscapeCase { 17 const wchar_t* input; 18 const wchar_t* output; 19 }; 20 21 struct UnescapeURLCase { 22 const wchar_t* input; 23 UnescapeRule::Type rules; 24 const wchar_t* output; 25 }; 26 27 struct UnescapeURLCaseASCII { 28 const char* input; 29 UnescapeRule::Type rules; 30 const char* output; 31 }; 32 33 struct UnescapeAndDecodeCase { 34 const char* input; 35 36 // The expected output when run through UnescapeURL. 37 const char* url_unescaped; 38 39 // The expected output when run through UnescapeQuery. 40 const char* query_unescaped; 41 42 // The expected output when run through UnescapeAndDecodeURLComponent. 43 const wchar_t* decoded; 44 }; 45 46 struct AdjustOffsetCase { 47 const char* input; 48 size_t input_offset; 49 size_t output_offset; 50 }; 51 52 struct EscapeForHTMLCase { 53 const char* input; 54 const char* expected_output; 55 }; 56 57 } // namespace 58 59 TEST(EscapeTest, EscapeTextForFormSubmission) { 60 const EscapeCase escape_cases[] = { 61 {L"foo", L"foo"}, 62 {L"foo bar", L"foo+bar"}, 63 {L"foo++", L"foo%2B%2B"} 64 }; 65 for (size_t i = 0; i < arraysize(escape_cases); ++i) { 66 EscapeCase value = escape_cases[i]; 67 EXPECT_EQ(value.output, EscapeQueryParamValueUTF8(value.input, true)); 68 } 69 70 const EscapeCase escape_cases_no_plus[] = { 71 {L"foo", L"foo"}, 72 {L"foo bar", L"foo%20bar"}, 73 {L"foo++", L"foo%2B%2B"} 74 }; 75 for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) { 76 EscapeCase value = escape_cases_no_plus[i]; 77 EXPECT_EQ(value.output, EscapeQueryParamValueUTF8(value.input, false)); 78 } 79 80 // Test all the values in we're supposed to be escaping. 81 const std::string no_escape( 82 "abcdefghijklmnopqrstuvwxyz" 83 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 84 "0123456789" 85 "!'()*-._~"); 86 for (int i = 0; i < 256; ++i) { 87 std::string in; 88 in.push_back(i); 89 std::string out = EscapeQueryParamValue(in, true); 90 if (0 == i) { 91 EXPECT_EQ(out, std::string("%00")); 92 } else if (32 == i) { 93 // Spaces are plus escaped like web forms. 94 EXPECT_EQ(out, std::string("+")); 95 } else if (no_escape.find(in) == std::string::npos) { 96 // Check %hex escaping 97 std::string expected = StringPrintf("%%%02X", i); 98 EXPECT_EQ(expected, out); 99 } else { 100 // No change for things in the no_escape list. 101 EXPECT_EQ(out, in); 102 } 103 } 104 105 // Check to see if EscapeQueryParamValueUTF8 is the same as 106 // EscapeQueryParamValue(..., kCodepageUTF8,) 107 string16 test_str; 108 test_str.reserve(5000); 109 for (int i = 1; i < 5000; ++i) { 110 test_str.push_back(i); 111 } 112 string16 wide; 113 EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, true, 114 &wide)); 115 EXPECT_EQ(UTF16ToWideHack(wide), 116 EscapeQueryParamValueUTF8(UTF16ToWideHack(test_str), true)); 117 EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, false, 118 &wide)); 119 EXPECT_EQ(UTF16ToWideHack(wide), 120 EscapeQueryParamValueUTF8(UTF16ToWideHack(test_str), false)); 121 } 122 123 TEST(EscapeTest, EscapePath) { 124 ASSERT_EQ( 125 // Most of the character space we care about, un-escaped 126 EscapePath( 127 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" 128 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" 129 "[\\]^_`abcdefghijklmnopqrstuvwxyz" 130 "{|}~\x7f\x80\xff"), 131 // Escaped 132 "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;" 133 "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ" 134 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" 135 "%7B%7C%7D~%7F%80%FF"); 136 } 137 138 TEST(EscapeTest, EscapeUrlEncodedData) { 139 ASSERT_EQ( 140 // Most of the character space we care about, un-escaped 141 EscapeUrlEncodedData( 142 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" 143 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" 144 "[\\]^_`abcdefghijklmnopqrstuvwxyz" 145 "{|}~\x7f\x80\xff"), 146 // Escaped 147 "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B" 148 "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ" 149 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" 150 "%7B%7C%7D~%7F%80%FF"); 151 } 152 153 TEST(EscapeTest, UnescapeURLComponentASCII) { 154 const UnescapeURLCaseASCII unescape_cases[] = { 155 {"", UnescapeRule::NORMAL, ""}, 156 {"%2", UnescapeRule::NORMAL, "%2"}, 157 {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"}, 158 {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"}, 159 {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"}, 160 {"Some%20random text %25%3bOK", UnescapeRule::NONE, 161 "Some%20random text %25%3bOK"}, 162 {"Some%20random text %25%3bOK", UnescapeRule::NORMAL, 163 "Some%20random text %25;OK"}, 164 {"Some%20random text %25%3bOK", UnescapeRule::SPACES, 165 "Some random text %25;OK"}, 166 {"Some%20random text %25%3bOK", UnescapeRule::URL_SPECIAL_CHARS, 167 "Some%20random text %;OK"}, 168 {"Some%20random text %25%3bOK", 169 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, 170 "Some random text %;OK"}, 171 {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"}, 172 {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}, 173 // Certain URL-sensitive characters should not be unescaped unless asked. 174 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, 175 "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, 176 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", 177 UnescapeRule::URL_SPECIAL_CHARS, 178 "Hello%20%13%10world ## ?? == && %% ++"}, 179 // Control characters. 180 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, 181 "%01%02%03%04%05%06%07%08%09 %"}, 182 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, 183 "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, 184 {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"}, 185 {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"}, 186 }; 187 188 for (size_t i = 0; i < arraysize(unescape_cases); i++) { 189 std::string str(unescape_cases[i].input); 190 EXPECT_EQ(std::string(unescape_cases[i].output), 191 UnescapeURLComponent(str, unescape_cases[i].rules)); 192 } 193 194 // Test the NULL character unescaping (which wouldn't work above since those 195 // are just char pointers). 196 std::string input("Null"); 197 input.push_back(0); // Also have a NULL in the input. 198 input.append("%00%39Test"); 199 200 // When we're unescaping NULLs 201 std::string expected("Null"); 202 expected.push_back(0); 203 expected.push_back(0); 204 expected.append("9Test"); 205 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); 206 207 // When we're not unescaping NULLs. 208 expected = "Null"; 209 expected.push_back(0); 210 expected.append("%009Test"); 211 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); 212 } 213 214 TEST(EscapeTest, UnescapeURLComponent) { 215 const UnescapeURLCase unescape_cases[] = { 216 {L"", UnescapeRule::NORMAL, L""}, 217 {L"%2", UnescapeRule::NORMAL, L"%2"}, 218 {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"}, 219 {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"}, 220 {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"}, 221 {L"Some%20random text %25%3bOK", UnescapeRule::NONE, 222 L"Some%20random text %25%3bOK"}, 223 {L"Some%20random text %25%3bOK", UnescapeRule::NORMAL, 224 L"Some%20random text %25;OK"}, 225 {L"Some%20random text %25%3bOK", UnescapeRule::SPACES, 226 L"Some random text %25;OK"}, 227 {L"Some%20random text %25%3bOK", UnescapeRule::URL_SPECIAL_CHARS, 228 L"Some%20random text %;OK"}, 229 {L"Some%20random text %25%3bOK", 230 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, 231 L"Some random text %;OK"}, 232 {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"}, 233 {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"}, 234 // Certain URL-sensitive characters should not be unescaped unless asked. 235 {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, 236 L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, 237 {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", 238 UnescapeRule::URL_SPECIAL_CHARS, 239 L"Hello%20%13%10world ## ?? == && %% ++"}, 240 // Control characters. 241 {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, 242 L"%01%02%03%04%05%06%07%08%09 %"}, 243 {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, 244 L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, 245 {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"}, 246 {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, 247 L"Hello%20\x13\x10\x02"}, 248 {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS, 249 L"Hello\x9824\x9827"}, 250 }; 251 252 for (size_t i = 0; i < arraysize(unescape_cases); i++) { 253 string16 str(WideToUTF16(unescape_cases[i].input)); 254 EXPECT_EQ(WideToUTF16(unescape_cases[i].output), 255 UnescapeURLComponent(str, unescape_cases[i].rules)); 256 } 257 258 // Test the NULL character unescaping (which wouldn't work above since those 259 // are just char pointers). 260 string16 input(WideToUTF16(L"Null")); 261 input.push_back(0); // Also have a NULL in the input. 262 input.append(WideToUTF16(L"%00%39Test")); 263 264 // When we're unescaping NULLs 265 string16 expected(WideToUTF16(L"Null")); 266 expected.push_back(0); 267 expected.push_back(0); 268 expected.append(ASCIIToUTF16("9Test")); 269 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); 270 271 // When we're not unescaping NULLs. 272 expected = WideToUTF16(L"Null"); 273 expected.push_back(0); 274 expected.append(WideToUTF16(L"%009Test")); 275 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); 276 } 277 278 TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { 279 const UnescapeAndDecodeCase unescape_cases[] = { 280 { "%", 281 "%", 282 "%", 283 L"%"}, 284 { "+", 285 "+", 286 " ", 287 L"+"}, 288 { "%2+", 289 "%2+", 290 "%2 ", 291 L"%2+"}, 292 { "+%%%+%%%", 293 "+%%%+%%%", 294 " %%% %%%", 295 L"+%%%+%%%"}, 296 { "Don't escape anything", 297 "Don't escape anything", 298 "Don't escape anything", 299 L"Don't escape anything"}, 300 { "+Invalid %escape %2+", 301 "+Invalid %escape %2+", 302 " Invalid %escape %2 ", 303 L"+Invalid %escape %2+"}, 304 { "Some random text %25%3BOK", 305 "Some random text %25;OK", 306 "Some random text %25;OK", 307 L"Some random text %25;OK"}, 308 { "%01%02%03%04%05%06%07%08%09", 309 "%01%02%03%04%05%06%07%08%09", 310 "%01%02%03%04%05%06%07%08%09", 311 L"%01%02%03%04%05%06%07%08%09"}, 312 { "%E4%BD%A0+%E5%A5%BD", 313 "\xE4\xBD\xA0+\xE5\xA5\xBD", 314 "\xE4\xBD\xA0 \xE5\xA5\xBD", 315 L"\x4f60+\x597d"}, 316 { "%ED%ED", // Invalid UTF-8. 317 "\xED\xED", 318 "\xED\xED", 319 L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. 320 }; 321 322 for (size_t i = 0; i < arraysize(unescape_cases); i++) { 323 std::string unescaped = UnescapeURLComponent(unescape_cases[i].input, 324 UnescapeRule::NORMAL); 325 EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped); 326 327 unescaped = UnescapeURLComponent(unescape_cases[i].input, 328 UnescapeRule::REPLACE_PLUS_WITH_SPACE); 329 EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped); 330 331 // TODO: Need to test unescape_spaces and unescape_percent. 332 string16 decoded = UnescapeAndDecodeUTF8URLComponent( 333 unescape_cases[i].input, UnescapeRule::NORMAL, NULL); 334 EXPECT_EQ(WideToUTF16Hack(std::wstring(unescape_cases[i].decoded)), 335 decoded); 336 } 337 } 338 339 TEST(EscapeTest, AdjustOffset) { 340 const AdjustOffsetCase adjust_cases[] = { 341 {"", 0, std::wstring::npos}, 342 {"test", 0, 0}, 343 {"test", 2, 2}, 344 {"test", 4, std::wstring::npos}, 345 {"test", std::wstring::npos, std::wstring::npos}, 346 {"%3Btest", 6, 4}, 347 {"%3Btest", 2, std::wstring::npos}, 348 {"test%3B", 2, 2}, 349 {"%E4%BD%A0+%E5%A5%BD", 9, 1}, 350 {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos}, 351 {"%ED%B0%80+%E5%A5%BD", 6, 6}, 352 }; 353 354 for (size_t i = 0; i < arraysize(adjust_cases); i++) { 355 size_t offset = adjust_cases[i].input_offset; 356 UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input, 357 UnescapeRule::NORMAL, &offset); 358 EXPECT_EQ(adjust_cases[i].output_offset, offset); 359 } 360 } 361 362 TEST(EscapeTest, EscapeForHTML) { 363 const EscapeForHTMLCase tests[] = { 364 { "hello", "hello" }, 365 { "<hello>", "<hello>" }, 366 { "don\'t mess with me", "don't mess with me" }, 367 }; 368 for (size_t i = 0; i < arraysize(tests); ++i) { 369 std::string result = EscapeForHTML(std::string(tests[i].input)); 370 EXPECT_EQ(std::string(tests[i].expected_output), result); 371 } 372 } 373 374 TEST(EscapeTest, UnescapeForHTML) { 375 const EscapeForHTMLCase tests[] = { 376 { "", "" }, 377 { "<hello>", "<hello>" }, 378 { "don't mess with me", "don\'t mess with me" }, 379 { "<>&"'", "<>&\"'" }, 380 { "& lt; & ; &; '", "& lt; & ; &; '" }, 381 { "&", "&" }, 382 { """, "\"" }, 383 { "'", "'" }, 384 { "<", "<" }, 385 { ">", ">" }, 386 { "& &", "& &" }, 387 }; 388 for (size_t i = 0; i < arraysize(tests); ++i) { 389 string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input)); 390 EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result); 391 } 392 } 393