1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <algorithm> 6 #include <string> 7 8 #include "net/base/escape.h" 9 10 #include "base/basictypes.h" 11 #include "base/i18n/icu_string_conversions.h" 12 #include "base/strings/string_util.h" 13 #include "base/strings/stringprintf.h" 14 #include "base/strings/utf_string_conversions.h" 15 #include "testing/gtest/include/gtest/gtest.h" 16 17 namespace net { 18 namespace { 19 20 const size_t kNpos = base::string16::npos; 21 22 struct EscapeCase { 23 const char* input; 24 const char* output; 25 }; 26 27 struct UnescapeURLCase { 28 const wchar_t* input; 29 UnescapeRule::Type rules; 30 const wchar_t* output; 31 }; 32 33 struct UnescapeURLCaseASCII { 34 const char* input; 35 UnescapeRule::Type rules; 36 const char* output; 37 }; 38 39 struct UnescapeAndDecodeCase { 40 const char* input; 41 42 // The expected output when run through UnescapeURL. 43 const char* url_unescaped; 44 45 // The expected output when run through UnescapeQuery. 46 const char* query_unescaped; 47 48 // The expected output when run through UnescapeAndDecodeURLComponent. 49 const wchar_t* decoded; 50 }; 51 52 struct AdjustOffsetCase { 53 const char* input; 54 size_t input_offset; 55 size_t output_offset; 56 }; 57 58 struct EscapeForHTMLCase { 59 const char* input; 60 const char* expected_output; 61 }; 62 63 TEST(EscapeTest, EscapeTextForFormSubmission) { 64 const EscapeCase escape_cases[] = { 65 {"foo", "foo"}, 66 {"foo bar", "foo+bar"}, 67 {"foo++", "foo%2B%2B"} 68 }; 69 for (size_t i = 0; i < arraysize(escape_cases); ++i) { 70 EscapeCase value = escape_cases[i]; 71 EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true)); 72 } 73 74 const EscapeCase escape_cases_no_plus[] = { 75 {"foo", "foo"}, 76 {"foo bar", "foo%20bar"}, 77 {"foo++", "foo%2B%2B"} 78 }; 79 for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) { 80 EscapeCase value = escape_cases_no_plus[i]; 81 EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false)); 82 } 83 84 // Test all the values in we're supposed to be escaping. 85 const std::string no_escape( 86 "abcdefghijklmnopqrstuvwxyz" 87 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 88 "0123456789" 89 "!'()*-._~"); 90 for (int i = 0; i < 256; ++i) { 91 std::string in; 92 in.push_back(i); 93 std::string out = EscapeQueryParamValue(in, true); 94 if (0 == i) { 95 EXPECT_EQ(out, std::string("%00")); 96 } else if (32 == i) { 97 // Spaces are plus escaped like web forms. 98 EXPECT_EQ(out, std::string("+")); 99 } else if (no_escape.find(in) == std::string::npos) { 100 // Check %hex escaping 101 std::string expected = base::StringPrintf("%%%02X", i); 102 EXPECT_EQ(expected, out); 103 } else { 104 // No change for things in the no_escape list. 105 EXPECT_EQ(out, in); 106 } 107 } 108 } 109 110 TEST(EscapeTest, EscapePath) { 111 ASSERT_EQ( 112 // Most of the character space we care about, un-escaped 113 EscapePath( 114 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" 115 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" 116 "[\\]^_`abcdefghijklmnopqrstuvwxyz" 117 "{|}~\x7f\x80\xff"), 118 // Escaped 119 "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;" 120 "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ" 121 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" 122 "%7B%7C%7D~%7F%80%FF"); 123 } 124 125 TEST(EscapeTest, EscapeUrlEncodedData) { 126 ASSERT_EQ( 127 // Most of the character space we care about, un-escaped 128 EscapeUrlEncodedData( 129 "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;" 130 "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ" 131 "[\\]^_`abcdefghijklmnopqrstuvwxyz" 132 "{|}~\x7f\x80\xff", true), 133 // Escaped 134 "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B" 135 "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ" 136 "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz" 137 "%7B%7C%7D~%7F%80%FF"); 138 } 139 140 TEST(EscapeTest, EscapeUrlEncodedDataSpace) { 141 ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b"); 142 ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b"); 143 } 144 145 TEST(EscapeTest, UnescapeURLComponentASCII) { 146 const UnescapeURLCaseASCII unescape_cases[] = { 147 {"", UnescapeRule::NORMAL, ""}, 148 {"%2", UnescapeRule::NORMAL, "%2"}, 149 {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"}, 150 {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"}, 151 {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"}, 152 {"Some%20random text %25%2dOK", UnescapeRule::NONE, 153 "Some%20random text %25%2dOK"}, 154 {"Some%20random text %25%2dOK", UnescapeRule::NORMAL, 155 "Some%20random text %25-OK"}, 156 {"Some%20random text %25%2dOK", UnescapeRule::SPACES, 157 "Some random text %25-OK"}, 158 {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, 159 "Some%20random text %-OK"}, 160 {"Some%20random text %25%2dOK", 161 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, 162 "Some random text %-OK"}, 163 {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"}, 164 {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"}, 165 // Certain URL-sensitive characters should not be unescaped unless asked. 166 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, 167 "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, 168 {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", 169 UnescapeRule::URL_SPECIAL_CHARS, 170 "Hello%20%13%10world ## ?? == && %% ++"}, 171 // We can neither escape nor unescape '@' since some websites expect it to 172 // be preserved as either '@' or "%40". 173 // See http://b/996720 and http://crbug.com/23933 . 174 {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"}, 175 // Control characters. 176 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, 177 "%01%02%03%04%05%06%07%08%09 %"}, 178 {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, 179 "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, 180 {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"}, 181 {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"}, 182 }; 183 184 for (size_t i = 0; i < arraysize(unescape_cases); i++) { 185 std::string str(unescape_cases[i].input); 186 EXPECT_EQ(std::string(unescape_cases[i].output), 187 UnescapeURLComponent(str, unescape_cases[i].rules)); 188 } 189 190 // Test the NULL character unescaping (which wouldn't work above since those 191 // are just char pointers). 192 std::string input("Null"); 193 input.push_back(0); // Also have a NULL in the input. 194 input.append("%00%39Test"); 195 196 // When we're unescaping NULLs 197 std::string expected("Null"); 198 expected.push_back(0); 199 expected.push_back(0); 200 expected.append("9Test"); 201 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); 202 203 // When we're not unescaping NULLs. 204 expected = "Null"; 205 expected.push_back(0); 206 expected.append("%009Test"); 207 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); 208 } 209 210 TEST(EscapeTest, UnescapeURLComponent) { 211 const UnescapeURLCase unescape_cases[] = { 212 {L"", UnescapeRule::NORMAL, L""}, 213 {L"%2", UnescapeRule::NORMAL, L"%2"}, 214 {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"}, 215 {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"}, 216 {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"}, 217 {L"Some%20random text %25%2dOK", UnescapeRule::NONE, 218 L"Some%20random text %25%2dOK"}, 219 {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL, 220 L"Some%20random text %25-OK"}, 221 {L"Some%20random text %25%2dOK", UnescapeRule::SPACES, 222 L"Some random text %25-OK"}, 223 {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS, 224 L"Some%20random text %-OK"}, 225 {L"Some%20random text %25%2dOK", 226 UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS, 227 L"Some random text %-OK"}, 228 {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"}, 229 {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"}, 230 // Certain URL-sensitive characters should not be unescaped unless asked. 231 {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES, 232 L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"}, 233 {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", 234 UnescapeRule::URL_SPECIAL_CHARS, 235 L"Hello%20%13%10world ## ?? == && %% ++"}, 236 // We can neither escape nor unescape '@' since some websites expect it to 237 // be preserved as either '@' or "%40". 238 // See http://b/996720 and http://crbug.com/23933 . 239 {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"}, 240 // Control characters. 241 {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS, 242 L"%01%02%03%04%05%06%07%08%09 %"}, 243 {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS, 244 L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"}, 245 {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"}, 246 {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, 247 L"Hello%20\x13\x10\x02"}, 248 {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS, 249 L"Hello\x9824\x9827"}, 250 }; 251 252 for (size_t i = 0; i < arraysize(unescape_cases); i++) { 253 base::string16 str(WideToUTF16(unescape_cases[i].input)); 254 EXPECT_EQ(WideToUTF16(unescape_cases[i].output), 255 UnescapeURLComponent(str, unescape_cases[i].rules)); 256 } 257 258 // Test the NULL character unescaping (which wouldn't work above since those 259 // are just char pointers). 260 base::string16 input(WideToUTF16(L"Null")); 261 input.push_back(0); // Also have a NULL in the input. 262 input.append(WideToUTF16(L"%00%39Test")); 263 264 // When we're unescaping NULLs 265 base::string16 expected(WideToUTF16(L"Null")); 266 expected.push_back(0); 267 expected.push_back(0); 268 expected.append(ASCIIToUTF16("9Test")); 269 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS)); 270 271 // When we're not unescaping NULLs. 272 expected = WideToUTF16(L"Null"); 273 expected.push_back(0); 274 expected.append(WideToUTF16(L"%009Test")); 275 EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL)); 276 } 277 278 TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) { 279 const UnescapeAndDecodeCase unescape_cases[] = { 280 { "%", 281 "%", 282 "%", 283 L"%"}, 284 { "+", 285 "+", 286 " ", 287 L"+"}, 288 { "%2+", 289 "%2+", 290 "%2 ", 291 L"%2+"}, 292 { "+%%%+%%%", 293 "+%%%+%%%", 294 " %%% %%%", 295 L"+%%%+%%%"}, 296 { "Don't escape anything", 297 "Don't escape anything", 298 "Don't escape anything", 299 L"Don't escape anything"}, 300 { "+Invalid %escape %2+", 301 "+Invalid %escape %2+", 302 " Invalid %escape %2 ", 303 L"+Invalid %escape %2+"}, 304 { "Some random text %25%2dOK", 305 "Some random text %25-OK", 306 "Some random text %25-OK", 307 L"Some random text %25-OK"}, 308 { "%01%02%03%04%05%06%07%08%09", 309 "%01%02%03%04%05%06%07%08%09", 310 "%01%02%03%04%05%06%07%08%09", 311 L"%01%02%03%04%05%06%07%08%09"}, 312 { "%E4%BD%A0+%E5%A5%BD", 313 "\xE4\xBD\xA0+\xE5\xA5\xBD", 314 "\xE4\xBD\xA0 \xE5\xA5\xBD", 315 L"\x4f60+\x597d"}, 316 { "%ED%ED", // Invalid UTF-8. 317 "\xED\xED", 318 "\xED\xED", 319 L"%ED%ED"}, // Invalid UTF-8 -> kept unescaped. 320 }; 321 322 for (size_t i = 0; i < arraysize(unescape_cases); i++) { 323 std::string unescaped = UnescapeURLComponent(unescape_cases[i].input, 324 UnescapeRule::NORMAL); 325 EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped); 326 327 unescaped = UnescapeURLComponent(unescape_cases[i].input, 328 UnescapeRule::REPLACE_PLUS_WITH_SPACE); 329 EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped); 330 331 // TODO: Need to test unescape_spaces and unescape_percent. 332 base::string16 decoded = UnescapeAndDecodeUTF8URLComponent( 333 unescape_cases[i].input, UnescapeRule::NORMAL, NULL); 334 EXPECT_EQ(WideToUTF16(unescape_cases[i].decoded), decoded); 335 } 336 } 337 338 TEST(EscapeTest, AdjustOffset) { 339 const AdjustOffsetCase adjust_cases[] = { 340 {"", 0, 0}, 341 {"", 1, std::string::npos}, 342 {"test", 0, 0}, 343 {"test", 2, 2}, 344 {"test", 4, 4}, 345 {"test", 5, std::string::npos}, 346 {"test", std::string::npos, std::string::npos}, 347 {"%2dtest", 6, 4}, 348 {"%2dtest", 2, std::string::npos}, 349 {"test%2d", 2, 2}, 350 {"%E4%BD%A0+%E5%A5%BD", 9, 1}, 351 {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos}, 352 {"%ED%B0%80+%E5%A5%BD", 6, 6}, 353 }; 354 355 for (size_t i = 0; i < arraysize(adjust_cases); i++) { 356 size_t offset = adjust_cases[i].input_offset; 357 UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input, 358 UnescapeRule::NORMAL, &offset); 359 EXPECT_EQ(adjust_cases[i].output_offset, offset); 360 } 361 } 362 363 TEST(EscapeTest, EscapeForHTML) { 364 const EscapeForHTMLCase tests[] = { 365 { "hello", "hello" }, 366 { "<hello>", "<hello>" }, 367 { "don\'t mess with me", "don't mess with me" }, 368 }; 369 for (size_t i = 0; i < arraysize(tests); ++i) { 370 std::string result = EscapeForHTML(std::string(tests[i].input)); 371 EXPECT_EQ(std::string(tests[i].expected_output), result); 372 } 373 } 374 375 TEST(EscapeTest, UnescapeForHTML) { 376 const EscapeForHTMLCase tests[] = { 377 { "", "" }, 378 { "<hello>", "<hello>" }, 379 { "don't mess with me", "don\'t mess with me" }, 380 { "<>&"'", "<>&\"'" }, 381 { "& lt; & ; &; '", "& lt; & ; &; '" }, 382 { "&", "&" }, 383 { """, "\"" }, 384 { "'", "'" }, 385 { "<", "<" }, 386 { ">", ">" }, 387 { "& &", "& &" }, 388 }; 389 for (size_t i = 0; i < arraysize(tests); ++i) { 390 base::string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input)); 391 EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result); 392 } 393 } 394 395 TEST(EscapeTest, AdjustEncodingOffset) { 396 // Imagine we have strings as shown in the following cases where the 397 // %XX's represent encoded characters 398 399 // 1: abc%ECdef ==> abcXdef 400 std::vector<size_t> offsets; 401 for (size_t t = 0; t < 9; ++t) 402 offsets.push_back(t); 403 internal::AdjustEncodingOffset::Adjustments adjustments; 404 adjustments.push_back(3); 405 std::for_each(offsets.begin(), offsets.end(), 406 internal::AdjustEncodingOffset(adjustments)); 407 size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6}; 408 EXPECT_EQ(offsets.size(), arraysize(expected_1)); 409 for (size_t i = 0; i < arraysize(expected_1); ++i) 410 EXPECT_EQ(expected_1[i], offsets[i]); 411 412 413 // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX 414 offsets.clear(); 415 for (size_t t = 0; t < 18; ++t) 416 offsets.push_back(t); 417 adjustments.clear(); 418 adjustments.push_back(0); 419 adjustments.push_back(6); 420 adjustments.push_back(9); 421 adjustments.push_back(15); 422 std::for_each(offsets.begin(), offsets.end(), 423 internal::AdjustEncodingOffset(adjustments)); 424 size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos, 425 kNpos, 6, 7, 8, 9, kNpos, kNpos}; 426 EXPECT_EQ(offsets.size(), arraysize(expected_2)); 427 for (size_t i = 0; i < arraysize(expected_2); ++i) 428 EXPECT_EQ(expected_2[i], offsets[i]); 429 } 430 431 } // namespace 432 } // namespace net 433