Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <algorithm>
      6 #include <string>
      7 
      8 #include "net/base/escape.h"
      9 
     10 #include "base/basictypes.h"
     11 #include "base/i18n/icu_string_conversions.h"
     12 #include "base/string_util.h"
     13 #include "base/stringprintf.h"
     14 #include "base/utf_string_conversions.h"
     15 #include "testing/gtest/include/gtest/gtest.h"
     16 
     17 namespace {
     18 
     19 static const size_t kNpos = string16::npos;
     20 
     21 struct EscapeCase {
     22   const wchar_t* input;
     23   const wchar_t* output;
     24 };
     25 
     26 struct UnescapeURLCase {
     27   const wchar_t* input;
     28   UnescapeRule::Type rules;
     29   const wchar_t* output;
     30 };
     31 
     32 struct UnescapeURLCaseASCII {
     33   const char* input;
     34   UnescapeRule::Type rules;
     35   const char* output;
     36 };
     37 
     38 struct UnescapeAndDecodeCase {
     39   const char* input;
     40 
     41   // The expected output when run through UnescapeURL.
     42   const char* url_unescaped;
     43 
     44   // The expected output when run through UnescapeQuery.
     45   const char* query_unescaped;
     46 
     47   // The expected output when run through UnescapeAndDecodeURLComponent.
     48   const wchar_t* decoded;
     49 };
     50 
     51 struct AdjustOffsetCase {
     52   const char* input;
     53   size_t input_offset;
     54   size_t output_offset;
     55 };
     56 
     57 struct EscapeForHTMLCase {
     58   const char* input;
     59   const char* expected_output;
     60 };
     61 
     62 }  // namespace
     63 
     64 TEST(EscapeTest, EscapeTextForFormSubmission) {
     65   const EscapeCase escape_cases[] = {
     66     {L"foo", L"foo"},
     67     {L"foo bar", L"foo+bar"},
     68     {L"foo++", L"foo%2B%2B"}
     69   };
     70   for (size_t i = 0; i < arraysize(escape_cases); ++i) {
     71     EscapeCase value = escape_cases[i];
     72     EXPECT_EQ(WideToUTF16Hack(value.output),
     73               EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), true));
     74   }
     75 
     76   const EscapeCase escape_cases_no_plus[] = {
     77     {L"foo", L"foo"},
     78     {L"foo bar", L"foo%20bar"},
     79     {L"foo++", L"foo%2B%2B"}
     80   };
     81   for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) {
     82     EscapeCase value = escape_cases_no_plus[i];
     83     EXPECT_EQ(WideToUTF16Hack(value.output),
     84               EscapeQueryParamValueUTF8(WideToUTF16Hack(value.input), false));
     85   }
     86 
     87   // Test all the values in we're supposed to be escaping.
     88   const std::string no_escape(
     89     "abcdefghijklmnopqrstuvwxyz"
     90     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     91     "0123456789"
     92     "!'()*-._~");
     93   for (int i = 0; i < 256; ++i) {
     94     std::string in;
     95     in.push_back(i);
     96     std::string out = EscapeQueryParamValue(in, true);
     97     if (0 == i) {
     98       EXPECT_EQ(out, std::string("%00"));
     99     } else if (32 == i) {
    100       // Spaces are plus escaped like web forms.
    101       EXPECT_EQ(out, std::string("+"));
    102     } else if (no_escape.find(in) == std::string::npos) {
    103       // Check %hex escaping
    104       std::string expected = base::StringPrintf("%%%02X", i);
    105       EXPECT_EQ(expected, out);
    106     } else {
    107       // No change for things in the no_escape list.
    108       EXPECT_EQ(out, in);
    109     }
    110   }
    111 
    112   // Check to see if EscapeQueryParamValueUTF8 is the same as
    113   // EscapeQueryParamValue(..., kCodepageUTF8,)
    114   string16 test_str;
    115   test_str.reserve(5000);
    116   for (int i = 1; i < 5000; ++i) {
    117     test_str.push_back(i);
    118   }
    119   string16 wide;
    120   EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, true,
    121                                     &wide));
    122   EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, true));
    123   EXPECT_TRUE(EscapeQueryParamValue(test_str, base::kCodepageUTF8, false,
    124                                     &wide));
    125   EXPECT_EQ(wide, EscapeQueryParamValueUTF8(test_str, false));
    126 }
    127 
    128 TEST(EscapeTest, EscapePath) {
    129   ASSERT_EQ(
    130     // Most of the character space we care about, un-escaped
    131     EscapePath(
    132       "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
    133       "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    134       "[\\]^_`abcdefghijklmnopqrstuvwxyz"
    135       "{|}~\x7f\x80\xff"),
    136     // Escaped
    137     "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
    138     "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    139     "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
    140     "%7B%7C%7D~%7F%80%FF");
    141 }
    142 
    143 TEST(EscapeTest, EscapeUrlEncodedData) {
    144   ASSERT_EQ(
    145     // Most of the character space we care about, un-escaped
    146     EscapeUrlEncodedData(
    147       "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
    148       "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    149       "[\\]^_`abcdefghijklmnopqrstuvwxyz"
    150       "{|}~\x7f\x80\xff"),
    151     // Escaped
    152     "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
    153     "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    154     "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
    155     "%7B%7C%7D~%7F%80%FF");
    156 }
    157 
    158 TEST(EscapeTest, UnescapeURLComponentASCII) {
    159   const UnescapeURLCaseASCII unescape_cases[] = {
    160     {"", UnescapeRule::NORMAL, ""},
    161     {"%2", UnescapeRule::NORMAL, "%2"},
    162     {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
    163     {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
    164     {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
    165     {"Some%20random text %25%2dOK", UnescapeRule::NONE,
    166      "Some%20random text %25%2dOK"},
    167     {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
    168      "Some%20random text %25-OK"},
    169     {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
    170      "Some random text %25-OK"},
    171     {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
    172      "Some%20random text %-OK"},
    173     {"Some%20random text %25%2dOK",
    174      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
    175      "Some random text %-OK"},
    176     {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
    177     {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
    178     // Certain URL-sensitive characters should not be unescaped unless asked.
    179     {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
    180      "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
    181     {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
    182      UnescapeRule::URL_SPECIAL_CHARS,
    183      "Hello%20%13%10world ## ?? == && %% ++"},
    184     // Control characters.
    185     {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
    186      "%01%02%03%04%05%06%07%08%09 %"},
    187     {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
    188      "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
    189     {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
    190     {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
    191   };
    192 
    193   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
    194     std::string str(unescape_cases[i].input);
    195     EXPECT_EQ(std::string(unescape_cases[i].output),
    196               UnescapeURLComponent(str, unescape_cases[i].rules));
    197   }
    198 
    199   // Test the NULL character unescaping (which wouldn't work above since those
    200   // are just char pointers).
    201   std::string input("Null");
    202   input.push_back(0);  // Also have a NULL in the input.
    203   input.append("%00%39Test");
    204 
    205   // When we're unescaping NULLs
    206   std::string expected("Null");
    207   expected.push_back(0);
    208   expected.push_back(0);
    209   expected.append("9Test");
    210   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
    211 
    212   // When we're not unescaping NULLs.
    213   expected = "Null";
    214   expected.push_back(0);
    215   expected.append("%009Test");
    216   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
    217 }
    218 
    219 TEST(EscapeTest, UnescapeURLComponent) {
    220   const UnescapeURLCase unescape_cases[] = {
    221     {L"", UnescapeRule::NORMAL, L""},
    222     {L"%2", UnescapeRule::NORMAL, L"%2"},
    223     {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
    224     {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
    225     {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
    226     {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
    227      L"Some%20random text %25%2dOK"},
    228     {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
    229      L"Some%20random text %25-OK"},
    230     {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
    231      L"Some random text %25-OK"},
    232     {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
    233      L"Some%20random text %-OK"},
    234     {L"Some%20random text %25%2dOK",
    235      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
    236      L"Some random text %-OK"},
    237     {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
    238     {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
    239     // Certain URL-sensitive characters should not be unescaped unless asked.
    240     {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
    241      L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
    242     {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
    243      UnescapeRule::URL_SPECIAL_CHARS,
    244      L"Hello%20%13%10world ## ?? == && %% ++"},
    245     // We can neither escape nor unescape '@' since some websites expect it to
    246     // be preserved as either '@' or "%40".
    247     // See http://b/996720 and http://crbug.com/23933 .
    248     {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"},
    249     // Control characters.
    250     {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
    251      L"%01%02%03%04%05%06%07%08%09 %"},
    252     {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
    253      L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
    254     {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
    255     {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
    256      L"Hello%20\x13\x10\x02"},
    257     {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
    258      L"Hello\x9824\x9827"},
    259   };
    260 
    261   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
    262     string16 str(WideToUTF16(unescape_cases[i].input));
    263     EXPECT_EQ(WideToUTF16(unescape_cases[i].output),
    264               UnescapeURLComponent(str, unescape_cases[i].rules));
    265   }
    266 
    267   // Test the NULL character unescaping (which wouldn't work above since those
    268   // are just char pointers).
    269   string16 input(WideToUTF16(L"Null"));
    270   input.push_back(0);  // Also have a NULL in the input.
    271   input.append(WideToUTF16(L"%00%39Test"));
    272 
    273   // When we're unescaping NULLs
    274   string16 expected(WideToUTF16(L"Null"));
    275   expected.push_back(0);
    276   expected.push_back(0);
    277   expected.append(ASCIIToUTF16("9Test"));
    278   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
    279 
    280   // When we're not unescaping NULLs.
    281   expected = WideToUTF16(L"Null");
    282   expected.push_back(0);
    283   expected.append(WideToUTF16(L"%009Test"));
    284   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
    285 }
    286 
    287 TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
    288   const UnescapeAndDecodeCase unescape_cases[] = {
    289     { "%",
    290       "%",
    291       "%",
    292      L"%"},
    293     { "+",
    294       "+",
    295       " ",
    296      L"+"},
    297     { "%2+",
    298       "%2+",
    299       "%2 ",
    300      L"%2+"},
    301     { "+%%%+%%%",
    302       "+%%%+%%%",
    303       " %%% %%%",
    304      L"+%%%+%%%"},
    305     { "Don't escape anything",
    306       "Don't escape anything",
    307       "Don't escape anything",
    308      L"Don't escape anything"},
    309     { "+Invalid %escape %2+",
    310       "+Invalid %escape %2+",
    311       " Invalid %escape %2 ",
    312      L"+Invalid %escape %2+"},
    313     { "Some random text %25%2dOK",
    314       "Some random text %25-OK",
    315       "Some random text %25-OK",
    316      L"Some random text %25-OK"},
    317     { "%01%02%03%04%05%06%07%08%09",
    318       "%01%02%03%04%05%06%07%08%09",
    319       "%01%02%03%04%05%06%07%08%09",
    320      L"%01%02%03%04%05%06%07%08%09"},
    321     { "%E4%BD%A0+%E5%A5%BD",
    322       "\xE4\xBD\xA0+\xE5\xA5\xBD",
    323       "\xE4\xBD\xA0 \xE5\xA5\xBD",
    324      L"\x4f60+\x597d"},
    325     { "%ED%ED",  // Invalid UTF-8.
    326       "\xED\xED",
    327       "\xED\xED",
    328      L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
    329   };
    330 
    331   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
    332     std::string unescaped = UnescapeURLComponent(unescape_cases[i].input,
    333                                                  UnescapeRule::NORMAL);
    334     EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped);
    335 
    336     unescaped = UnescapeURLComponent(unescape_cases[i].input,
    337                                      UnescapeRule::REPLACE_PLUS_WITH_SPACE);
    338     EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped);
    339 
    340     // TODO: Need to test unescape_spaces and unescape_percent.
    341     string16 decoded = UnescapeAndDecodeUTF8URLComponent(
    342         unescape_cases[i].input, UnescapeRule::NORMAL, NULL);
    343     EXPECT_EQ(WideToUTF16Hack(std::wstring(unescape_cases[i].decoded)),
    344               decoded);
    345   }
    346 }
    347 
    348 TEST(EscapeTest, AdjustOffset) {
    349   const AdjustOffsetCase adjust_cases[] = {
    350     {"", 0, std::wstring::npos},
    351     {"test", 0, 0},
    352     {"test", 2, 2},
    353     {"test", 4, std::wstring::npos},
    354     {"test", std::wstring::npos, std::wstring::npos},
    355     {"%2dtest", 6, 4},
    356     {"%2dtest", 2, std::wstring::npos},
    357     {"test%2d", 2, 2},
    358     {"%E4%BD%A0+%E5%A5%BD", 9, 1},
    359     {"%E4%BD%A0+%E5%A5%BD", 6, std::wstring::npos},
    360     {"%ED%B0%80+%E5%A5%BD", 6, 6},
    361   };
    362 
    363   for (size_t i = 0; i < arraysize(adjust_cases); i++) {
    364     size_t offset = adjust_cases[i].input_offset;
    365     UnescapeAndDecodeUTF8URLComponent(adjust_cases[i].input,
    366                                       UnescapeRule::NORMAL, &offset);
    367     EXPECT_EQ(adjust_cases[i].output_offset, offset);
    368   }
    369 }
    370 
    371 TEST(EscapeTest, EscapeForHTML) {
    372   const EscapeForHTMLCase tests[] = {
    373     { "hello", "hello" },
    374     { "<hello>", "&lt;hello&gt;" },
    375     { "don\'t mess with me", "don&#39;t mess with me" },
    376   };
    377   for (size_t i = 0; i < arraysize(tests); ++i) {
    378     std::string result = EscapeForHTML(std::string(tests[i].input));
    379     EXPECT_EQ(std::string(tests[i].expected_output), result);
    380   }
    381 }
    382 
    383 TEST(EscapeTest, UnescapeForHTML) {
    384   const EscapeForHTMLCase tests[] = {
    385     { "", "" },
    386     { "&lt;hello&gt;", "<hello>" },
    387     { "don&#39;t mess with me", "don\'t mess with me" },
    388     { "&lt;&gt;&amp;&quot;&#39;", "<>&\"'" },
    389     { "& lt; &amp ; &; '", "& lt; &amp ; &; '" },
    390     { "&amp;", "&" },
    391     { "&quot;", "\"" },
    392     { "&#39;", "'" },
    393     { "&lt;", "<" },
    394     { "&gt;", ">" },
    395     { "&amp; &", "& &" },
    396   };
    397   for (size_t i = 0; i < arraysize(tests); ++i) {
    398     string16 result = UnescapeForHTML(ASCIIToUTF16(tests[i].input));
    399     EXPECT_EQ(ASCIIToUTF16(tests[i].expected_output), result);
    400   }
    401 }
    402 
    403 TEST(EscapeTest, AdjustEncodingOffset) {
    404   // Imagine we have strings as shown in the following cases where the
    405   // %XX's represent encoded characters
    406 
    407   // 1: abc%ECdef ==> abcXdef
    408   std::vector<size_t> offsets;
    409   for (size_t t = 0; t < 9; ++t)
    410     offsets.push_back(t);
    411   AdjustEncodingOffset::Adjustments adjustments;
    412   adjustments.push_back(3);
    413   std::for_each(offsets.begin(), offsets.end(),
    414                 AdjustEncodingOffset(adjustments));
    415   size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
    416   EXPECT_EQ(offsets.size(), arraysize(expected_1));
    417   for (size_t i = 0; i < arraysize(expected_1); ++i)
    418     EXPECT_EQ(expected_1[i], offsets[i]);
    419 
    420 
    421   // 2: %ECabc%EC%ECdef%EC ==> XabcXXdefX
    422   offsets.clear();
    423   for (size_t t = 0; t < 18; ++t)
    424     offsets.push_back(t);
    425   adjustments.clear();
    426   adjustments.push_back(0);
    427   adjustments.push_back(6);
    428   adjustments.push_back(9);
    429   adjustments.push_back(15);
    430   std::for_each(offsets.begin(), offsets.end(),
    431                 AdjustEncodingOffset(adjustments));
    432   size_t expected_2[] = {0, kNpos, kNpos, 1, 2, 3, 4, kNpos, kNpos, 5, kNpos,
    433                          kNpos, 6, 7, 8, 9, kNpos, kNpos};
    434   EXPECT_EQ(offsets.size(), arraysize(expected_2));
    435   for (size_t i = 0; i < arraysize(expected_2); ++i)
    436     EXPECT_EQ(expected_2[i], offsets[i]);
    437 }
    438