Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <algorithm>
      6 #include <string>
      7 
      8 #include "net/base/escape.h"
      9 
     10 #include "base/basictypes.h"
     11 #include "base/strings/string_util.h"
     12 #include "base/strings/stringprintf.h"
     13 #include "base/strings/utf_string_conversions.h"
     14 #include "testing/gtest/include/gtest/gtest.h"
     15 
     16 namespace net {
     17 namespace {
     18 
     19 struct EscapeCase {
     20   const char* input;
     21   const char* output;
     22 };
     23 
     24 struct UnescapeURLCase {
     25   const wchar_t* input;
     26   UnescapeRule::Type rules;
     27   const wchar_t* output;
     28 };
     29 
     30 struct UnescapeURLCaseASCII {
     31   const char* input;
     32   UnescapeRule::Type rules;
     33   const char* output;
     34 };
     35 
     36 struct UnescapeAndDecodeCase {
     37   const char* input;
     38 
     39   // The expected output when run through UnescapeURL.
     40   const char* url_unescaped;
     41 
     42   // The expected output when run through UnescapeQuery.
     43   const char* query_unescaped;
     44 
     45   // The expected output when run through UnescapeAndDecodeURLComponent.
     46   const wchar_t* decoded;
     47 };
     48 
     49 struct AdjustOffsetCase {
     50   const char* input;
     51   size_t input_offset;
     52   size_t output_offset;
     53 };
     54 
     55 struct EscapeForHTMLCase {
     56   const char* input;
     57   const char* expected_output;
     58 };
     59 
     60 TEST(EscapeTest, EscapeTextForFormSubmission) {
     61   const EscapeCase escape_cases[] = {
     62     {"foo", "foo"},
     63     {"foo bar", "foo+bar"},
     64     {"foo++", "foo%2B%2B"}
     65   };
     66   for (size_t i = 0; i < arraysize(escape_cases); ++i) {
     67     EscapeCase value = escape_cases[i];
     68     EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, true));
     69   }
     70 
     71   const EscapeCase escape_cases_no_plus[] = {
     72     {"foo", "foo"},
     73     {"foo bar", "foo%20bar"},
     74     {"foo++", "foo%2B%2B"}
     75   };
     76   for (size_t i = 0; i < arraysize(escape_cases_no_plus); ++i) {
     77     EscapeCase value = escape_cases_no_plus[i];
     78     EXPECT_EQ(value.output, EscapeQueryParamValue(value.input, false));
     79   }
     80 
     81   // Test all the values in we're supposed to be escaping.
     82   const std::string no_escape(
     83     "abcdefghijklmnopqrstuvwxyz"
     84     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
     85     "0123456789"
     86     "!'()*-._~");
     87   for (int i = 0; i < 256; ++i) {
     88     std::string in;
     89     in.push_back(i);
     90     std::string out = EscapeQueryParamValue(in, true);
     91     if (0 == i) {
     92       EXPECT_EQ(out, std::string("%00"));
     93     } else if (32 == i) {
     94       // Spaces are plus escaped like web forms.
     95       EXPECT_EQ(out, std::string("+"));
     96     } else if (no_escape.find(in) == std::string::npos) {
     97       // Check %hex escaping
     98       std::string expected = base::StringPrintf("%%%02X", i);
     99       EXPECT_EQ(expected, out);
    100     } else {
    101       // No change for things in the no_escape list.
    102       EXPECT_EQ(out, in);
    103     }
    104   }
    105 }
    106 
    107 TEST(EscapeTest, EscapePath) {
    108   ASSERT_EQ(
    109     // Most of the character space we care about, un-escaped
    110     EscapePath(
    111       "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
    112       "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    113       "[\\]^_`abcdefghijklmnopqrstuvwxyz"
    114       "{|}~\x7f\x80\xff"),
    115     // Escaped
    116     "%02%0A%1D%20!%22%23$%25&'()*+,-./0123456789%3A;"
    117     "%3C=%3E%3F@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    118     "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
    119     "%7B%7C%7D~%7F%80%FF");
    120 }
    121 
    122 TEST(EscapeTest, DataURLWithAccentedCharacters) {
    123   const std::string url =
    124       "text/html;charset=utf-8,%3Chtml%3E%3Cbody%3ETonton,%20ton%20th%C3"
    125       "%A9%20t'a-t-il%20%C3%B4t%C3%A9%20ta%20toux%20";
    126 
    127   base::OffsetAdjuster::Adjustments adjustments;
    128   net::UnescapeAndDecodeUTF8URLComponentWithAdjustments(
    129       url, UnescapeRule::SPACES, &adjustments);
    130 }
    131 
    132 TEST(EscapeTest, EscapeUrlEncodedData) {
    133   ASSERT_EQ(
    134     // Most of the character space we care about, un-escaped
    135     EscapeUrlEncodedData(
    136       "\x02\n\x1d !\"#$%&'()*+,-./0123456789:;"
    137       "<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    138       "[\\]^_`abcdefghijklmnopqrstuvwxyz"
    139       "{|}~\x7f\x80\xff", true),
    140     // Escaped
    141     "%02%0A%1D+!%22%23%24%25%26%27()*%2B,-./0123456789:%3B"
    142     "%3C%3D%3E%3F%40ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    143     "%5B%5C%5D%5E_%60abcdefghijklmnopqrstuvwxyz"
    144     "%7B%7C%7D~%7F%80%FF");
    145 }
    146 
    147 TEST(EscapeTest, EscapeUrlEncodedDataSpace) {
    148   ASSERT_EQ(EscapeUrlEncodedData("a b", true), "a+b");
    149   ASSERT_EQ(EscapeUrlEncodedData("a b", false), "a%20b");
    150 }
    151 
    152 TEST(EscapeTest, UnescapeURLComponentASCII) {
    153   const UnescapeURLCaseASCII unescape_cases[] = {
    154     {"", UnescapeRule::NORMAL, ""},
    155     {"%2", UnescapeRule::NORMAL, "%2"},
    156     {"%%%%%%", UnescapeRule::NORMAL, "%%%%%%"},
    157     {"Don't escape anything", UnescapeRule::NORMAL, "Don't escape anything"},
    158     {"Invalid %escape %2", UnescapeRule::NORMAL, "Invalid %escape %2"},
    159     {"Some%20random text %25%2dOK", UnescapeRule::NONE,
    160      "Some%20random text %25%2dOK"},
    161     {"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
    162      "Some%20random text %25-OK"},
    163     {"Some%20random text %25%2dOK", UnescapeRule::SPACES,
    164      "Some random text %25-OK"},
    165     {"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
    166      "Some%20random text %-OK"},
    167     {"Some%20random text %25%2dOK",
    168      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
    169      "Some random text %-OK"},
    170     {"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, "\xA0\xB1\xC2\xD3\xE4\xF5"},
    171     {"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, "\xAa\xBb\xCc\xDd\xEe\xFf"},
    172     // Certain URL-sensitive characters should not be unescaped unless asked.
    173     {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
    174      "Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
    175     {"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
    176      UnescapeRule::URL_SPECIAL_CHARS,
    177      "Hello%20%13%10world ## ?? == && %% ++"},
    178     // We can neither escape nor unescape '@' since some websites expect it to
    179     // be preserved as either '@' or "%40".
    180     // See http://b/996720 and http://crbug.com/23933 .
    181     {"me@my%40example", UnescapeRule::NORMAL, "me@my%40example"},
    182     // Control characters.
    183     {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
    184      "%01%02%03%04%05%06%07%08%09 %"},
    185     {"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
    186      "\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
    187     {"Hello%20%13%10%02", UnescapeRule::SPACES, "Hello %13%10%02"},
    188     {"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS, "Hello%20\x13\x10\x02"},
    189   };
    190 
    191   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
    192     std::string str(unescape_cases[i].input);
    193     EXPECT_EQ(std::string(unescape_cases[i].output),
    194               UnescapeURLComponent(str, unescape_cases[i].rules));
    195   }
    196 
    197   // Test the NULL character unescaping (which wouldn't work above since those
    198   // are just char pointers).
    199   std::string input("Null");
    200   input.push_back(0);  // Also have a NULL in the input.
    201   input.append("%00%39Test");
    202 
    203   // When we're unescaping NULLs
    204   std::string expected("Null");
    205   expected.push_back(0);
    206   expected.push_back(0);
    207   expected.append("9Test");
    208   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
    209 
    210   // When we're not unescaping NULLs.
    211   expected = "Null";
    212   expected.push_back(0);
    213   expected.append("%009Test");
    214   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
    215 }
    216 
    217 TEST(EscapeTest, UnescapeURLComponent) {
    218   const UnescapeURLCase unescape_cases[] = {
    219     {L"", UnescapeRule::NORMAL, L""},
    220     {L"%2", UnescapeRule::NORMAL, L"%2"},
    221     {L"%%%%%%", UnescapeRule::NORMAL, L"%%%%%%"},
    222     {L"Don't escape anything", UnescapeRule::NORMAL, L"Don't escape anything"},
    223     {L"Invalid %escape %2", UnescapeRule::NORMAL, L"Invalid %escape %2"},
    224     {L"Some%20random text %25%2dOK", UnescapeRule::NONE,
    225      L"Some%20random text %25%2dOK"},
    226     {L"Some%20random text %25%2dOK", UnescapeRule::NORMAL,
    227      L"Some%20random text %25-OK"},
    228     {L"Some%20random text %25%E2%80", UnescapeRule::NORMAL,
    229      L"Some%20random text %25\xE2\x80"},
    230     {L"Some%20random text %25%E2%80OK", UnescapeRule::NORMAL,
    231      L"Some%20random text %25\xE2\x80OK"},
    232     {L"Some%20random text %25%E2%80%84OK", UnescapeRule::NORMAL,
    233      L"Some%20random text %25\xE2\x80\x84OK"},
    234 
    235     // BiDi Control characters should not be unescaped.
    236     {L"Some%20random text %25%D8%9COK", UnescapeRule::NORMAL,
    237      L"Some%20random text %25%D8%9COK"},
    238     {L"Some%20random text %25%E2%80%8EOK", UnescapeRule::NORMAL,
    239      L"Some%20random text %25%E2%80%8EOK"},
    240     {L"Some%20random text %25%E2%80%8FOK", UnescapeRule::NORMAL,
    241      L"Some%20random text %25%E2%80%8FOK"},
    242     {L"Some%20random text %25%E2%80%AAOK", UnescapeRule::NORMAL,
    243      L"Some%20random text %25%E2%80%AAOK"},
    244     {L"Some%20random text %25%E2%80%ABOK", UnescapeRule::NORMAL,
    245      L"Some%20random text %25%E2%80%ABOK"},
    246     {L"Some%20random text %25%E2%80%AEOK", UnescapeRule::NORMAL,
    247      L"Some%20random text %25%E2%80%AEOK"},
    248     {L"Some%20random text %25%E2%81%A6OK", UnescapeRule::NORMAL,
    249      L"Some%20random text %25%E2%81%A6OK"},
    250     {L"Some%20random text %25%E2%81%A9OK", UnescapeRule::NORMAL,
    251      L"Some%20random text %25%E2%81%A9OK"},
    252 
    253     {L"Some%20random text %25%2dOK", UnescapeRule::SPACES,
    254      L"Some random text %25-OK"},
    255     {L"Some%20random text %25%2dOK", UnescapeRule::URL_SPECIAL_CHARS,
    256      L"Some%20random text %-OK"},
    257     {L"Some%20random text %25%2dOK",
    258      UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS,
    259      L"Some random text %-OK"},
    260     {L"%A0%B1%C2%D3%E4%F5", UnescapeRule::NORMAL, L"\xA0\xB1\xC2\xD3\xE4\xF5"},
    261     {L"%Aa%Bb%Cc%Dd%Ee%Ff", UnescapeRule::NORMAL, L"\xAa\xBb\xCc\xDd\xEe\xFf"},
    262     // Certain URL-sensitive characters should not be unescaped unless asked.
    263     {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+", UnescapeRule::SPACES,
    264      L"Hello %13%10world %23# %3F? %3D= %26& %25% %2B+"},
    265     {L"Hello%20%13%10world %23# %3F? %3D= %26& %25% %2B+",
    266      UnescapeRule::URL_SPECIAL_CHARS,
    267      L"Hello%20%13%10world ## ?? == && %% ++"},
    268     // We can neither escape nor unescape '@' since some websites expect it to
    269     // be preserved as either '@' or "%40".
    270     // See http://b/996720 and http://crbug.com/23933 .
    271     {L"me@my%40example", UnescapeRule::NORMAL, L"me@my%40example"},
    272     // Control characters.
    273     {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::URL_SPECIAL_CHARS,
    274      L"%01%02%03%04%05%06%07%08%09 %"},
    275     {L"%01%02%03%04%05%06%07%08%09 %25", UnescapeRule::CONTROL_CHARS,
    276      L"\x01\x02\x03\x04\x05\x06\x07\x08\x09 %25"},
    277     {L"Hello%20%13%10%02", UnescapeRule::SPACES, L"Hello %13%10%02"},
    278     {L"Hello%20%13%10%02", UnescapeRule::CONTROL_CHARS,
    279      L"Hello%20\x13\x10\x02"},
    280     {L"Hello\x9824\x9827", UnescapeRule::CONTROL_CHARS,
    281      L"Hello\x9824\x9827"},
    282   };
    283 
    284   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
    285     base::string16 str(base::WideToUTF16(unescape_cases[i].input));
    286     EXPECT_EQ(base::WideToUTF16(unescape_cases[i].output),
    287               UnescapeURLComponent(str, unescape_cases[i].rules));
    288   }
    289 
    290   // Test the NULL character unescaping (which wouldn't work above since those
    291   // are just char pointers).
    292   base::string16 input(base::WideToUTF16(L"Null"));
    293   input.push_back(0);  // Also have a NULL in the input.
    294   input.append(base::WideToUTF16(L"%00%39Test"));
    295 
    296   // When we're unescaping NULLs
    297   base::string16 expected(base::WideToUTF16(L"Null"));
    298   expected.push_back(0);
    299   expected.push_back(0);
    300   expected.append(base::ASCIIToUTF16("9Test"));
    301   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::CONTROL_CHARS));
    302 
    303   // When we're not unescaping NULLs.
    304   expected = base::WideToUTF16(L"Null");
    305   expected.push_back(0);
    306   expected.append(base::WideToUTF16(L"%009Test"));
    307   EXPECT_EQ(expected, UnescapeURLComponent(input, UnescapeRule::NORMAL));
    308 }
    309 
    310 TEST(EscapeTest, UnescapeAndDecodeUTF8URLComponent) {
    311   const UnescapeAndDecodeCase unescape_cases[] = {
    312     { "%",
    313       "%",
    314       "%",
    315      L"%"},
    316     { "+",
    317       "+",
    318       " ",
    319      L"+"},
    320     { "%2+",
    321       "%2+",
    322       "%2 ",
    323      L"%2+"},
    324     { "+%%%+%%%",
    325       "+%%%+%%%",
    326       " %%% %%%",
    327      L"+%%%+%%%"},
    328     { "Don't escape anything",
    329       "Don't escape anything",
    330       "Don't escape anything",
    331      L"Don't escape anything"},
    332     { "+Invalid %escape %2+",
    333       "+Invalid %escape %2+",
    334       " Invalid %escape %2 ",
    335      L"+Invalid %escape %2+"},
    336     { "Some random text %25%2dOK",
    337       "Some random text %25-OK",
    338       "Some random text %25-OK",
    339      L"Some random text %25-OK"},
    340     { "%01%02%03%04%05%06%07%08%09",
    341       "%01%02%03%04%05%06%07%08%09",
    342       "%01%02%03%04%05%06%07%08%09",
    343      L"%01%02%03%04%05%06%07%08%09"},
    344     { "%E4%BD%A0+%E5%A5%BD",
    345       "\xE4\xBD\xA0+\xE5\xA5\xBD",
    346       "\xE4\xBD\xA0 \xE5\xA5\xBD",
    347      L"\x4f60+\x597d"},
    348     { "%ED%ED",  // Invalid UTF-8.
    349       "\xED\xED",
    350       "\xED\xED",
    351      L"%ED%ED"},  // Invalid UTF-8 -> kept unescaped.
    352   };
    353 
    354   for (size_t i = 0; i < arraysize(unescape_cases); i++) {
    355     std::string unescaped = UnescapeURLComponent(unescape_cases[i].input,
    356                                                  UnescapeRule::NORMAL);
    357     EXPECT_EQ(std::string(unescape_cases[i].url_unescaped), unescaped);
    358 
    359     unescaped = UnescapeURLComponent(unescape_cases[i].input,
    360                                      UnescapeRule::REPLACE_PLUS_WITH_SPACE);
    361     EXPECT_EQ(std::string(unescape_cases[i].query_unescaped), unescaped);
    362 
    363     // TODO: Need to test unescape_spaces and unescape_percent.
    364     base::string16 decoded = UnescapeAndDecodeUTF8URLComponent(
    365         unescape_cases[i].input, UnescapeRule::NORMAL);
    366     EXPECT_EQ(base::WideToUTF16(unescape_cases[i].decoded), decoded);
    367   }
    368 }
    369 
    370 TEST(EscapeTest, AdjustOffset) {
    371   const AdjustOffsetCase adjust_cases[] = {
    372     {"", 0, 0},
    373     {"test", 0, 0},
    374     {"test", 2, 2},
    375     {"test", 4, 4},
    376     {"test", std::string::npos, std::string::npos},
    377     {"%2dtest", 6, 4},
    378     {"%2dtest", 3, 1},
    379     {"%2dtest", 2, std::string::npos},
    380     {"%2dtest", 1, std::string::npos},
    381     {"%2dtest", 0, 0},
    382     {"test%2d", 2, 2},
    383     {"%E4%BD%A0+%E5%A5%BD", 9, 1},
    384     {"%E4%BD%A0+%E5%A5%BD", 6, std::string::npos},
    385     {"%E4%BD%A0+%E5%A5%BD", 0, 0},
    386     {"%E4%BD%A0+%E5%A5%BD", 10, 2},
    387     {"%E4%BD%A0+%E5%A5%BD", 19, 3},
    388 
    389     {"hi%41test%E4%BD%A0+%E5%A5%BD", 18, 8},
    390     {"hi%41test%E4%BD%A0+%E5%A5%BD", 15, std::string::npos},
    391     {"hi%41test%E4%BD%A0+%E5%A5%BD", 9, 7},
    392     {"hi%41test%E4%BD%A0+%E5%A5%BD", 19, 9},
    393     {"hi%41test%E4%BD%A0+%E5%A5%BD", 28, 10},
    394     {"hi%41test%E4%BD%A0+%E5%A5%BD", 0, 0},
    395     {"hi%41test%E4%BD%A0+%E5%A5%BD", 2, 2},
    396     {"hi%41test%E4%BD%A0+%E5%A5%BD", 3, std::string::npos},
    397     {"hi%41test%E4%BD%A0+%E5%A5%BD", 5, 3},
    398 
    399     {"%E4%BD%A0+%E5%A5%BDhi%41test", 9, 1},
    400     {"%E4%BD%A0+%E5%A5%BDhi%41test", 6, std::string::npos},
    401     {"%E4%BD%A0+%E5%A5%BDhi%41test", 0, 0},
    402     {"%E4%BD%A0+%E5%A5%BDhi%41test", 10, 2},
    403     {"%E4%BD%A0+%E5%A5%BDhi%41test", 19, 3},
    404     {"%E4%BD%A0+%E5%A5%BDhi%41test", 21, 5},
    405     {"%E4%BD%A0+%E5%A5%BDhi%41test", 22, std::string::npos},
    406     {"%E4%BD%A0+%E5%A5%BDhi%41test", 24, 6},
    407     {"%E4%BD%A0+%E5%A5%BDhi%41test", 28, 10},
    408 
    409     {"%ED%B0%80+%E5%A5%BD", 6, 6},  // not convertable to UTF-8
    410   };
    411 
    412   for (size_t i = 0; i < arraysize(adjust_cases); i++) {
    413     size_t offset = adjust_cases[i].input_offset;
    414     base::OffsetAdjuster::Adjustments adjustments;
    415     UnescapeAndDecodeUTF8URLComponentWithAdjustments(
    416         adjust_cases[i].input, UnescapeRule::NORMAL, &adjustments);
    417     base::OffsetAdjuster::AdjustOffset(adjustments, &offset);
    418     EXPECT_EQ(adjust_cases[i].output_offset, offset)
    419         << "input=" << adjust_cases[i].input
    420         << " offset=" << adjust_cases[i].input_offset;
    421   }
    422 }
    423 
    424 TEST(EscapeTest, EscapeForHTML) {
    425   const EscapeForHTMLCase tests[] = {
    426     { "hello", "hello" },
    427     { "<hello>", "&lt;hello&gt;" },
    428     { "don\'t mess with me", "don&#39;t mess with me" },
    429   };
    430   for (size_t i = 0; i < arraysize(tests); ++i) {
    431     std::string result = EscapeForHTML(std::string(tests[i].input));
    432     EXPECT_EQ(std::string(tests[i].expected_output), result);
    433   }
    434 }
    435 
    436 TEST(EscapeTest, UnescapeForHTML) {
    437   const EscapeForHTMLCase tests[] = {
    438     { "", "" },
    439     { "&lt;hello&gt;", "<hello>" },
    440     { "don&#39;t mess with me", "don\'t mess with me" },
    441     { "&lt;&gt;&amp;&quot;&#39;", "<>&\"'" },
    442     { "& lt; &amp ; &; '", "& lt; &amp ; &; '" },
    443     { "&amp;", "&" },
    444     { "&quot;", "\"" },
    445     { "&#39;", "'" },
    446     { "&lt;", "<" },
    447     { "&gt;", ">" },
    448     { "&amp; &", "& &" },
    449   };
    450   for (size_t i = 0; i < arraysize(tests); ++i) {
    451     base::string16 result = UnescapeForHTML(base::ASCIIToUTF16(tests[i].input));
    452     EXPECT_EQ(base::ASCIIToUTF16(tests[i].expected_output), result);
    453   }
    454 }
    455 
    456 
    457 }  // namespace
    458 }  // namespace net
    459