Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <math.h>
      8 #include <stdarg.h>
      9 
     10 #include <algorithm>
     11 
     12 #include "base/basictypes.h"
     13 #include "base/strings/string16.h"
     14 #include "base/strings/utf_string_conversions.h"
     15 #include "testing/gmock/include/gmock/gmock.h"
     16 #include "testing/gtest/include/gtest/gtest.h"
     17 
     18 using ::testing::ElementsAre;
     19 
     20 namespace base {
     21 
     22 static const struct trim_case {
     23   const wchar_t* input;
     24   const TrimPositions positions;
     25   const wchar_t* output;
     26   const TrimPositions return_value;
     27 } trim_cases[] = {
     28   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
     29   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
     30   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
     31   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
     32   {L"", TRIM_ALL, L"", TRIM_NONE},
     33   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
     34   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
     35   {L"  ", TRIM_ALL, L"", TRIM_ALL},
     36   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
     37   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
     38 };
     39 
     40 static const struct trim_case_ascii {
     41   const char* input;
     42   const TrimPositions positions;
     43   const char* output;
     44   const TrimPositions return_value;
     45 } trim_cases_ascii[] = {
     46   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
     47   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
     48   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
     49   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
     50   {"", TRIM_ALL, "", TRIM_NONE},
     51   {"  ", TRIM_LEADING, "", TRIM_LEADING},
     52   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
     53   {"  ", TRIM_ALL, "", TRIM_ALL},
     54   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
     55 };
     56 
     57 namespace {
     58 
     59 // Helper used to test TruncateUTF8ToByteSize.
     60 bool Truncated(const std::string& input,
     61                const size_t byte_size,
     62                std::string* output) {
     63     size_t prev = input.length();
     64     TruncateUTF8ToByteSize(input, byte_size, output);
     65     return prev != output->length();
     66 }
     67 
     68 }  // namespace
     69 
     70 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
     71   std::string output;
     72 
     73   // Empty strings and invalid byte_size arguments
     74   EXPECT_FALSE(Truncated(std::string(), 0, &output));
     75   EXPECT_EQ(output, "");
     76   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
     77   EXPECT_EQ(output, "");
     78   EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
     79   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
     80 
     81   // Testing the truncation of valid UTF8 correctly
     82   EXPECT_TRUE(Truncated("abc", 2, &output));
     83   EXPECT_EQ(output, "ab");
     84   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
     85   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     86   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
     87   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     88   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
     89   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
     90 
     91   {
     92     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
     93     const std::string array_string(array, arraysize(array));
     94     EXPECT_TRUE(Truncated(array_string, 4, &output));
     95     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
     96   }
     97 
     98   {
     99     const char array[] = "\x00\xc2\x81\xc2\x81";
    100     const std::string array_string(array, arraysize(array));
    101     EXPECT_TRUE(Truncated(array_string, 4, &output));
    102     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
    103   }
    104 
    105   // Testing invalid UTF8
    106   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
    107   EXPECT_EQ(output.compare(""), 0);
    108   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
    109   EXPECT_EQ(output.compare(""), 0);
    110   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
    111   EXPECT_EQ(output.compare(""), 0);
    112 
    113   // Testing invalid UTF8 mixed with valid UTF8
    114   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
    115   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
    116   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
    117   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
    118   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
    119               10, &output));
    120   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
    121   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
    122               10, &output));
    123   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
    124   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
    125   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
    126 
    127   // Overlong sequences
    128   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
    129   EXPECT_EQ(output.compare(""), 0);
    130   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
    131   EXPECT_EQ(output.compare(""), 0);
    132   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
    133   EXPECT_EQ(output.compare(""), 0);
    134   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
    135   EXPECT_EQ(output.compare(""), 0);
    136   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
    137   EXPECT_EQ(output.compare(""), 0);
    138   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
    139   EXPECT_EQ(output.compare(""), 0);
    140   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
    141   EXPECT_EQ(output.compare(""), 0);
    142   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
    143   EXPECT_EQ(output.compare(""), 0);
    144   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
    145   EXPECT_EQ(output.compare(""), 0);
    146   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
    147   EXPECT_EQ(output.compare(""), 0);
    148   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
    149   EXPECT_EQ(output.compare(""), 0);
    150 
    151   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    152   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
    153   EXPECT_EQ(output.compare(""), 0);
    154   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
    155   EXPECT_EQ(output.compare(""), 0);
    156   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
    157   EXPECT_EQ(output.compare(""), 0);
    158 
    159   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    160   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
    161   EXPECT_EQ(output.compare(""), 0);
    162   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
    163   EXPECT_EQ(output.compare(""), 0);
    164 
    165   {
    166     const char array[] = "\x00\x00\xfe\xff";
    167     const std::string array_string(array, arraysize(array));
    168     EXPECT_TRUE(Truncated(array_string, 4, &output));
    169     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
    170   }
    171 
    172   // Variants on the previous test
    173   {
    174     const char array[] = "\xff\xfe\x00\x00";
    175     const std::string array_string(array, 4);
    176     EXPECT_FALSE(Truncated(array_string, 4, &output));
    177     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
    178   }
    179   {
    180     const char array[] = "\xff\x00\x00\xfe";
    181     const std::string array_string(array, arraysize(array));
    182     EXPECT_TRUE(Truncated(array_string, 4, &output));
    183     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
    184   }
    185 
    186   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    187   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
    188   EXPECT_EQ(output.compare(""), 0);
    189   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
    190   EXPECT_EQ(output.compare(""), 0);
    191   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
    192   EXPECT_EQ(output.compare(""), 0);
    193   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
    194   EXPECT_EQ(output.compare(""), 0);
    195   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
    196   EXPECT_EQ(output.compare(""), 0);
    197 
    198   // Strings in legacy encodings that are valid in UTF-8, but
    199   // are invalid as UTF-8 in real data.
    200   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
    201   EXPECT_EQ(output.compare("caf"), 0);
    202   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
    203   EXPECT_EQ(output.compare(""), 0);
    204   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
    205   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    206   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
    207               &output));
    208   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    209 
    210   // Testing using the same string as input and output.
    211   EXPECT_FALSE(Truncated(output, 4, &output));
    212   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    213   EXPECT_TRUE(Truncated(output, 3, &output));
    214   EXPECT_EQ(output.compare("\xa7\x41"), 0);
    215 
    216   // "abc" with U+201[CD] in windows-125[0-8]
    217   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
    218   EXPECT_EQ(output.compare("\x93" "abc"), 0);
    219 
    220   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    221   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
    222   EXPECT_EQ(output.compare(""), 0);
    223 
    224   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    225   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
    226   EXPECT_EQ(output.compare(""), 0);
    227 }
    228 
    229 TEST(StringUtilTest, TrimWhitespace) {
    230   string16 output;  // Allow contents to carry over to next testcase
    231   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
    232     const trim_case& value = trim_cases[i];
    233     EXPECT_EQ(value.return_value,
    234               TrimWhitespace(WideToUTF16(value.input), value.positions,
    235                              &output));
    236     EXPECT_EQ(WideToUTF16(value.output), output);
    237   }
    238 
    239   // Test that TrimWhitespace() can take the same string for input and output
    240   output = ASCIIToUTF16("  This is a test \r\n");
    241   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    242   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
    243 
    244   // Once more, but with a string of whitespace
    245   output = ASCIIToUTF16("  \r\n");
    246   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    247   EXPECT_EQ(string16(), output);
    248 
    249   std::string output_ascii;
    250   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
    251     const trim_case_ascii& value = trim_cases_ascii[i];
    252     EXPECT_EQ(value.return_value,
    253               TrimWhitespace(value.input, value.positions, &output_ascii));
    254     EXPECT_EQ(value.output, output_ascii);
    255   }
    256 }
    257 
    258 static const struct collapse_case {
    259   const wchar_t* input;
    260   const bool trim;
    261   const wchar_t* output;
    262 } collapse_cases[] = {
    263   {L" Google Video ", false, L"Google Video"},
    264   {L"Google Video", false, L"Google Video"},
    265   {L"", false, L""},
    266   {L"  ", false, L""},
    267   {L"\t\rTest String\n", false, L"Test String"},
    268   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    269   {L"    Test     \n  \t String    ", false, L"Test String"},
    270   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    271   {L"   Test String", false, L"Test String"},
    272   {L"Test String    ", false, L"Test String"},
    273   {L"Test String", false, L"Test String"},
    274   {L"", true, L""},
    275   {L"\n", true, L""},
    276   {L"  \r  ", true, L""},
    277   {L"\nFoo", true, L"Foo"},
    278   {L"\r  Foo  ", true, L"Foo"},
    279   {L" Foo bar ", true, L"Foo bar"},
    280   {L"  \tFoo  bar  \n", true, L"Foo bar"},
    281   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
    282 };
    283 
    284 TEST(StringUtilTest, CollapseWhitespace) {
    285   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
    286     const collapse_case& value = collapse_cases[i];
    287     EXPECT_EQ(WideToUTF16(value.output),
    288               CollapseWhitespace(WideToUTF16(value.input), value.trim));
    289   }
    290 }
    291 
    292 static const struct collapse_case_ascii {
    293   const char* input;
    294   const bool trim;
    295   const char* output;
    296 } collapse_cases_ascii[] = {
    297   {" Google Video ", false, "Google Video"},
    298   {"Google Video", false, "Google Video"},
    299   {"", false, ""},
    300   {"  ", false, ""},
    301   {"\t\rTest String\n", false, "Test String"},
    302   {"    Test     \n  \t String    ", false, "Test String"},
    303   {"   Test String", false, "Test String"},
    304   {"Test String    ", false, "Test String"},
    305   {"Test String", false, "Test String"},
    306   {"", true, ""},
    307   {"\n", true, ""},
    308   {"  \r  ", true, ""},
    309   {"\nFoo", true, "Foo"},
    310   {"\r  Foo  ", true, "Foo"},
    311   {" Foo bar ", true, "Foo bar"},
    312   {"  \tFoo  bar  \n", true, "Foo bar"},
    313   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
    314 };
    315 
    316 TEST(StringUtilTest, CollapseWhitespaceASCII) {
    317   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
    318     const collapse_case_ascii& value = collapse_cases_ascii[i];
    319     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
    320   }
    321 }
    322 
    323 TEST(StringUtilTest, IsStringUTF8) {
    324   EXPECT_TRUE(IsStringUTF8("abc"));
    325   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
    326   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
    327   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
    328   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
    329   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
    330 
    331   // surrogate code points
    332   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
    333   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
    334   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
    335 
    336   // overlong sequences
    337   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
    338   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
    339   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
    340   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
    341   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
    342   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
    343   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
    344   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
    345   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
    346   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
    347   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
    348 
    349   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    350   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
    351   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
    352   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
    353 
    354   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    355   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
    356   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
    357   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
    358   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
    359 
    360   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    361   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
    362   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
    363   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
    364   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
    365   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
    366   // Strings in legacy encodings. We can certainly make up strings
    367   // in a legacy encoding that are valid in UTF-8, but in real data,
    368   // most of them are invalid as UTF-8.
    369   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
    370   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
    371   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
    372   // "abc" with U+201[CD] in windows-125[0-8]
    373   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
    374   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    375   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
    376   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    377   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
    378 
    379   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
    380   // representation, and the second uses a 2-byte sequence. The second version
    381   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
    382   // given codepoint must be used.
    383   static const char kEmbeddedNull[] = "embedded\0null";
    384   EXPECT_TRUE(IsStringUTF8(
    385       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
    386   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
    387 }
    388 
    389 TEST(StringUtilTest, ConvertASCII) {
    390   static const char* char_cases[] = {
    391     "Google Video",
    392     "Hello, world\n",
    393     "0123ABCDwxyz \a\b\t\r\n!+,.~"
    394   };
    395 
    396   static const wchar_t* const wchar_cases[] = {
    397     L"Google Video",
    398     L"Hello, world\n",
    399     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
    400   };
    401 
    402   for (size_t i = 0; i < arraysize(char_cases); ++i) {
    403     EXPECT_TRUE(IsStringASCII(char_cases[i]));
    404     string16 utf16 = ASCIIToUTF16(char_cases[i]);
    405     EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
    406 
    407     std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
    408     EXPECT_EQ(char_cases[i], ascii);
    409   }
    410 
    411   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
    412 
    413   // Convert empty strings.
    414   string16 empty16;
    415   std::string empty;
    416   EXPECT_EQ(empty, UTF16ToASCII(empty16));
    417   EXPECT_EQ(empty16, ASCIIToUTF16(empty));
    418 
    419   // Convert strings with an embedded NUL character.
    420   const char chars_with_nul[] = "test\0string";
    421   const int length_with_nul = arraysize(chars_with_nul) - 1;
    422   std::string string_with_nul(chars_with_nul, length_with_nul);
    423   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
    424   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
    425             wide_with_nul.length());
    426   std::string narrow_with_nul = UTF16ToASCII(WideToUTF16(wide_with_nul));
    427   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
    428             narrow_with_nul.length());
    429   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
    430 }
    431 
    432 TEST(StringUtilTest, ToUpperASCII) {
    433   EXPECT_EQ('C', ToUpperASCII('C'));
    434   EXPECT_EQ('C', ToUpperASCII('c'));
    435   EXPECT_EQ('2', ToUpperASCII('2'));
    436 
    437   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
    438   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
    439   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
    440 
    441   std::string in_place_a("Cc2");
    442   StringToUpperASCII(&in_place_a);
    443   EXPECT_EQ("CC2", in_place_a);
    444 
    445   std::wstring in_place_w(L"Cc2");
    446   StringToUpperASCII(&in_place_w);
    447   EXPECT_EQ(L"CC2", in_place_w);
    448 
    449   std::string original_a("Cc2");
    450   std::string upper_a = StringToUpperASCII(original_a);
    451   EXPECT_EQ("CC2", upper_a);
    452 
    453   std::wstring original_w(L"Cc2");
    454   std::wstring upper_w = StringToUpperASCII(original_w);
    455   EXPECT_EQ(L"CC2", upper_w);
    456 }
    457 
    458 TEST(StringUtilTest, LowerCaseEqualsASCII) {
    459   static const struct {
    460     const char*    src_a;
    461     const char*    dst;
    462   } lowercase_cases[] = {
    463     { "FoO", "foo" },
    464     { "foo", "foo" },
    465     { "FOO", "foo" },
    466   };
    467 
    468   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
    469     EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
    470                                      lowercase_cases[i].dst));
    471     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
    472                                      lowercase_cases[i].dst));
    473   }
    474 }
    475 
    476 TEST(StringUtilTest, FormatBytesUnlocalized) {
    477   static const struct {
    478     int64 bytes;
    479     const char* expected;
    480   } cases[] = {
    481     // Expected behavior: we show one post-decimal digit when we have
    482     // under two pre-decimal digits, except in cases where it makes no
    483     // sense (zero or bytes).
    484     // Since we switch units once we cross the 1000 mark, this keeps
    485     // the display of file sizes or bytes consistently around three
    486     // digits.
    487     {0, "0 B"},
    488     {512, "512 B"},
    489     {1024*1024, "1.0 MB"},
    490     {1024*1024*1024, "1.0 GB"},
    491     {10LL*1024*1024*1024, "10.0 GB"},
    492     {99LL*1024*1024*1024, "99.0 GB"},
    493     {105LL*1024*1024*1024, "105 GB"},
    494     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
    495     {~(1LL<<63), "8192 PB"},
    496 
    497     {99*1024 + 103, "99.1 kB"},
    498     {1024*1024 + 103, "1.0 MB"},
    499     {1024*1024 + 205 * 1024, "1.2 MB"},
    500     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
    501     {10LL*1024*1024*1024, "10.0 GB"},
    502     {100LL*1024*1024*1024, "100 GB"},
    503   };
    504 
    505   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    506     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
    507               FormatBytesUnlocalized(cases[i].bytes));
    508   }
    509 }
    510 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
    511   static const struct {
    512     const char* str;
    513     string16::size_type start_offset;
    514     const char* find_this;
    515     const char* replace_with;
    516     const char* expected;
    517   } cases[] = {
    518     {"aaa", 0, "a", "b", "bbb"},
    519     {"abb", 0, "ab", "a", "ab"},
    520     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
    521     {"Not found", 0, "x", "0", "Not found"},
    522     {"Not found again", 5, "x", "0", "Not found again"},
    523     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    524      "Four score and seven years agoMakingFour score and seven years agoit"
    525      "Four score and seven years agomuchFour score and seven years agolonger"
    526      "Four score and seven years ago"},
    527     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    528     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
    529     {"abababab", 2, "ab", "c", "abccc"},
    530   };
    531 
    532   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    533     string16 str = ASCIIToUTF16(cases[i].str);
    534     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
    535                                  ASCIIToUTF16(cases[i].find_this),
    536                                  ASCIIToUTF16(cases[i].replace_with));
    537     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    538   }
    539 }
    540 
    541 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
    542   static const struct {
    543     const char* str;
    544     string16::size_type start_offset;
    545     const char* find_this;
    546     const char* replace_with;
    547     const char* expected;
    548   } cases[] = {
    549     {"aaa", 0, "a", "b", "baa"},
    550     {"abb", 0, "ab", "a", "ab"},
    551     {"Removing some substrings inging", 0, "ing", "",
    552       "Remov some substrings inging"},
    553     {"Not found", 0, "x", "0", "Not found"},
    554     {"Not found again", 5, "x", "0", "Not found again"},
    555     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    556      "Four score and seven years agoMaking it much longer "},
    557     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    558     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    559     {"abababab", 2, "ab", "c", "abcabab"},
    560   };
    561 
    562   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    563     string16 str = ASCIIToUTF16(cases[i].str);
    564     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
    565                                      ASCIIToUTF16(cases[i].find_this),
    566                                      ASCIIToUTF16(cases[i].replace_with));
    567     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    568   }
    569 }
    570 
    571 TEST(StringUtilTest, HexDigitToInt) {
    572   EXPECT_EQ(0, HexDigitToInt('0'));
    573   EXPECT_EQ(1, HexDigitToInt('1'));
    574   EXPECT_EQ(2, HexDigitToInt('2'));
    575   EXPECT_EQ(3, HexDigitToInt('3'));
    576   EXPECT_EQ(4, HexDigitToInt('4'));
    577   EXPECT_EQ(5, HexDigitToInt('5'));
    578   EXPECT_EQ(6, HexDigitToInt('6'));
    579   EXPECT_EQ(7, HexDigitToInt('7'));
    580   EXPECT_EQ(8, HexDigitToInt('8'));
    581   EXPECT_EQ(9, HexDigitToInt('9'));
    582   EXPECT_EQ(10, HexDigitToInt('A'));
    583   EXPECT_EQ(11, HexDigitToInt('B'));
    584   EXPECT_EQ(12, HexDigitToInt('C'));
    585   EXPECT_EQ(13, HexDigitToInt('D'));
    586   EXPECT_EQ(14, HexDigitToInt('E'));
    587   EXPECT_EQ(15, HexDigitToInt('F'));
    588 
    589   // Verify the lower case as well.
    590   EXPECT_EQ(10, HexDigitToInt('a'));
    591   EXPECT_EQ(11, HexDigitToInt('b'));
    592   EXPECT_EQ(12, HexDigitToInt('c'));
    593   EXPECT_EQ(13, HexDigitToInt('d'));
    594   EXPECT_EQ(14, HexDigitToInt('e'));
    595   EXPECT_EQ(15, HexDigitToInt('f'));
    596 }
    597 
    598 // This checks where we can use the assignment operator for a va_list. We need
    599 // a way to do this since Visual C doesn't support va_copy, but assignment on
    600 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
    601 // capability.
    602 static void VariableArgsFunc(const char* format, ...) {
    603   va_list org;
    604   va_start(org, format);
    605 
    606   va_list dup;
    607   GG_VA_COPY(dup, org);
    608   int i1 = va_arg(org, int);
    609   int j1 = va_arg(org, int);
    610   char* s1 = va_arg(org, char*);
    611   double d1 = va_arg(org, double);
    612   va_end(org);
    613 
    614   int i2 = va_arg(dup, int);
    615   int j2 = va_arg(dup, int);
    616   char* s2 = va_arg(dup, char*);
    617   double d2 = va_arg(dup, double);
    618 
    619   EXPECT_EQ(i1, i2);
    620   EXPECT_EQ(j1, j2);
    621   EXPECT_STREQ(s1, s2);
    622   EXPECT_EQ(d1, d2);
    623 
    624   va_end(dup);
    625 }
    626 
    627 TEST(StringUtilTest, VAList) {
    628   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
    629 }
    630 
    631 // Test for Tokenize
    632 template <typename STR>
    633 void TokenizeTest() {
    634   std::vector<STR> r;
    635   size_t size;
    636 
    637   size = Tokenize(STR("This is a string"), STR(" "), &r);
    638   EXPECT_EQ(4U, size);
    639   ASSERT_EQ(4U, r.size());
    640   EXPECT_EQ(r[0], STR("This"));
    641   EXPECT_EQ(r[1], STR("is"));
    642   EXPECT_EQ(r[2], STR("a"));
    643   EXPECT_EQ(r[3], STR("string"));
    644   r.clear();
    645 
    646   size = Tokenize(STR("one,two,three"), STR(","), &r);
    647   EXPECT_EQ(3U, size);
    648   ASSERT_EQ(3U, r.size());
    649   EXPECT_EQ(r[0], STR("one"));
    650   EXPECT_EQ(r[1], STR("two"));
    651   EXPECT_EQ(r[2], STR("three"));
    652   r.clear();
    653 
    654   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
    655   EXPECT_EQ(3U, size);
    656   ASSERT_EQ(3U, r.size());
    657   EXPECT_EQ(r[0], STR("one"));
    658   EXPECT_EQ(r[1], STR("two"));
    659   EXPECT_EQ(r[2], STR("three;four"));
    660   r.clear();
    661 
    662   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
    663   EXPECT_EQ(4U, size);
    664   ASSERT_EQ(4U, r.size());
    665   EXPECT_EQ(r[0], STR("one"));
    666   EXPECT_EQ(r[1], STR("two"));
    667   EXPECT_EQ(r[2], STR("three"));
    668   EXPECT_EQ(r[3], STR("four"));
    669   r.clear();
    670 
    671   size = Tokenize(STR("one, two, three"), STR(","), &r);
    672   EXPECT_EQ(3U, size);
    673   ASSERT_EQ(3U, r.size());
    674   EXPECT_EQ(r[0], STR("one"));
    675   EXPECT_EQ(r[1], STR(" two"));
    676   EXPECT_EQ(r[2], STR(" three"));
    677   r.clear();
    678 
    679   size = Tokenize(STR("one, two, three, "), STR(","), &r);
    680   EXPECT_EQ(4U, size);
    681   ASSERT_EQ(4U, r.size());
    682   EXPECT_EQ(r[0], STR("one"));
    683   EXPECT_EQ(r[1], STR(" two"));
    684   EXPECT_EQ(r[2], STR(" three"));
    685   EXPECT_EQ(r[3], STR(" "));
    686   r.clear();
    687 
    688   size = Tokenize(STR("one, two, three,"), STR(","), &r);
    689   EXPECT_EQ(3U, size);
    690   ASSERT_EQ(3U, r.size());
    691   EXPECT_EQ(r[0], STR("one"));
    692   EXPECT_EQ(r[1], STR(" two"));
    693   EXPECT_EQ(r[2], STR(" three"));
    694   r.clear();
    695 
    696   size = Tokenize(STR(), STR(","), &r);
    697   EXPECT_EQ(0U, size);
    698   ASSERT_EQ(0U, r.size());
    699   r.clear();
    700 
    701   size = Tokenize(STR(","), STR(","), &r);
    702   EXPECT_EQ(0U, size);
    703   ASSERT_EQ(0U, r.size());
    704   r.clear();
    705 
    706   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
    707   EXPECT_EQ(0U, size);
    708   ASSERT_EQ(0U, r.size());
    709   r.clear();
    710 
    711   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
    712   EXPECT_EQ(1U, size);
    713   ASSERT_EQ(1U, r.size());
    714   EXPECT_EQ(r[0], STR("a"));
    715   r.clear();
    716 
    717   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
    718   EXPECT_EQ(2U, size);
    719   ASSERT_EQ(2U, r.size());
    720   EXPECT_EQ(r[0], STR("\ta\t"));
    721   EXPECT_EQ(r[1], STR("b\tcc"));
    722   r.clear();
    723 }
    724 
    725 TEST(StringUtilTest, TokenizeStdString) {
    726   TokenizeTest<std::string>();
    727 }
    728 
    729 TEST(StringUtilTest, TokenizeStringPiece) {
    730   TokenizeTest<base::StringPiece>();
    731 }
    732 
    733 // Test for JoinString
    734 TEST(StringUtilTest, JoinString) {
    735   std::vector<std::string> in;
    736   EXPECT_EQ("", JoinString(in, ','));
    737 
    738   in.push_back("a");
    739   EXPECT_EQ("a", JoinString(in, ','));
    740 
    741   in.push_back("b");
    742   in.push_back("c");
    743   EXPECT_EQ("a,b,c", JoinString(in, ','));
    744 
    745   in.push_back(std::string());
    746   EXPECT_EQ("a,b,c,", JoinString(in, ','));
    747   in.push_back(" ");
    748   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
    749 }
    750 
    751 // Test for JoinString overloaded with std::string separator
    752 TEST(StringUtilTest, JoinStringWithString) {
    753   std::string separator(", ");
    754   std::vector<std::string> parts;
    755   EXPECT_EQ(std::string(), JoinString(parts, separator));
    756 
    757   parts.push_back("a");
    758   EXPECT_EQ("a", JoinString(parts, separator));
    759 
    760   parts.push_back("b");
    761   parts.push_back("c");
    762   EXPECT_EQ("a, b, c", JoinString(parts, separator));
    763 
    764   parts.push_back(std::string());
    765   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
    766   parts.push_back(" ");
    767   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
    768 }
    769 
    770 // Test for JoinString overloaded with string16 separator
    771 TEST(StringUtilTest, JoinStringWithString16) {
    772   string16 separator = ASCIIToUTF16(", ");
    773   std::vector<string16> parts;
    774   EXPECT_EQ(string16(), JoinString(parts, separator));
    775 
    776   parts.push_back(ASCIIToUTF16("a"));
    777   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
    778 
    779   parts.push_back(ASCIIToUTF16("b"));
    780   parts.push_back(ASCIIToUTF16("c"));
    781   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
    782 
    783   parts.push_back(ASCIIToUTF16(""));
    784   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
    785   parts.push_back(ASCIIToUTF16(" "));
    786   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
    787 }
    788 
    789 TEST(StringUtilTest, StartsWith) {
    790   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
    791   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
    792   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
    793   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
    794   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
    795   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
    796   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
    797   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
    798   EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
    799   EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
    800 
    801   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
    802                          ASCIIToUTF16("javascript"), true));
    803   EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
    804                           ASCIIToUTF16("javascript"), true));
    805   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
    806                          ASCIIToUTF16("javascript"), false));
    807   EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
    808                          ASCIIToUTF16("javascript"), false));
    809   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
    810                           ASCIIToUTF16("javascript"), true));
    811   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
    812                           ASCIIToUTF16("javascript"), false));
    813   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
    814   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
    815   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
    816   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
    817 }
    818 
    819 TEST(StringUtilTest, EndsWith) {
    820   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
    821                        ASCIIToUTF16(".plugin"), true));
    822   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
    823                         ASCIIToUTF16(".plugin"), true));
    824   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
    825                        ASCIIToUTF16(".plugin"), false));
    826   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
    827                        ASCIIToUTF16(".plugin"), false));
    828   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
    829   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
    830   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
    831                         ASCIIToUTF16(".plugin"), true));
    832   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
    833                         ASCIIToUTF16(".plugin"), false));
    834   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
    835   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
    836   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
    837   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
    838   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
    839                        ASCIIToUTF16(".plugin"), false));
    840   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
    841   EXPECT_TRUE(EndsWith(string16(), string16(), false));
    842   EXPECT_TRUE(EndsWith(string16(), string16(), true));
    843 }
    844 
    845 TEST(StringUtilTest, GetStringFWithOffsets) {
    846   std::vector<string16> subst;
    847   subst.push_back(ASCIIToUTF16("1"));
    848   subst.push_back(ASCIIToUTF16("2"));
    849   std::vector<size_t> offsets;
    850 
    851   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
    852                             subst,
    853                             &offsets);
    854   EXPECT_EQ(2U, offsets.size());
    855   EXPECT_EQ(7U, offsets[0]);
    856   EXPECT_EQ(25U, offsets[1]);
    857   offsets.clear();
    858 
    859   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
    860                             subst,
    861                             &offsets);
    862   EXPECT_EQ(2U, offsets.size());
    863   EXPECT_EQ(25U, offsets[0]);
    864   EXPECT_EQ(7U, offsets[1]);
    865   offsets.clear();
    866 }
    867 
    868 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
    869   // Test whether replacestringplaceholders works as expected when there
    870   // are fewer inputs than outputs.
    871   std::vector<string16> subst;
    872   subst.push_back(ASCIIToUTF16("9a"));
    873   subst.push_back(ASCIIToUTF16("8b"));
    874   subst.push_back(ASCIIToUTF16("7c"));
    875 
    876   string16 formatted =
    877       ReplaceStringPlaceholders(
    878           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
    879 
    880   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
    881 }
    882 
    883 TEST(StringUtilTest, ReplaceStringPlaceholders) {
    884   std::vector<string16> subst;
    885   subst.push_back(ASCIIToUTF16("9a"));
    886   subst.push_back(ASCIIToUTF16("8b"));
    887   subst.push_back(ASCIIToUTF16("7c"));
    888   subst.push_back(ASCIIToUTF16("6d"));
    889   subst.push_back(ASCIIToUTF16("5e"));
    890   subst.push_back(ASCIIToUTF16("4f"));
    891   subst.push_back(ASCIIToUTF16("3g"));
    892   subst.push_back(ASCIIToUTF16("2h"));
    893   subst.push_back(ASCIIToUTF16("1i"));
    894 
    895   string16 formatted =
    896       ReplaceStringPlaceholders(
    897           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
    898 
    899   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
    900 }
    901 
    902 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
    903   std::vector<string16> subst;
    904   subst.push_back(ASCIIToUTF16("9a"));
    905   subst.push_back(ASCIIToUTF16("8b"));
    906   subst.push_back(ASCIIToUTF16("7c"));
    907   subst.push_back(ASCIIToUTF16("6d"));
    908   subst.push_back(ASCIIToUTF16("5e"));
    909   subst.push_back(ASCIIToUTF16("4f"));
    910   subst.push_back(ASCIIToUTF16("3g"));
    911   subst.push_back(ASCIIToUTF16("2h"));
    912   subst.push_back(ASCIIToUTF16("1i"));
    913   subst.push_back(ASCIIToUTF16("0j"));
    914   subst.push_back(ASCIIToUTF16("-1k"));
    915   subst.push_back(ASCIIToUTF16("-2l"));
    916   subst.push_back(ASCIIToUTF16("-3m"));
    917   subst.push_back(ASCIIToUTF16("-4n"));
    918 
    919   string16 formatted =
    920       ReplaceStringPlaceholders(
    921           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
    922                        "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
    923 
    924   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
    925                                     "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
    926 }
    927 
    928 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
    929   std::vector<std::string> subst;
    930   subst.push_back("9a");
    931   subst.push_back("8b");
    932   subst.push_back("7c");
    933   subst.push_back("6d");
    934   subst.push_back("5e");
    935   subst.push_back("4f");
    936   subst.push_back("3g");
    937   subst.push_back("2h");
    938   subst.push_back("1i");
    939 
    940   std::string formatted =
    941       ReplaceStringPlaceholders(
    942           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
    943 
    944   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
    945 }
    946 
    947 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
    948   std::vector<std::string> subst;
    949   subst.push_back("a");
    950   subst.push_back("b");
    951   subst.push_back("c");
    952   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
    953             "$1 $$2 $$$3");
    954 }
    955 
    956 TEST(StringUtilTest, MatchPatternTest) {
    957   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
    958   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
    959   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
    960   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
    961   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
    962   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
    963   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
    964   EXPECT_FALSE(MatchPattern("", "*.*"));
    965   EXPECT_TRUE(MatchPattern("", "*"));
    966   EXPECT_TRUE(MatchPattern("", "?"));
    967   EXPECT_TRUE(MatchPattern("", ""));
    968   EXPECT_FALSE(MatchPattern("Hello", ""));
    969   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
    970   // Stop after a certain recursion depth.
    971   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
    972 
    973   // Test UTF8 matching.
    974   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
    975   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
    976   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
    977   // Invalid sequences should be handled as a single invalid character.
    978   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
    979   // If the pattern has invalid characters, it shouldn't match anything.
    980   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
    981 
    982   // Test UTF16 character matching.
    983   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
    984                            UTF8ToUTF16("*.com")));
    985   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
    986                            UTF8ToUTF16("He??o\\*1*")));
    987 
    988   // This test verifies that consecutive wild cards are collapsed into 1
    989   // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
    990   // recursion depth).
    991   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
    992                            UTF8ToUTF16("He********************************o")));
    993 }
    994 
    995 TEST(StringUtilTest, LcpyTest) {
    996   // Test the normal case where we fit in our buffer.
    997   {
    998     char dst[10];
    999     wchar_t wdst[10];
   1000     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1001     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
   1002     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1003     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
   1004   }
   1005 
   1006   // Test dst_size == 0, nothing should be written to |dst| and we should
   1007   // have the equivalent of strlen(src).
   1008   {
   1009     char dst[2] = {1, 2};
   1010     wchar_t wdst[2] = {1, 2};
   1011     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
   1012     EXPECT_EQ(1, dst[0]);
   1013     EXPECT_EQ(2, dst[1]);
   1014     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
   1015     EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
   1016     EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
   1017   }
   1018 
   1019   // Test the case were we _just_ competely fit including the null.
   1020   {
   1021     char dst[8];
   1022     wchar_t wdst[8];
   1023     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1024     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
   1025     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1026     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
   1027   }
   1028 
   1029   // Test the case were we we are one smaller, so we can't fit the null.
   1030   {
   1031     char dst[7];
   1032     wchar_t wdst[7];
   1033     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1034     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
   1035     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1036     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
   1037   }
   1038 
   1039   // Test the case were we are just too small.
   1040   {
   1041     char dst[3];
   1042     wchar_t wdst[3];
   1043     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1044     EXPECT_EQ(0, memcmp(dst, "ab", 3));
   1045     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1046     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
   1047   }
   1048 }
   1049 
   1050 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
   1051   static const struct {
   1052     const wchar_t* input;
   1053     bool portable;
   1054   } cases[] = {
   1055     { L"%ls", true },
   1056     { L"%s", false },
   1057     { L"%S", false },
   1058     { L"%lS", false },
   1059     { L"Hello, %s", false },
   1060     { L"%lc", true },
   1061     { L"%c", false },
   1062     { L"%C", false },
   1063     { L"%lC", false },
   1064     { L"%ls %s", false },
   1065     { L"%s %ls", false },
   1066     { L"%s %ls %s", false },
   1067     { L"%f", true },
   1068     { L"%f %F", false },
   1069     { L"%d %D", false },
   1070     { L"%o %O", false },
   1071     { L"%u %U", false },
   1072     { L"%f %d %o %u", true },
   1073     { L"%-8d (%02.1f%)", true },
   1074     { L"% 10s", false },
   1075     { L"% 10ls", true }
   1076   };
   1077   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
   1078     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
   1079 }
   1080 
   1081 TEST(StringUtilTest, RemoveChars) {
   1082   const char* kRemoveChars = "-/+*";
   1083   std::string input = "A-+bc/d!*";
   1084   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
   1085   EXPECT_EQ("Abcd!", input);
   1086 
   1087   // No characters match kRemoveChars.
   1088   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1089   EXPECT_EQ("Abcd!", input);
   1090 
   1091   // Empty string.
   1092   input.clear();
   1093   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1094   EXPECT_EQ(std::string(), input);
   1095 }
   1096 
   1097 TEST(StringUtilTest, ReplaceChars) {
   1098   struct TestData {
   1099     const char* input;
   1100     const char* replace_chars;
   1101     const char* replace_with;
   1102     const char* output;
   1103     bool result;
   1104   } cases[] = {
   1105     { "", "", "", "", false },
   1106     { "test", "", "", "test", false },
   1107     { "test", "", "!", "test", false },
   1108     { "test", "z", "!", "test", false },
   1109     { "test", "e", "!", "t!st", true },
   1110     { "test", "e", "!?", "t!?st", true },
   1111     { "test", "ez", "!", "t!st", true },
   1112     { "test", "zed", "!?", "t!?st", true },
   1113     { "test", "t", "!?", "!?es!?", true },
   1114     { "test", "et", "!>", "!>!>s!>", true },
   1115     { "test", "zest", "!", "!!!!", true },
   1116     { "test", "szt", "!", "!e!!", true },
   1117     { "test", "t", "test", "testestest", true },
   1118   };
   1119 
   1120   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
   1121     std::string output;
   1122     bool result = ReplaceChars(cases[i].input,
   1123                                cases[i].replace_chars,
   1124                                cases[i].replace_with,
   1125                                &output);
   1126     EXPECT_EQ(cases[i].result, result);
   1127     EXPECT_EQ(cases[i].output, output);
   1128   }
   1129 }
   1130 
   1131 TEST(StringUtilTest, ContainsOnlyChars) {
   1132   // Providing an empty list of characters should return false but for the empty
   1133   // string.
   1134   EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
   1135   EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
   1136 
   1137   EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
   1138   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
   1139   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
   1140   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
   1141   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
   1142 
   1143   EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
   1144   EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
   1145   EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
   1146   EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
   1147   EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
   1148   EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));
   1149 
   1150   EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
   1151   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
   1152   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
   1153   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n  "), kWhitespaceUTF16));
   1154   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
   1155   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n  "),
   1156                                   kWhitespaceUTF16));
   1157 }
   1158 
   1159 class WriteIntoTest : public testing::Test {
   1160  protected:
   1161   static void WritesCorrectly(size_t num_chars) {
   1162     std::string buffer;
   1163     char kOriginal[] = "supercali";
   1164     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
   1165     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
   1166     // string at the first \0.
   1167     EXPECT_EQ(std::string(kOriginal,
   1168                           std::min(num_chars, arraysize(kOriginal) - 1)),
   1169               std::string(buffer.c_str()));
   1170     EXPECT_EQ(num_chars, buffer.size());
   1171   }
   1172 };
   1173 
   1174 TEST_F(WriteIntoTest, WriteInto) {
   1175   // Validate that WriteInto reserves enough space and
   1176   // sizes a string correctly.
   1177   WritesCorrectly(1);
   1178   WritesCorrectly(2);
   1179   WritesCorrectly(5000);
   1180 
   1181   // Validate that WriteInto doesn't modify other strings
   1182   // when using a Copy-on-Write implementation.
   1183   const char kLive[] = "live";
   1184   const char kDead[] = "dead";
   1185   const std::string live = kLive;
   1186   std::string dead = live;
   1187   strncpy(WriteInto(&dead, 5), kDead, 4);
   1188   EXPECT_EQ(kDead, dead);
   1189   EXPECT_EQ(4u, dead.size());
   1190   EXPECT_EQ(kLive, live);
   1191   EXPECT_EQ(4u, live.size());
   1192 }
   1193 
   1194 }  // namespace base
   1195