Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <math.h>
      8 #include <stdarg.h>
      9 
     10 #include <limits>
     11 #include <sstream>
     12 
     13 #include "base/basictypes.h"
     14 #include "base/strings/string16.h"
     15 #include "base/strings/utf_string_conversions.h"
     16 #include "testing/gmock/include/gmock/gmock.h"
     17 #include "testing/gtest/include/gtest/gtest.h"
     18 
     19 using ::testing::ElementsAre;
     20 
     21 namespace base {
     22 
     23 static const struct trim_case {
     24   const wchar_t* input;
     25   const TrimPositions positions;
     26   const wchar_t* output;
     27   const TrimPositions return_value;
     28 } trim_cases[] = {
     29   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
     30   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
     31   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
     32   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
     33   {L"", TRIM_ALL, L"", TRIM_NONE},
     34   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
     35   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
     36   {L"  ", TRIM_ALL, L"", TRIM_ALL},
     37   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
     38   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
     39 };
     40 
     41 static const struct trim_case_ascii {
     42   const char* input;
     43   const TrimPositions positions;
     44   const char* output;
     45   const TrimPositions return_value;
     46 } trim_cases_ascii[] = {
     47   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
     48   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
     49   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
     50   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
     51   {"", TRIM_ALL, "", TRIM_NONE},
     52   {"  ", TRIM_LEADING, "", TRIM_LEADING},
     53   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
     54   {"  ", TRIM_ALL, "", TRIM_ALL},
     55   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
     56 };
     57 
     58 namespace {
     59 
     60 // Helper used to test TruncateUTF8ToByteSize.
     61 bool Truncated(const std::string& input, const size_t byte_size,
     62                std::string* output) {
     63     size_t prev = input.length();
     64     TruncateUTF8ToByteSize(input, byte_size, output);
     65     return prev != output->length();
     66 }
     67 
     68 }  // namespace
     69 
     70 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
     71   std::string output;
     72 
     73   // Empty strings and invalid byte_size arguments
     74   EXPECT_FALSE(Truncated(std::string(), 0, &output));
     75   EXPECT_EQ(output, "");
     76   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
     77   EXPECT_EQ(output, "");
     78   EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
     79   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
     80 
     81   // Testing the truncation of valid UTF8 correctly
     82   EXPECT_TRUE(Truncated("abc", 2, &output));
     83   EXPECT_EQ(output, "ab");
     84   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
     85   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     86   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
     87   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     88   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
     89   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
     90 
     91   {
     92     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
     93     const std::string array_string(array, arraysize(array));
     94     EXPECT_TRUE(Truncated(array_string, 4, &output));
     95     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
     96   }
     97 
     98   {
     99     const char array[] = "\x00\xc2\x81\xc2\x81";
    100     const std::string array_string(array, arraysize(array));
    101     EXPECT_TRUE(Truncated(array_string, 4, &output));
    102     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
    103   }
    104 
    105   // Testing invalid UTF8
    106   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
    107   EXPECT_EQ(output.compare(""), 0);
    108   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
    109   EXPECT_EQ(output.compare(""), 0);
    110   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
    111   EXPECT_EQ(output.compare(""), 0);
    112 
    113   // Testing invalid UTF8 mixed with valid UTF8
    114   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
    115   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
    116   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
    117   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
    118   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
    119               10, &output));
    120   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
    121   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
    122               10, &output));
    123   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
    124   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
    125   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
    126 
    127   // Overlong sequences
    128   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
    129   EXPECT_EQ(output.compare(""), 0);
    130   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
    131   EXPECT_EQ(output.compare(""), 0);
    132   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
    133   EXPECT_EQ(output.compare(""), 0);
    134   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
    135   EXPECT_EQ(output.compare(""), 0);
    136   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
    137   EXPECT_EQ(output.compare(""), 0);
    138   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
    139   EXPECT_EQ(output.compare(""), 0);
    140   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
    141   EXPECT_EQ(output.compare(""), 0);
    142   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
    143   EXPECT_EQ(output.compare(""), 0);
    144   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
    145   EXPECT_EQ(output.compare(""), 0);
    146   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
    147   EXPECT_EQ(output.compare(""), 0);
    148   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
    149   EXPECT_EQ(output.compare(""), 0);
    150 
    151   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    152   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
    153   EXPECT_EQ(output.compare(""), 0);
    154   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
    155   EXPECT_EQ(output.compare(""), 0);
    156   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
    157   EXPECT_EQ(output.compare(""), 0);
    158 
    159   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    160   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
    161   EXPECT_EQ(output.compare(""), 0);
    162   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
    163   EXPECT_EQ(output.compare(""), 0);
    164 
    165   {
    166     const char array[] = "\x00\x00\xfe\xff";
    167     const std::string array_string(array, arraysize(array));
    168     EXPECT_TRUE(Truncated(array_string, 4, &output));
    169     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
    170   }
    171 
    172   // Variants on the previous test
    173   {
    174     const char array[] = "\xff\xfe\x00\x00";
    175     const std::string array_string(array, 4);
    176     EXPECT_FALSE(Truncated(array_string, 4, &output));
    177     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
    178   }
    179   {
    180     const char array[] = "\xff\x00\x00\xfe";
    181     const std::string array_string(array, arraysize(array));
    182     EXPECT_TRUE(Truncated(array_string, 4, &output));
    183     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
    184   }
    185 
    186   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    187   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
    188   EXPECT_EQ(output.compare(""), 0);
    189   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
    190   EXPECT_EQ(output.compare(""), 0);
    191   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
    192   EXPECT_EQ(output.compare(""), 0);
    193   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
    194   EXPECT_EQ(output.compare(""), 0);
    195   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
    196   EXPECT_EQ(output.compare(""), 0);
    197 
    198   // Strings in legacy encodings that are valid in UTF-8, but
    199   // are invalid as UTF-8 in real data.
    200   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
    201   EXPECT_EQ(output.compare("caf"), 0);
    202   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
    203   EXPECT_EQ(output.compare(""), 0);
    204   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
    205   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    206   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
    207               &output));
    208   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    209 
    210   // Testing using the same string as input and output.
    211   EXPECT_FALSE(Truncated(output, 4, &output));
    212   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    213   EXPECT_TRUE(Truncated(output, 3, &output));
    214   EXPECT_EQ(output.compare("\xa7\x41"), 0);
    215 
    216   // "abc" with U+201[CD] in windows-125[0-8]
    217   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
    218   EXPECT_EQ(output.compare("\x93" "abc"), 0);
    219 
    220   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    221   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
    222   EXPECT_EQ(output.compare(""), 0);
    223 
    224   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    225   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
    226   EXPECT_EQ(output.compare(""), 0);
    227 }
    228 
    229 TEST(StringUtilTest, TrimWhitespace) {
    230   string16 output;  // Allow contents to carry over to next testcase
    231   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
    232     const trim_case& value = trim_cases[i];
    233     EXPECT_EQ(value.return_value,
    234               TrimWhitespace(WideToUTF16(value.input), value.positions,
    235                              &output));
    236     EXPECT_EQ(WideToUTF16(value.output), output);
    237   }
    238 
    239   // Test that TrimWhitespace() can take the same string for input and output
    240   output = ASCIIToUTF16("  This is a test \r\n");
    241   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    242   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
    243 
    244   // Once more, but with a string of whitespace
    245   output = ASCIIToUTF16("  \r\n");
    246   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    247   EXPECT_EQ(string16(), output);
    248 
    249   std::string output_ascii;
    250   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
    251     const trim_case_ascii& value = trim_cases_ascii[i];
    252     EXPECT_EQ(value.return_value,
    253               TrimWhitespace(value.input, value.positions, &output_ascii));
    254     EXPECT_EQ(value.output, output_ascii);
    255   }
    256 }
    257 
    // Wide-string rows for the CollapseWhitespace test: the input text, the
    // |trim| flag handed to CollapseWhitespace, and the expected result.
    struct collapse_case {
      const wchar_t* input;
      const bool trim;
      const wchar_t* output;
    };

    static const collapse_case collapse_cases[] = {
      // Rows run with trim == false.
      {L" Google Video ", false, L"Google Video"},
      {L"Google Video", false, L"Google Video"},
      {L"", false, L""},
      {L"  ", false, L""},
      {L"\t\rTest String\n", false, L"Test String"},
      {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
      {L"    Test     \n  \t String    ", false, L"Test String"},
      {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
      {L"   Test String", false, L"Test String"},
      {L"Test String    ", false, L"Test String"},
      {L"Test String", false, L"Test String"},
      // Rows run with trim == true.
      {L"", true, L""},
      {L"\n", true, L""},
      {L"  \r  ", true, L""},
      {L"\nFoo", true, L"Foo"},
      {L"\r  Foo  ", true, L"Foo"},
      {L" Foo bar ", true, L"Foo bar"},
      {L"  \tFoo  bar  \n", true, L"Foo bar"},
      {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
    };
    283 
    284 TEST(StringUtilTest, CollapseWhitespace) {
    285   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
    286     const collapse_case& value = collapse_cases[i];
    287     EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
    288   }
    289 }
    290 
    // Narrow-string rows for the CollapseWhitespaceASCII test: the input text,
    // the |trim| flag handed to CollapseWhitespaceASCII, and the expected result.
    struct collapse_case_ascii {
      const char* input;
      const bool trim;
      const char* output;
    };

    static const collapse_case_ascii collapse_cases_ascii[] = {
      // Rows run with trim == false.
      {" Google Video ", false, "Google Video"},
      {"Google Video", false, "Google Video"},
      {"", false, ""},
      {"  ", false, ""},
      {"\t\rTest String\n", false, "Test String"},
      {"    Test     \n  \t String    ", false, "Test String"},
      {"   Test String", false, "Test String"},
      {"Test String    ", false, "Test String"},
      {"Test String", false, "Test String"},
      // Rows run with trim == true.
      {"", true, ""},
      {"\n", true, ""},
      {"  \r  ", true, ""},
      {"\nFoo", true, "Foo"},
      {"\r  Foo  ", true, "Foo"},
      {" Foo bar ", true, "Foo bar"},
      {"  \tFoo  bar  \n", true, "Foo bar"},
      {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
    };
    314 
    315 TEST(StringUtilTest, CollapseWhitespaceASCII) {
    316   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
    317     const collapse_case_ascii& value = collapse_cases_ascii[i];
    318     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
    319   }
    320 }
    321 
    322 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
    323   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(std::string()));
    324   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
    325   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
    326   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
    327   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
    328   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
    329 }
    330 
    331 TEST(StringUtilTest, ContainsOnlyWhitespace) {
    332   EXPECT_TRUE(ContainsOnlyWhitespace(string16()));
    333   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
    334   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
    335   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
    336   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
    337   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
    338 }
    339 
    340 TEST(StringUtilTest, IsStringUTF8) {
    341   EXPECT_TRUE(IsStringUTF8("abc"));
    342   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
    343   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
    344   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
    345   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
    346   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
    347 
    348   // surrogate code points
    349   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
    350   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
    351   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
    352 
    353   // overlong sequences
    354   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
    355   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
    356   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
    357   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
    358   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
    359   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
    360   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
    361   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
    362   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
    363   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
    364   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
    365 
    366   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    367   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
    368   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
    369   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
    370 
    371   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    372   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
    373   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
    374   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
    375   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
    376 
    377   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    378   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
    379   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
    380   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
    381   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
    382   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
    383   // Strings in legacy encodings. We can certainly make up strings
    384   // in a legacy encoding that are valid in UTF-8, but in real data,
    385   // most of them are invalid as UTF-8.
    386   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
    387   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
    388   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
    389   // "abc" with U+201[CD] in windows-125[0-8]
    390   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
    391   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    392   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
    393   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    394   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
    395 
    396   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
    397   // representation, and the second uses a 2-byte sequence. The second version
    398   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
    399   // given codepoint must be used.
    400   static const char kEmbeddedNull[] = "embedded\0null";
    401   EXPECT_TRUE(IsStringUTF8(
    402       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
    403   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
    404 }
    405 
    406 TEST(StringUtilTest, ConvertASCII) {
    407   static const char* char_cases[] = {
    408     "Google Video",
    409     "Hello, world\n",
    410     "0123ABCDwxyz \a\b\t\r\n!+,.~"
    411   };
    412 
    413   static const wchar_t* const wchar_cases[] = {
    414     L"Google Video",
    415     L"Hello, world\n",
    416     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
    417   };
    418 
    419   for (size_t i = 0; i < arraysize(char_cases); ++i) {
    420     EXPECT_TRUE(IsStringASCII(char_cases[i]));
    421     std::wstring wide = ASCIIToWide(char_cases[i]);
    422     EXPECT_EQ(wchar_cases[i], wide);
    423 
    424     EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
    425     std::string ascii = WideToASCII(wchar_cases[i]);
    426     EXPECT_EQ(char_cases[i], ascii);
    427   }
    428 
    429   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
    430   EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
    431 
    432   // Convert empty strings.
    433   std::wstring wempty;
    434   std::string empty;
    435   EXPECT_EQ(empty, WideToASCII(wempty));
    436   EXPECT_EQ(wempty, ASCIIToWide(empty));
    437 
    438   // Convert strings with an embedded NUL character.
    439   const char chars_with_nul[] = "test\0string";
    440   const int length_with_nul = arraysize(chars_with_nul) - 1;
    441   std::string string_with_nul(chars_with_nul, length_with_nul);
    442   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
    443   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
    444             wide_with_nul.length());
    445   std::string narrow_with_nul = WideToASCII(wide_with_nul);
    446   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
    447             narrow_with_nul.length());
    448   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
    449 }
    450 
    451 TEST(StringUtilTest, ToUpperASCII) {
    452   EXPECT_EQ('C', ToUpperASCII('C'));
    453   EXPECT_EQ('C', ToUpperASCII('c'));
    454   EXPECT_EQ('2', ToUpperASCII('2'));
    455 
    456   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
    457   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
    458   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
    459 
    460   std::string in_place_a("Cc2");
    461   StringToUpperASCII(&in_place_a);
    462   EXPECT_EQ("CC2", in_place_a);
    463 
    464   std::wstring in_place_w(L"Cc2");
    465   StringToUpperASCII(&in_place_w);
    466   EXPECT_EQ(L"CC2", in_place_w);
    467 
    468   std::string original_a("Cc2");
    469   std::string upper_a = StringToUpperASCII(original_a);
    470   EXPECT_EQ("CC2", upper_a);
    471 
    472   std::wstring original_w(L"Cc2");
    473   std::wstring upper_w = StringToUpperASCII(original_w);
    474   EXPECT_EQ(L"CC2", upper_w);
    475 }
    476 
    477 TEST(StringUtilTest, LowerCaseEqualsASCII) {
    478   static const struct {
    479     const wchar_t* src_w;
    480     const char*    src_a;
    481     const char*    dst;
    482   } lowercase_cases[] = {
    483     { L"FoO", "FoO", "foo" },
    484     { L"foo", "foo", "foo" },
    485     { L"FOO", "FOO", "foo" },
    486   };
    487 
    488   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
    489     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
    490                                      lowercase_cases[i].dst));
    491     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
    492                                      lowercase_cases[i].dst));
    493   }
    494 }
    495 
    496 TEST(StringUtilTest, FormatBytesUnlocalized) {
    497   static const struct {
    498     int64 bytes;
    499     const char* expected;
    500   } cases[] = {
    501     // Expected behavior: we show one post-decimal digit when we have
    502     // under two pre-decimal digits, except in cases where it makes no
    503     // sense (zero or bytes).
    504     // Since we switch units once we cross the 1000 mark, this keeps
    505     // the display of file sizes or bytes consistently around three
    506     // digits.
    507     {0, "0 B"},
    508     {512, "512 B"},
    509     {1024*1024, "1.0 MB"},
    510     {1024*1024*1024, "1.0 GB"},
    511     {10LL*1024*1024*1024, "10.0 GB"},
    512     {99LL*1024*1024*1024, "99.0 GB"},
    513     {105LL*1024*1024*1024, "105 GB"},
    514     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
    515     {~(1LL<<63), "8192 PB"},
    516 
    517     {99*1024 + 103, "99.1 kB"},
    518     {1024*1024 + 103, "1.0 MB"},
    519     {1024*1024 + 205 * 1024, "1.2 MB"},
    520     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
    521     {10LL*1024*1024*1024, "10.0 GB"},
    522     {100LL*1024*1024*1024, "100 GB"},
    523   };
    524 
    525   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    526     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
    527               FormatBytesUnlocalized(cases[i].bytes));
    528   }
    529 }
    530 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
    531   static const struct {
    532     const char* str;
    533     string16::size_type start_offset;
    534     const char* find_this;
    535     const char* replace_with;
    536     const char* expected;
    537   } cases[] = {
    538     {"aaa", 0, "a", "b", "bbb"},
    539     {"abb", 0, "ab", "a", "ab"},
    540     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
    541     {"Not found", 0, "x", "0", "Not found"},
    542     {"Not found again", 5, "x", "0", "Not found again"},
    543     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    544      "Four score and seven years agoMakingFour score and seven years agoit"
    545      "Four score and seven years agomuchFour score and seven years agolonger"
    546      "Four score and seven years ago"},
    547     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    548     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
    549     {"abababab", 2, "ab", "c", "abccc"},
    550   };
    551 
    552   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    553     string16 str = ASCIIToUTF16(cases[i].str);
    554     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
    555                                  ASCIIToUTF16(cases[i].find_this),
    556                                  ASCIIToUTF16(cases[i].replace_with));
    557     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    558   }
    559 }
    560 
    561 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
    562   static const struct {
    563     const char* str;
    564     string16::size_type start_offset;
    565     const char* find_this;
    566     const char* replace_with;
    567     const char* expected;
    568   } cases[] = {
    569     {"aaa", 0, "a", "b", "baa"},
    570     {"abb", 0, "ab", "a", "ab"},
    571     {"Removing some substrings inging", 0, "ing", "",
    572       "Remov some substrings inging"},
    573     {"Not found", 0, "x", "0", "Not found"},
    574     {"Not found again", 5, "x", "0", "Not found again"},
    575     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    576      "Four score and seven years agoMaking it much longer "},
    577     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    578     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    579     {"abababab", 2, "ab", "c", "abcabab"},
    580   };
    581 
    582   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    583     string16 str = ASCIIToUTF16(cases[i].str);
    584     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
    585                                      ASCIIToUTF16(cases[i].find_this),
    586                                      ASCIIToUTF16(cases[i].replace_with));
    587     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    588   }
    589 }
    590 
    591 TEST(StringUtilTest, HexDigitToInt) {
    592   EXPECT_EQ(0, HexDigitToInt('0'));
    593   EXPECT_EQ(1, HexDigitToInt('1'));
    594   EXPECT_EQ(2, HexDigitToInt('2'));
    595   EXPECT_EQ(3, HexDigitToInt('3'));
    596   EXPECT_EQ(4, HexDigitToInt('4'));
    597   EXPECT_EQ(5, HexDigitToInt('5'));
    598   EXPECT_EQ(6, HexDigitToInt('6'));
    599   EXPECT_EQ(7, HexDigitToInt('7'));
    600   EXPECT_EQ(8, HexDigitToInt('8'));
    601   EXPECT_EQ(9, HexDigitToInt('9'));
    602   EXPECT_EQ(10, HexDigitToInt('A'));
    603   EXPECT_EQ(11, HexDigitToInt('B'));
    604   EXPECT_EQ(12, HexDigitToInt('C'));
    605   EXPECT_EQ(13, HexDigitToInt('D'));
    606   EXPECT_EQ(14, HexDigitToInt('E'));
    607   EXPECT_EQ(15, HexDigitToInt('F'));
    608 
    609   // Verify the lower case as well.
    610   EXPECT_EQ(10, HexDigitToInt('a'));
    611   EXPECT_EQ(11, HexDigitToInt('b'));
    612   EXPECT_EQ(12, HexDigitToInt('c'));
    613   EXPECT_EQ(13, HexDigitToInt('d'));
    614   EXPECT_EQ(14, HexDigitToInt('e'));
    615   EXPECT_EQ(15, HexDigitToInt('f'));
    616 }
    617 
    618 // This checks where we can use the assignment operator for a va_list. We need
    619 // a way to do this since Visual C doesn't support va_copy, but assignment on
    620 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
    621 // capability.
    622 static void VariableArgsFunc(const char* format, ...) {
    623   va_list org;
    624   va_start(org, format);
    625 
    626   va_list dup;
    627   GG_VA_COPY(dup, org);
    628   int i1 = va_arg(org, int);
    629   int j1 = va_arg(org, int);
    630   char* s1 = va_arg(org, char*);
    631   double d1 = va_arg(org, double);
    632   va_end(org);
    633 
    634   int i2 = va_arg(dup, int);
    635   int j2 = va_arg(dup, int);
    636   char* s2 = va_arg(dup, char*);
    637   double d2 = va_arg(dup, double);
    638 
    639   EXPECT_EQ(i1, i2);
    640   EXPECT_EQ(j1, j2);
    641   EXPECT_STREQ(s1, s2);
    642   EXPECT_EQ(d1, d2);
    643 
    644   va_end(dup);
    645 }
    646 
    647 TEST(StringUtilTest, VAList) {
    648   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
    649 }
    650 
    651 // Test for Tokenize
    652 template <typename STR>
    653 void TokenizeTest() {
    654   std::vector<STR> r;
    655   size_t size;
    656 
    657   size = Tokenize(STR("This is a string"), STR(" "), &r);
    658   EXPECT_EQ(4U, size);
    659   ASSERT_EQ(4U, r.size());
    660   EXPECT_EQ(r[0], STR("This"));
    661   EXPECT_EQ(r[1], STR("is"));
    662   EXPECT_EQ(r[2], STR("a"));
    663   EXPECT_EQ(r[3], STR("string"));
    664   r.clear();
    665 
    666   size = Tokenize(STR("one,two,three"), STR(","), &r);
    667   EXPECT_EQ(3U, size);
    668   ASSERT_EQ(3U, r.size());
    669   EXPECT_EQ(r[0], STR("one"));
    670   EXPECT_EQ(r[1], STR("two"));
    671   EXPECT_EQ(r[2], STR("three"));
    672   r.clear();
    673 
    674   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
    675   EXPECT_EQ(3U, size);
    676   ASSERT_EQ(3U, r.size());
    677   EXPECT_EQ(r[0], STR("one"));
    678   EXPECT_EQ(r[1], STR("two"));
    679   EXPECT_EQ(r[2], STR("three;four"));
    680   r.clear();
    681 
    682   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
    683   EXPECT_EQ(4U, size);
    684   ASSERT_EQ(4U, r.size());
    685   EXPECT_EQ(r[0], STR("one"));
    686   EXPECT_EQ(r[1], STR("two"));
    687   EXPECT_EQ(r[2], STR("three"));
    688   EXPECT_EQ(r[3], STR("four"));
    689   r.clear();
    690 
    691   size = Tokenize(STR("one, two, three"), STR(","), &r);
    692   EXPECT_EQ(3U, size);
    693   ASSERT_EQ(3U, r.size());
    694   EXPECT_EQ(r[0], STR("one"));
    695   EXPECT_EQ(r[1], STR(" two"));
    696   EXPECT_EQ(r[2], STR(" three"));
    697   r.clear();
    698 
    699   size = Tokenize(STR("one, two, three, "), STR(","), &r);
    700   EXPECT_EQ(4U, size);
    701   ASSERT_EQ(4U, r.size());
    702   EXPECT_EQ(r[0], STR("one"));
    703   EXPECT_EQ(r[1], STR(" two"));
    704   EXPECT_EQ(r[2], STR(" three"));
    705   EXPECT_EQ(r[3], STR(" "));
    706   r.clear();
    707 
    708   size = Tokenize(STR("one, two, three,"), STR(","), &r);
    709   EXPECT_EQ(3U, size);
    710   ASSERT_EQ(3U, r.size());
    711   EXPECT_EQ(r[0], STR("one"));
    712   EXPECT_EQ(r[1], STR(" two"));
    713   EXPECT_EQ(r[2], STR(" three"));
    714   r.clear();
    715 
    716   size = Tokenize(STR(), STR(","), &r);
    717   EXPECT_EQ(0U, size);
    718   ASSERT_EQ(0U, r.size());
    719   r.clear();
    720 
    721   size = Tokenize(STR(","), STR(","), &r);
    722   EXPECT_EQ(0U, size);
    723   ASSERT_EQ(0U, r.size());
    724   r.clear();
    725 
    726   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
    727   EXPECT_EQ(0U, size);
    728   ASSERT_EQ(0U, r.size());
    729   r.clear();
    730 
    731   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
    732   EXPECT_EQ(1U, size);
    733   ASSERT_EQ(1U, r.size());
    734   EXPECT_EQ(r[0], STR("a"));
    735   r.clear();
    736 
    737   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
    738   EXPECT_EQ(2U, size);
    739   ASSERT_EQ(2U, r.size());
    740   EXPECT_EQ(r[0], STR("\ta\t"));
    741   EXPECT_EQ(r[1], STR("b\tcc"));
    742   r.clear();
    743 }
    744 
    745 TEST(StringUtilTest, TokenizeStdString) {
    746   TokenizeTest<std::string>();
    747 }
    748 
    749 TEST(StringUtilTest, TokenizeStringPiece) {
    750   TokenizeTest<base::StringPiece>();
    751 }
    752 
    753 // Test for JoinString
    754 TEST(StringUtilTest, JoinString) {
    755   std::vector<std::string> in;
    756   EXPECT_EQ("", JoinString(in, ','));
    757 
    758   in.push_back("a");
    759   EXPECT_EQ("a", JoinString(in, ','));
    760 
    761   in.push_back("b");
    762   in.push_back("c");
    763   EXPECT_EQ("a,b,c", JoinString(in, ','));
    764 
    765   in.push_back(std::string());
    766   EXPECT_EQ("a,b,c,", JoinString(in, ','));
    767   in.push_back(" ");
    768   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
    769 }
    770 
    771 // Test for JoinString overloaded with std::string separator
    772 TEST(StringUtilTest, JoinStringWithString) {
    773   std::string separator(", ");
    774   std::vector<std::string> parts;
    775   EXPECT_EQ(std::string(), JoinString(parts, separator));
    776 
    777   parts.push_back("a");
    778   EXPECT_EQ("a", JoinString(parts, separator));
    779 
    780   parts.push_back("b");
    781   parts.push_back("c");
    782   EXPECT_EQ("a, b, c", JoinString(parts, separator));
    783 
    784   parts.push_back(std::string());
    785   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
    786   parts.push_back(" ");
    787   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
    788 }
    789 
    790 // Test for JoinString overloaded with string16 separator
    791 TEST(StringUtilTest, JoinStringWithString16) {
    792   string16 separator = ASCIIToUTF16(", ");
    793   std::vector<string16> parts;
    794   EXPECT_EQ(string16(), JoinString(parts, separator));
    795 
    796   parts.push_back(ASCIIToUTF16("a"));
    797   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
    798 
    799   parts.push_back(ASCIIToUTF16("b"));
    800   parts.push_back(ASCIIToUTF16("c"));
    801   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
    802 
    803   parts.push_back(ASCIIToUTF16(""));
    804   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
    805   parts.push_back(ASCIIToUTF16(" "));
    806   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
    807 }
    808 
    809 TEST(StringUtilTest, StartsWith) {
    810   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
    811   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
    812   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
    813   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
    814   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
    815   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
    816   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
    817   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
    818   EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
    819   EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
    820 
    821   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
    822   EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
    823   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
    824   EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
    825   EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
    826   EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
    827   EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", false));
    828   EXPECT_FALSE(StartsWith(std::wstring(), L"javascript", true));
    829   EXPECT_TRUE(StartsWith(L"java", std::wstring(), false));
    830   EXPECT_TRUE(StartsWith(L"java", std::wstring(), true));
    831 }
    832 
    833 TEST(StringUtilTest, EndsWith) {
    834   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
    835   EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
    836   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
    837   EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
    838   EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
    839   EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
    840   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
    841   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
    842   EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", false));
    843   EXPECT_FALSE(EndsWith(std::wstring(), L".plugin", true));
    844   EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), false));
    845   EXPECT_TRUE(EndsWith(L"Foo.plugin", std::wstring(), true));
    846   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
    847   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
    848   EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), false));
    849   EXPECT_TRUE(EndsWith(std::wstring(), std::wstring(), true));
    850 }
    851 
    852 TEST(StringUtilTest, GetStringFWithOffsets) {
    853   std::vector<string16> subst;
    854   subst.push_back(ASCIIToUTF16("1"));
    855   subst.push_back(ASCIIToUTF16("2"));
    856   std::vector<size_t> offsets;
    857 
    858   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
    859                             subst,
    860                             &offsets);
    861   EXPECT_EQ(2U, offsets.size());
    862   EXPECT_EQ(7U, offsets[0]);
    863   EXPECT_EQ(25U, offsets[1]);
    864   offsets.clear();
    865 
    866   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
    867                             subst,
    868                             &offsets);
    869   EXPECT_EQ(2U, offsets.size());
    870   EXPECT_EQ(25U, offsets[0]);
    871   EXPECT_EQ(7U, offsets[1]);
    872   offsets.clear();
    873 }
    874 
    875 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
    876   // Test whether replacestringplaceholders works as expected when there
    877   // are fewer inputs than outputs.
    878   std::vector<string16> subst;
    879   subst.push_back(ASCIIToUTF16("9a"));
    880   subst.push_back(ASCIIToUTF16("8b"));
    881   subst.push_back(ASCIIToUTF16("7c"));
    882 
    883   string16 formatted =
    884       ReplaceStringPlaceholders(
    885           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
    886 
    887   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
    888 }
    889 
    890 TEST(StringUtilTest, ReplaceStringPlaceholders) {
    891   std::vector<string16> subst;
    892   subst.push_back(ASCIIToUTF16("9a"));
    893   subst.push_back(ASCIIToUTF16("8b"));
    894   subst.push_back(ASCIIToUTF16("7c"));
    895   subst.push_back(ASCIIToUTF16("6d"));
    896   subst.push_back(ASCIIToUTF16("5e"));
    897   subst.push_back(ASCIIToUTF16("4f"));
    898   subst.push_back(ASCIIToUTF16("3g"));
    899   subst.push_back(ASCIIToUTF16("2h"));
    900   subst.push_back(ASCIIToUTF16("1i"));
    901 
    902   string16 formatted =
    903       ReplaceStringPlaceholders(
    904           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
    905 
    906   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
    907 }
    908 
    909 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
    910   std::vector<string16> subst;
    911   subst.push_back(ASCIIToUTF16("9a"));
    912   subst.push_back(ASCIIToUTF16("8b"));
    913   subst.push_back(ASCIIToUTF16("7c"));
    914   subst.push_back(ASCIIToUTF16("6d"));
    915   subst.push_back(ASCIIToUTF16("5e"));
    916   subst.push_back(ASCIIToUTF16("4f"));
    917   subst.push_back(ASCIIToUTF16("3g"));
    918   subst.push_back(ASCIIToUTF16("2h"));
    919   subst.push_back(ASCIIToUTF16("1i"));
    920   subst.push_back(ASCIIToUTF16("0j"));
    921   subst.push_back(ASCIIToUTF16("-1k"));
    922   subst.push_back(ASCIIToUTF16("-2l"));
    923   subst.push_back(ASCIIToUTF16("-3m"));
    924   subst.push_back(ASCIIToUTF16("-4n"));
    925 
    926   string16 formatted =
    927       ReplaceStringPlaceholders(
    928           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
    929                        "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
    930 
    931   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
    932                                     "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
    933 }
    934 
    935 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
    936   std::vector<std::string> subst;
    937   subst.push_back("9a");
    938   subst.push_back("8b");
    939   subst.push_back("7c");
    940   subst.push_back("6d");
    941   subst.push_back("5e");
    942   subst.push_back("4f");
    943   subst.push_back("3g");
    944   subst.push_back("2h");
    945   subst.push_back("1i");
    946 
    947   std::string formatted =
    948       ReplaceStringPlaceholders(
    949           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
    950 
    951   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
    952 }
    953 
    954 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
    955   std::vector<std::string> subst;
    956   subst.push_back("a");
    957   subst.push_back("b");
    958   subst.push_back("c");
    959   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
    960             "$1 $$2 $$$3");
    961 }
    962 
    963 TEST(StringUtilTest, MatchPatternTest) {
    964   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
    965   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
    966   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
    967   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
    968   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
    969   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
    970   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
    971   EXPECT_FALSE(MatchPattern("", "*.*"));
    972   EXPECT_TRUE(MatchPattern("", "*"));
    973   EXPECT_TRUE(MatchPattern("", "?"));
    974   EXPECT_TRUE(MatchPattern("", ""));
    975   EXPECT_FALSE(MatchPattern("Hello", ""));
    976   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
    977   // Stop after a certain recursion depth.
    978   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
    979 
    980   // Test UTF8 matching.
    981   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
    982   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
    983   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
    984   // Invalid sequences should be handled as a single invalid character.
    985   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
    986   // If the pattern has invalid characters, it shouldn't match anything.
    987   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
    988 
    989   // Test UTF16 character matching.
    990   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
    991                            UTF8ToUTF16("*.com")));
    992   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
    993                            UTF8ToUTF16("He??o\\*1*")));
    994 
    995   // This test verifies that consecutive wild cards are collapsed into 1
    996   // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
    997   // recursion depth).
    998   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
    999                            UTF8ToUTF16("He********************************o")));
   1000 }
   1001 
   1002 TEST(StringUtilTest, LcpyTest) {
   1003   // Test the normal case where we fit in our buffer.
   1004   {
   1005     char dst[10];
   1006     wchar_t wdst[10];
   1007     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1008     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
   1009     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1010     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
   1011   }
   1012 
   1013   // Test dst_size == 0, nothing should be written to |dst| and we should
   1014   // have the equivalent of strlen(src).
   1015   {
   1016     char dst[2] = {1, 2};
   1017     wchar_t wdst[2] = {1, 2};
   1018     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
   1019     EXPECT_EQ(1, dst[0]);
   1020     EXPECT_EQ(2, dst[1]);
   1021     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
   1022 #if defined(WCHAR_T_IS_UNSIGNED)
   1023     EXPECT_EQ(1U, wdst[0]);
   1024     EXPECT_EQ(2U, wdst[1]);
   1025 #else
   1026     EXPECT_EQ(1, wdst[0]);
   1027     EXPECT_EQ(2, wdst[1]);
   1028 #endif
   1029   }
   1030 
   1031   // Test the case were we _just_ competely fit including the null.
   1032   {
   1033     char dst[8];
   1034     wchar_t wdst[8];
   1035     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1036     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
   1037     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1038     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
   1039   }
   1040 
   1041   // Test the case were we we are one smaller, so we can't fit the null.
   1042   {
   1043     char dst[7];
   1044     wchar_t wdst[7];
   1045     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1046     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
   1047     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1048     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
   1049   }
   1050 
   1051   // Test the case were we are just too small.
   1052   {
   1053     char dst[3];
   1054     wchar_t wdst[3];
   1055     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1056     EXPECT_EQ(0, memcmp(dst, "ab", 3));
   1057     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1058     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
   1059   }
   1060 }
   1061 
   1062 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
   1063   static const struct {
   1064     const wchar_t* input;
   1065     bool portable;
   1066   } cases[] = {
   1067     { L"%ls", true },
   1068     { L"%s", false },
   1069     { L"%S", false },
   1070     { L"%lS", false },
   1071     { L"Hello, %s", false },
   1072     { L"%lc", true },
   1073     { L"%c", false },
   1074     { L"%C", false },
   1075     { L"%lC", false },
   1076     { L"%ls %s", false },
   1077     { L"%s %ls", false },
   1078     { L"%s %ls %s", false },
   1079     { L"%f", true },
   1080     { L"%f %F", false },
   1081     { L"%d %D", false },
   1082     { L"%o %O", false },
   1083     { L"%u %U", false },
   1084     { L"%f %d %o %u", true },
   1085     { L"%-8d (%02.1f%)", true },
   1086     { L"% 10s", false },
   1087     { L"% 10ls", true }
   1088   };
   1089   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
   1090     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
   1091 }
   1092 
   1093 TEST(StringUtilTest, RemoveChars) {
   1094   const char* kRemoveChars = "-/+*";
   1095   std::string input = "A-+bc/d!*";
   1096   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
   1097   EXPECT_EQ("Abcd!", input);
   1098 
   1099   // No characters match kRemoveChars.
   1100   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1101   EXPECT_EQ("Abcd!", input);
   1102 
   1103   // Empty string.
   1104   input.clear();
   1105   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1106   EXPECT_EQ(std::string(), input);
   1107 }
   1108 
   1109 TEST(StringUtilTest, ReplaceChars) {
   1110   struct TestData {
   1111     const char* input;
   1112     const char* replace_chars;
   1113     const char* replace_with;
   1114     const char* output;
   1115     bool result;
   1116   } cases[] = {
   1117     { "", "", "", "", false },
   1118     { "test", "", "", "test", false },
   1119     { "test", "", "!", "test", false },
   1120     { "test", "z", "!", "test", false },
   1121     { "test", "e", "!", "t!st", true },
   1122     { "test", "e", "!?", "t!?st", true },
   1123     { "test", "ez", "!", "t!st", true },
   1124     { "test", "zed", "!?", "t!?st", true },
   1125     { "test", "t", "!?", "!?es!?", true },
   1126     { "test", "et", "!>", "!>!>s!>", true },
   1127     { "test", "zest", "!", "!!!!", true },
   1128     { "test", "szt", "!", "!e!!", true },
   1129     { "test", "t", "test", "testestest", true },
   1130   };
   1131 
   1132   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
   1133     std::string output;
   1134     bool result = ReplaceChars(cases[i].input,
   1135                                cases[i].replace_chars,
   1136                                cases[i].replace_with,
   1137                                &output);
   1138     EXPECT_EQ(cases[i].result, result);
   1139     EXPECT_EQ(cases[i].output, output);
   1140   }
   1141 }
   1142 
   1143 TEST(StringUtilTest, ContainsOnlyChars) {
   1144   // Providing an empty list of characters should return false but for the empty
   1145   // string.
   1146   EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
   1147   EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
   1148 
   1149   EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
   1150   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
   1151   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
   1152   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
   1153   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
   1154 }
   1155 
   1156 class WriteIntoTest : public testing::Test {
   1157  protected:
   1158   static void WritesCorrectly(size_t num_chars) {
   1159     std::string buffer;
   1160     char kOriginal[] = "supercali";
   1161     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
   1162     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
   1163     // string at the first \0.
   1164     EXPECT_EQ(std::string(kOriginal,
   1165                           std::min(num_chars, arraysize(kOriginal) - 1)),
   1166               std::string(buffer.c_str()));
   1167     EXPECT_EQ(num_chars, buffer.size());
   1168   }
   1169 };
   1170 
   1171 TEST_F(WriteIntoTest, WriteInto) {
   1172   // Validate that WriteInto reserves enough space and
   1173   // sizes a string correctly.
   1174   WritesCorrectly(1);
   1175   WritesCorrectly(2);
   1176   WritesCorrectly(5000);
   1177 
   1178   // Validate that WriteInto doesn't modify other strings
   1179   // when using a Copy-on-Write implementation.
   1180   const char kLive[] = "live";
   1181   const char kDead[] = "dead";
   1182   const std::string live = kLive;
   1183   std::string dead = live;
   1184   strncpy(WriteInto(&dead, 5), kDead, 4);
   1185   EXPECT_EQ(kDead, dead);
   1186   EXPECT_EQ(4u, dead.size());
   1187   EXPECT_EQ(kLive, live);
   1188   EXPECT_EQ(4u, live.size());
   1189 }
   1190 
   1191 }  // namespace base
   1192