Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <math.h>
      8 #include <stdarg.h>
      9 
     10 #include <algorithm>
     11 
     12 #include "base/basictypes.h"
     13 #include "base/strings/string16.h"
     14 #include "base/strings/utf_string_conversions.h"
     15 #include "testing/gmock/include/gmock/gmock.h"
     16 #include "testing/gtest/include/gtest/gtest.h"
     17 
     18 using ::testing::ElementsAre;
     19 
     20 namespace base {
     21 
     22 static const struct trim_case {
     23   const wchar_t* input;
     24   const TrimPositions positions;
     25   const wchar_t* output;
     26   const TrimPositions return_value;
     27 } trim_cases[] = {
     28   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
     29   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
     30   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
     31   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
     32   {L"", TRIM_ALL, L"", TRIM_NONE},
     33   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
     34   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
     35   {L"  ", TRIM_ALL, L"", TRIM_ALL},
     36   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
     37   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
     38 };
     39 
     40 static const struct trim_case_ascii {
     41   const char* input;
     42   const TrimPositions positions;
     43   const char* output;
     44   const TrimPositions return_value;
     45 } trim_cases_ascii[] = {
     46   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
     47   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
     48   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
     49   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
     50   {"", TRIM_ALL, "", TRIM_NONE},
     51   {"  ", TRIM_LEADING, "", TRIM_LEADING},
     52   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
     53   {"  ", TRIM_ALL, "", TRIM_ALL},
     54   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
     55 };
     56 
     57 namespace {
     58 
     59 // Helper used to test TruncateUTF8ToByteSize.
     60 bool Truncated(const std::string& input, const size_t byte_size,
     61                std::string* output) {
     62     size_t prev = input.length();
     63     TruncateUTF8ToByteSize(input, byte_size, output);
     64     return prev != output->length();
     65 }
     66 
     67 }  // namespace
     68 
     69 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
     70   std::string output;
     71 
     72   // Empty strings and invalid byte_size arguments
     73   EXPECT_FALSE(Truncated(std::string(), 0, &output));
     74   EXPECT_EQ(output, "");
     75   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
     76   EXPECT_EQ(output, "");
     77   EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
     78   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
     79 
     80   // Testing the truncation of valid UTF8 correctly
     81   EXPECT_TRUE(Truncated("abc", 2, &output));
     82   EXPECT_EQ(output, "ab");
     83   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
     84   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     85   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
     86   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     87   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
     88   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
     89 
     90   {
     91     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
     92     const std::string array_string(array, arraysize(array));
     93     EXPECT_TRUE(Truncated(array_string, 4, &output));
     94     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
     95   }
     96 
     97   {
     98     const char array[] = "\x00\xc2\x81\xc2\x81";
     99     const std::string array_string(array, arraysize(array));
    100     EXPECT_TRUE(Truncated(array_string, 4, &output));
    101     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
    102   }
    103 
    104   // Testing invalid UTF8
    105   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
    106   EXPECT_EQ(output.compare(""), 0);
    107   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
    108   EXPECT_EQ(output.compare(""), 0);
    109   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
    110   EXPECT_EQ(output.compare(""), 0);
    111 
    112   // Testing invalid UTF8 mixed with valid UTF8
    113   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
    114   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
    115   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
    116   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
    117   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
    118               10, &output));
    119   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
    120   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
    121               10, &output));
    122   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
    123   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
    124   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
    125 
    126   // Overlong sequences
    127   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
    128   EXPECT_EQ(output.compare(""), 0);
    129   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
    130   EXPECT_EQ(output.compare(""), 0);
    131   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
    132   EXPECT_EQ(output.compare(""), 0);
    133   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
    134   EXPECT_EQ(output.compare(""), 0);
    135   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
    136   EXPECT_EQ(output.compare(""), 0);
    137   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
    138   EXPECT_EQ(output.compare(""), 0);
    139   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
    140   EXPECT_EQ(output.compare(""), 0);
    141   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
    142   EXPECT_EQ(output.compare(""), 0);
    143   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
    144   EXPECT_EQ(output.compare(""), 0);
    145   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
    146   EXPECT_EQ(output.compare(""), 0);
    147   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
    148   EXPECT_EQ(output.compare(""), 0);
    149 
    150   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    151   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
    152   EXPECT_EQ(output.compare(""), 0);
    153   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
    154   EXPECT_EQ(output.compare(""), 0);
    155   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
    156   EXPECT_EQ(output.compare(""), 0);
    157 
    158   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    159   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
    160   EXPECT_EQ(output.compare(""), 0);
    161   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
    162   EXPECT_EQ(output.compare(""), 0);
    163 
    164   {
    165     const char array[] = "\x00\x00\xfe\xff";
    166     const std::string array_string(array, arraysize(array));
    167     EXPECT_TRUE(Truncated(array_string, 4, &output));
    168     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
    169   }
    170 
    171   // Variants on the previous test
    172   {
    173     const char array[] = "\xff\xfe\x00\x00";
    174     const std::string array_string(array, 4);
    175     EXPECT_FALSE(Truncated(array_string, 4, &output));
    176     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
    177   }
    178   {
    179     const char array[] = "\xff\x00\x00\xfe";
    180     const std::string array_string(array, arraysize(array));
    181     EXPECT_TRUE(Truncated(array_string, 4, &output));
    182     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
    183   }
    184 
    185   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    186   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
    187   EXPECT_EQ(output.compare(""), 0);
    188   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
    189   EXPECT_EQ(output.compare(""), 0);
    190   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
    191   EXPECT_EQ(output.compare(""), 0);
    192   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
    193   EXPECT_EQ(output.compare(""), 0);
    194   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
    195   EXPECT_EQ(output.compare(""), 0);
    196 
    197   // Strings in legacy encodings that are valid in UTF-8, but
    198   // are invalid as UTF-8 in real data.
    199   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
    200   EXPECT_EQ(output.compare("caf"), 0);
    201   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
    202   EXPECT_EQ(output.compare(""), 0);
    203   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
    204   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    205   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
    206               &output));
    207   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    208 
    209   // Testing using the same string as input and output.
    210   EXPECT_FALSE(Truncated(output, 4, &output));
    211   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    212   EXPECT_TRUE(Truncated(output, 3, &output));
    213   EXPECT_EQ(output.compare("\xa7\x41"), 0);
    214 
    215   // "abc" with U+201[CD] in windows-125[0-8]
    216   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
    217   EXPECT_EQ(output.compare("\x93" "abc"), 0);
    218 
    219   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    220   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
    221   EXPECT_EQ(output.compare(""), 0);
    222 
    223   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    224   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
    225   EXPECT_EQ(output.compare(""), 0);
    226 }
    227 
    228 TEST(StringUtilTest, TrimWhitespace) {
    229   string16 output;  // Allow contents to carry over to next testcase
    230   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
    231     const trim_case& value = trim_cases[i];
    232     EXPECT_EQ(value.return_value,
    233               TrimWhitespace(WideToUTF16(value.input), value.positions,
    234                              &output));
    235     EXPECT_EQ(WideToUTF16(value.output), output);
    236   }
    237 
    238   // Test that TrimWhitespace() can take the same string for input and output
    239   output = ASCIIToUTF16("  This is a test \r\n");
    240   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    241   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
    242 
    243   // Once more, but with a string of whitespace
    244   output = ASCIIToUTF16("  \r\n");
    245   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    246   EXPECT_EQ(string16(), output);
    247 
    248   std::string output_ascii;
    249   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
    250     const trim_case_ascii& value = trim_cases_ascii[i];
    251     EXPECT_EQ(value.return_value,
    252               TrimWhitespace(value.input, value.positions, &output_ascii));
    253     EXPECT_EQ(value.output, output_ascii);
    254   }
    255 }
    256 
    // Wide-character cases consumed by the CollapseWhitespace test: the text to
    // collapse, the flag forwarded as the second argument of
    // CollapseWhitespace(), and the expected result.
    static const struct collapse_case {
      const wchar_t* input;   // Text handed to CollapseWhitespace().
      const bool trim;        // Second argument of the call.
      const wchar_t* output;  // Expected collapsed text.
    } collapse_cases[] = {
      // Flag off.
      { L" Google Video ",                false, L"Google Video" },
      { L"Google Video",                  false, L"Google Video" },
      { L"",                              false, L""             },
      { L"  ",                            false, L""             },
      { L"\t\rTest String\n",             false, L"Test String"  },
      { L"\x2002Test String\x00A0\x3000", false, L"Test String"  },
      { L"    Test     \n  \t String    ", false, L"Test String" },
      { L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String" },
      { L"   Test String",                false, L"Test String"  },
      { L"Test String    ",               false, L"Test String"  },
      { L"Test String",                   false, L"Test String"  },

      // Flag on. In the last row the runs containing \r or \n are expected to
      // vanish entirely while " \t " still becomes a single space.
      { L"",                  true, L""        },
      { L"\n",                true, L""        },
      { L"  \r  ",            true, L""        },
      { L"\nFoo",             true, L"Foo"     },
      { L"\r  Foo  ",         true, L"Foo"     },
      { L" Foo bar ",         true, L"Foo bar" },
      { L"  \tFoo  bar  \n",  true, L"Foo bar" },
      { L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f" },
    };
    282 
    283 TEST(StringUtilTest, CollapseWhitespace) {
    284   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
    285     const collapse_case& value = collapse_cases[i];
    286     EXPECT_EQ(WideToUTF16(value.output),
    287               CollapseWhitespace(WideToUTF16(value.input), value.trim));
    288   }
    289 }
    290 
    // Narrow-string cases consumed by the CollapseWhitespaceASCII test: the text
    // to collapse, the flag forwarded as the second argument of
    // CollapseWhitespaceASCII(), and the expected result.
    static const struct collapse_case_ascii {
      const char* input;   // Text handed to CollapseWhitespaceASCII().
      const bool trim;     // Second argument of the call.
      const char* output;  // Expected collapsed text.
    } collapse_cases_ascii[] = {
      // Flag off.
      { " Google Video ",                 false, "Google Video" },
      { "Google Video",                   false, "Google Video" },
      { "",                               false, ""             },
      { "  ",                             false, ""             },
      { "\t\rTest String\n",              false, "Test String"  },
      { "    Test     \n  \t String    ", false, "Test String"  },
      { "   Test String",                 false, "Test String"  },
      { "Test String    ",                false, "Test String"  },
      { "Test String",                    false, "Test String"  },

      // Flag on. In the last row the runs containing \r or \n are expected to
      // vanish entirely while " \t " still becomes a single space.
      { "",                  true, ""        },
      { "\n",                true, ""        },
      { "  \r  ",            true, ""        },
      { "\nFoo",             true, "Foo"     },
      { "\r  Foo  ",         true, "Foo"     },
      { " Foo bar ",         true, "Foo bar" },
      { "  \tFoo  bar  \n",  true, "Foo bar" },
      { " a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f" },
    };
    314 
    315 TEST(StringUtilTest, CollapseWhitespaceASCII) {
    316   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
    317     const collapse_case_ascii& value = collapse_cases_ascii[i];
    318     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
    319   }
    320 }
    321 
    322 TEST(StringUtilTest, IsStringUTF8) {
    323   EXPECT_TRUE(IsStringUTF8("abc"));
    324   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
    325   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
    326   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
    327   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
    328   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
    329 
    330   // surrogate code points
    331   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
    332   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
    333   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
    334 
    335   // overlong sequences
    336   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
    337   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
    338   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
    339   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
    340   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
    341   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
    342   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
    343   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
    344   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
    345   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
    346   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
    347 
    348   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    349   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
    350   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
    351   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
    352 
    353   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    354   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
    355   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
    356   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
    357   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
    358 
    359   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    360   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
    361   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
    362   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
    363   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
    364   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
    365   // Strings in legacy encodings. We can certainly make up strings
    366   // in a legacy encoding that are valid in UTF-8, but in real data,
    367   // most of them are invalid as UTF-8.
    368   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
    369   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
    370   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
    371   // "abc" with U+201[CD] in windows-125[0-8]
    372   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
    373   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    374   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
    375   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    376   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
    377 
    378   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
    379   // representation, and the second uses a 2-byte sequence. The second version
    380   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
    381   // given codepoint must be used.
    382   static const char kEmbeddedNull[] = "embedded\0null";
    383   EXPECT_TRUE(IsStringUTF8(
    384       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
    385   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
    386 }
    387 
    388 TEST(StringUtilTest, ConvertASCII) {
    389   static const char* char_cases[] = {
    390     "Google Video",
    391     "Hello, world\n",
    392     "0123ABCDwxyz \a\b\t\r\n!+,.~"
    393   };
    394 
    395   static const wchar_t* const wchar_cases[] = {
    396     L"Google Video",
    397     L"Hello, world\n",
    398     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
    399   };
    400 
    401   for (size_t i = 0; i < arraysize(char_cases); ++i) {
    402     EXPECT_TRUE(IsStringASCII(char_cases[i]));
    403     string16 utf16 = ASCIIToUTF16(char_cases[i]);
    404     EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
    405 
    406     std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
    407     EXPECT_EQ(char_cases[i], ascii);
    408   }
    409 
    410   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
    411 
    412   // Convert empty strings.
    413   string16 empty16;
    414   std::string empty;
    415   EXPECT_EQ(empty, UTF16ToASCII(empty16));
    416   EXPECT_EQ(empty16, ASCIIToUTF16(empty));
    417 
    418   // Convert strings with an embedded NUL character.
    419   const char chars_with_nul[] = "test\0string";
    420   const int length_with_nul = arraysize(chars_with_nul) - 1;
    421   std::string string_with_nul(chars_with_nul, length_with_nul);
    422   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
    423   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
    424             wide_with_nul.length());
    425   std::string narrow_with_nul = UTF16ToASCII(WideToUTF16(wide_with_nul));
    426   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
    427             narrow_with_nul.length());
    428   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
    429 }
    430 
    431 TEST(StringUtilTest, ToUpperASCII) {
    432   EXPECT_EQ('C', ToUpperASCII('C'));
    433   EXPECT_EQ('C', ToUpperASCII('c'));
    434   EXPECT_EQ('2', ToUpperASCII('2'));
    435 
    436   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
    437   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
    438   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
    439 
    440   std::string in_place_a("Cc2");
    441   StringToUpperASCII(&in_place_a);
    442   EXPECT_EQ("CC2", in_place_a);
    443 
    444   std::wstring in_place_w(L"Cc2");
    445   StringToUpperASCII(&in_place_w);
    446   EXPECT_EQ(L"CC2", in_place_w);
    447 
    448   std::string original_a("Cc2");
    449   std::string upper_a = StringToUpperASCII(original_a);
    450   EXPECT_EQ("CC2", upper_a);
    451 
    452   std::wstring original_w(L"Cc2");
    453   std::wstring upper_w = StringToUpperASCII(original_w);
    454   EXPECT_EQ(L"CC2", upper_w);
    455 }
    456 
    457 TEST(StringUtilTest, LowerCaseEqualsASCII) {
    458   static const struct {
    459     const char*    src_a;
    460     const char*    dst;
    461   } lowercase_cases[] = {
    462     { "FoO", "foo" },
    463     { "foo", "foo" },
    464     { "FOO", "foo" },
    465   };
    466 
    467   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
    468     EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
    469                                      lowercase_cases[i].dst));
    470     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
    471                                      lowercase_cases[i].dst));
    472   }
    473 }
    474 
    475 TEST(StringUtilTest, FormatBytesUnlocalized) {
    476   static const struct {
    477     int64 bytes;
    478     const char* expected;
    479   } cases[] = {
    480     // Expected behavior: we show one post-decimal digit when we have
    481     // under two pre-decimal digits, except in cases where it makes no
    482     // sense (zero or bytes).
    483     // Since we switch units once we cross the 1000 mark, this keeps
    484     // the display of file sizes or bytes consistently around three
    485     // digits.
    486     {0, "0 B"},
    487     {512, "512 B"},
    488     {1024*1024, "1.0 MB"},
    489     {1024*1024*1024, "1.0 GB"},
    490     {10LL*1024*1024*1024, "10.0 GB"},
    491     {99LL*1024*1024*1024, "99.0 GB"},
    492     {105LL*1024*1024*1024, "105 GB"},
    493     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
    494     {~(1LL<<63), "8192 PB"},
    495 
    496     {99*1024 + 103, "99.1 kB"},
    497     {1024*1024 + 103, "1.0 MB"},
    498     {1024*1024 + 205 * 1024, "1.2 MB"},
    499     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
    500     {10LL*1024*1024*1024, "10.0 GB"},
    501     {100LL*1024*1024*1024, "100 GB"},
    502   };
    503 
    504   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    505     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
    506               FormatBytesUnlocalized(cases[i].bytes));
    507   }
    508 }
    509 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
    510   static const struct {
    511     const char* str;
    512     string16::size_type start_offset;
    513     const char* find_this;
    514     const char* replace_with;
    515     const char* expected;
    516   } cases[] = {
    517     {"aaa", 0, "a", "b", "bbb"},
    518     {"abb", 0, "ab", "a", "ab"},
    519     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
    520     {"Not found", 0, "x", "0", "Not found"},
    521     {"Not found again", 5, "x", "0", "Not found again"},
    522     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    523      "Four score and seven years agoMakingFour score and seven years agoit"
    524      "Four score and seven years agomuchFour score and seven years agolonger"
    525      "Four score and seven years ago"},
    526     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    527     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
    528     {"abababab", 2, "ab", "c", "abccc"},
    529   };
    530 
    531   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    532     string16 str = ASCIIToUTF16(cases[i].str);
    533     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
    534                                  ASCIIToUTF16(cases[i].find_this),
    535                                  ASCIIToUTF16(cases[i].replace_with));
    536     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    537   }
    538 }
    539 
    540 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
    541   static const struct {
    542     const char* str;
    543     string16::size_type start_offset;
    544     const char* find_this;
    545     const char* replace_with;
    546     const char* expected;
    547   } cases[] = {
    548     {"aaa", 0, "a", "b", "baa"},
    549     {"abb", 0, "ab", "a", "ab"},
    550     {"Removing some substrings inging", 0, "ing", "",
    551       "Remov some substrings inging"},
    552     {"Not found", 0, "x", "0", "Not found"},
    553     {"Not found again", 5, "x", "0", "Not found again"},
    554     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    555      "Four score and seven years agoMaking it much longer "},
    556     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    557     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    558     {"abababab", 2, "ab", "c", "abcabab"},
    559   };
    560 
    561   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    562     string16 str = ASCIIToUTF16(cases[i].str);
    563     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
    564                                      ASCIIToUTF16(cases[i].find_this),
    565                                      ASCIIToUTF16(cases[i].replace_with));
    566     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    567   }
    568 }
    569 
    570 TEST(StringUtilTest, HexDigitToInt) {
    571   EXPECT_EQ(0, HexDigitToInt('0'));
    572   EXPECT_EQ(1, HexDigitToInt('1'));
    573   EXPECT_EQ(2, HexDigitToInt('2'));
    574   EXPECT_EQ(3, HexDigitToInt('3'));
    575   EXPECT_EQ(4, HexDigitToInt('4'));
    576   EXPECT_EQ(5, HexDigitToInt('5'));
    577   EXPECT_EQ(6, HexDigitToInt('6'));
    578   EXPECT_EQ(7, HexDigitToInt('7'));
    579   EXPECT_EQ(8, HexDigitToInt('8'));
    580   EXPECT_EQ(9, HexDigitToInt('9'));
    581   EXPECT_EQ(10, HexDigitToInt('A'));
    582   EXPECT_EQ(11, HexDigitToInt('B'));
    583   EXPECT_EQ(12, HexDigitToInt('C'));
    584   EXPECT_EQ(13, HexDigitToInt('D'));
    585   EXPECT_EQ(14, HexDigitToInt('E'));
    586   EXPECT_EQ(15, HexDigitToInt('F'));
    587 
    588   // Verify the lower case as well.
    589   EXPECT_EQ(10, HexDigitToInt('a'));
    590   EXPECT_EQ(11, HexDigitToInt('b'));
    591   EXPECT_EQ(12, HexDigitToInt('c'));
    592   EXPECT_EQ(13, HexDigitToInt('d'));
    593   EXPECT_EQ(14, HexDigitToInt('e'));
    594   EXPECT_EQ(15, HexDigitToInt('f'));
    595 }
    596 
    597 // This checks where we can use the assignment operator for a va_list. We need
    598 // a way to do this since Visual C doesn't support va_copy, but assignment on
    599 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
    600 // capability.
    601 static void VariableArgsFunc(const char* format, ...) {
    602   va_list org;
    603   va_start(org, format);
    604 
    605   va_list dup;
    606   GG_VA_COPY(dup, org);
    607   int i1 = va_arg(org, int);
    608   int j1 = va_arg(org, int);
    609   char* s1 = va_arg(org, char*);
    610   double d1 = va_arg(org, double);
    611   va_end(org);
    612 
    613   int i2 = va_arg(dup, int);
    614   int j2 = va_arg(dup, int);
    615   char* s2 = va_arg(dup, char*);
    616   double d2 = va_arg(dup, double);
    617 
    618   EXPECT_EQ(i1, i2);
    619   EXPECT_EQ(j1, j2);
    620   EXPECT_STREQ(s1, s2);
    621   EXPECT_EQ(d1, d2);
    622 
    623   va_end(dup);
    624 }
    625 
    626 TEST(StringUtilTest, VAList) {
    627   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
    628 }
    629 
    630 // Test for Tokenize
    631 template <typename STR>
    632 void TokenizeTest() {
    633   std::vector<STR> r;
    634   size_t size;
    635 
    636   size = Tokenize(STR("This is a string"), STR(" "), &r);
    637   EXPECT_EQ(4U, size);
    638   ASSERT_EQ(4U, r.size());
    639   EXPECT_EQ(r[0], STR("This"));
    640   EXPECT_EQ(r[1], STR("is"));
    641   EXPECT_EQ(r[2], STR("a"));
    642   EXPECT_EQ(r[3], STR("string"));
    643   r.clear();
    644 
    645   size = Tokenize(STR("one,two,three"), STR(","), &r);
    646   EXPECT_EQ(3U, size);
    647   ASSERT_EQ(3U, r.size());
    648   EXPECT_EQ(r[0], STR("one"));
    649   EXPECT_EQ(r[1], STR("two"));
    650   EXPECT_EQ(r[2], STR("three"));
    651   r.clear();
    652 
    653   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
    654   EXPECT_EQ(3U, size);
    655   ASSERT_EQ(3U, r.size());
    656   EXPECT_EQ(r[0], STR("one"));
    657   EXPECT_EQ(r[1], STR("two"));
    658   EXPECT_EQ(r[2], STR("three;four"));
    659   r.clear();
    660 
    661   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
    662   EXPECT_EQ(4U, size);
    663   ASSERT_EQ(4U, r.size());
    664   EXPECT_EQ(r[0], STR("one"));
    665   EXPECT_EQ(r[1], STR("two"));
    666   EXPECT_EQ(r[2], STR("three"));
    667   EXPECT_EQ(r[3], STR("four"));
    668   r.clear();
    669 
    670   size = Tokenize(STR("one, two, three"), STR(","), &r);
    671   EXPECT_EQ(3U, size);
    672   ASSERT_EQ(3U, r.size());
    673   EXPECT_EQ(r[0], STR("one"));
    674   EXPECT_EQ(r[1], STR(" two"));
    675   EXPECT_EQ(r[2], STR(" three"));
    676   r.clear();
    677 
    678   size = Tokenize(STR("one, two, three, "), STR(","), &r);
    679   EXPECT_EQ(4U, size);
    680   ASSERT_EQ(4U, r.size());
    681   EXPECT_EQ(r[0], STR("one"));
    682   EXPECT_EQ(r[1], STR(" two"));
    683   EXPECT_EQ(r[2], STR(" three"));
    684   EXPECT_EQ(r[3], STR(" "));
    685   r.clear();
    686 
    687   size = Tokenize(STR("one, two, three,"), STR(","), &r);
    688   EXPECT_EQ(3U, size);
    689   ASSERT_EQ(3U, r.size());
    690   EXPECT_EQ(r[0], STR("one"));
    691   EXPECT_EQ(r[1], STR(" two"));
    692   EXPECT_EQ(r[2], STR(" three"));
    693   r.clear();
    694 
    695   size = Tokenize(STR(), STR(","), &r);
    696   EXPECT_EQ(0U, size);
    697   ASSERT_EQ(0U, r.size());
    698   r.clear();
    699 
    700   size = Tokenize(STR(","), STR(","), &r);
    701   EXPECT_EQ(0U, size);
    702   ASSERT_EQ(0U, r.size());
    703   r.clear();
    704 
    705   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
    706   EXPECT_EQ(0U, size);
    707   ASSERT_EQ(0U, r.size());
    708   r.clear();
    709 
    710   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
    711   EXPECT_EQ(1U, size);
    712   ASSERT_EQ(1U, r.size());
    713   EXPECT_EQ(r[0], STR("a"));
    714   r.clear();
    715 
    716   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
    717   EXPECT_EQ(2U, size);
    718   ASSERT_EQ(2U, r.size());
    719   EXPECT_EQ(r[0], STR("\ta\t"));
    720   EXPECT_EQ(r[1], STR("b\tcc"));
    721   r.clear();
    722 }
    723 
    724 TEST(StringUtilTest, TokenizeStdString) {
    725   TokenizeTest<std::string>();
    726 }
    727 
    728 TEST(StringUtilTest, TokenizeStringPiece) {
    729   TokenizeTest<base::StringPiece>();
    730 }
    731 
    732 // Test for JoinString
    733 TEST(StringUtilTest, JoinString) {
    734   std::vector<std::string> in;
    735   EXPECT_EQ("", JoinString(in, ','));
    736 
    737   in.push_back("a");
    738   EXPECT_EQ("a", JoinString(in, ','));
    739 
    740   in.push_back("b");
    741   in.push_back("c");
    742   EXPECT_EQ("a,b,c", JoinString(in, ','));
    743 
    744   in.push_back(std::string());
    745   EXPECT_EQ("a,b,c,", JoinString(in, ','));
    746   in.push_back(" ");
    747   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
    748 }
    749 
    750 // Test for JoinString overloaded with std::string separator
    751 TEST(StringUtilTest, JoinStringWithString) {
    752   std::string separator(", ");
    753   std::vector<std::string> parts;
    754   EXPECT_EQ(std::string(), JoinString(parts, separator));
    755 
    756   parts.push_back("a");
    757   EXPECT_EQ("a", JoinString(parts, separator));
    758 
    759   parts.push_back("b");
    760   parts.push_back("c");
    761   EXPECT_EQ("a, b, c", JoinString(parts, separator));
    762 
    763   parts.push_back(std::string());
    764   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
    765   parts.push_back(" ");
    766   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
    767 }
    768 
    769 // Test for JoinString overloaded with string16 separator
    770 TEST(StringUtilTest, JoinStringWithString16) {
    771   string16 separator = ASCIIToUTF16(", ");
    772   std::vector<string16> parts;
    773   EXPECT_EQ(string16(), JoinString(parts, separator));
    774 
    775   parts.push_back(ASCIIToUTF16("a"));
    776   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
    777 
    778   parts.push_back(ASCIIToUTF16("b"));
    779   parts.push_back(ASCIIToUTF16("c"));
    780   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
    781 
    782   parts.push_back(ASCIIToUTF16(""));
    783   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
    784   parts.push_back(ASCIIToUTF16(" "));
    785   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
    786 }
    787 
    788 TEST(StringUtilTest, StartsWith) {
    789   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
    790   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
    791   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
    792   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
    793   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
    794   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
    795   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", false));
    796   EXPECT_FALSE(StartsWithASCII(std::string(), "javascript", true));
    797   EXPECT_TRUE(StartsWithASCII("java", std::string(), false));
    798   EXPECT_TRUE(StartsWithASCII("java", std::string(), true));
    799 
    800   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
    801                          ASCIIToUTF16("javascript"), true));
    802   EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
    803                           ASCIIToUTF16("javascript"), true));
    804   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
    805                          ASCIIToUTF16("javascript"), false));
    806   EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
    807                          ASCIIToUTF16("javascript"), false));
    808   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
    809                           ASCIIToUTF16("javascript"), true));
    810   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"),
    811                           ASCIIToUTF16("javascript"), false));
    812   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), false));
    813   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), true));
    814   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), false));
    815   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), true));
    816 }
    817 
    818 TEST(StringUtilTest, EndsWith) {
    819   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
    820                        ASCIIToUTF16(".plugin"), true));
    821   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
    822                         ASCIIToUTF16(".plugin"), true));
    823   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"),
    824                        ASCIIToUTF16(".plugin"), false));
    825   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"),
    826                        ASCIIToUTF16(".plugin"), false));
    827   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), true));
    828   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), false));
    829   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
    830                         ASCIIToUTF16(".plugin"), true));
    831   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"),
    832                         ASCIIToUTF16(".plugin"), false));
    833   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), false));
    834   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), true));
    835   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), false));
    836   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), true));
    837   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"),
    838                        ASCIIToUTF16(".plugin"), false));
    839   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), true));
    840   EXPECT_TRUE(EndsWith(string16(), string16(), false));
    841   EXPECT_TRUE(EndsWith(string16(), string16(), true));
    842 }
    843 
    844 TEST(StringUtilTest, GetStringFWithOffsets) {
    845   std::vector<string16> subst;
    846   subst.push_back(ASCIIToUTF16("1"));
    847   subst.push_back(ASCIIToUTF16("2"));
    848   std::vector<size_t> offsets;
    849 
    850   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
    851                             subst,
    852                             &offsets);
    853   EXPECT_EQ(2U, offsets.size());
    854   EXPECT_EQ(7U, offsets[0]);
    855   EXPECT_EQ(25U, offsets[1]);
    856   offsets.clear();
    857 
    858   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
    859                             subst,
    860                             &offsets);
    861   EXPECT_EQ(2U, offsets.size());
    862   EXPECT_EQ(25U, offsets[0]);
    863   EXPECT_EQ(7U, offsets[1]);
    864   offsets.clear();
    865 }
    866 
    867 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
    868   // Test whether replacestringplaceholders works as expected when there
    869   // are fewer inputs than outputs.
    870   std::vector<string16> subst;
    871   subst.push_back(ASCIIToUTF16("9a"));
    872   subst.push_back(ASCIIToUTF16("8b"));
    873   subst.push_back(ASCIIToUTF16("7c"));
    874 
    875   string16 formatted =
    876       ReplaceStringPlaceholders(
    877           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
    878 
    879   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
    880 }
    881 
    882 TEST(StringUtilTest, ReplaceStringPlaceholders) {
    883   std::vector<string16> subst;
    884   subst.push_back(ASCIIToUTF16("9a"));
    885   subst.push_back(ASCIIToUTF16("8b"));
    886   subst.push_back(ASCIIToUTF16("7c"));
    887   subst.push_back(ASCIIToUTF16("6d"));
    888   subst.push_back(ASCIIToUTF16("5e"));
    889   subst.push_back(ASCIIToUTF16("4f"));
    890   subst.push_back(ASCIIToUTF16("3g"));
    891   subst.push_back(ASCIIToUTF16("2h"));
    892   subst.push_back(ASCIIToUTF16("1i"));
    893 
    894   string16 formatted =
    895       ReplaceStringPlaceholders(
    896           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
    897 
    898   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
    899 }
    900 
    901 TEST(StringUtilTest, ReplaceStringPlaceholdersMoreThan9Replacements) {
    902   std::vector<string16> subst;
    903   subst.push_back(ASCIIToUTF16("9a"));
    904   subst.push_back(ASCIIToUTF16("8b"));
    905   subst.push_back(ASCIIToUTF16("7c"));
    906   subst.push_back(ASCIIToUTF16("6d"));
    907   subst.push_back(ASCIIToUTF16("5e"));
    908   subst.push_back(ASCIIToUTF16("4f"));
    909   subst.push_back(ASCIIToUTF16("3g"));
    910   subst.push_back(ASCIIToUTF16("2h"));
    911   subst.push_back(ASCIIToUTF16("1i"));
    912   subst.push_back(ASCIIToUTF16("0j"));
    913   subst.push_back(ASCIIToUTF16("-1k"));
    914   subst.push_back(ASCIIToUTF16("-2l"));
    915   subst.push_back(ASCIIToUTF16("-3m"));
    916   subst.push_back(ASCIIToUTF16("-4n"));
    917 
    918   string16 formatted =
    919       ReplaceStringPlaceholders(
    920           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i,"
    921                        "$10j,$11k,$12l,$13m,$14n,$1"), subst, NULL);
    922 
    923   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,"
    924                                     "1ii,0jj,-1kk,-2ll,-3mm,-4nn,9a"));
    925 }
    926 
    927 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
    928   std::vector<std::string> subst;
    929   subst.push_back("9a");
    930   subst.push_back("8b");
    931   subst.push_back("7c");
    932   subst.push_back("6d");
    933   subst.push_back("5e");
    934   subst.push_back("4f");
    935   subst.push_back("3g");
    936   subst.push_back("2h");
    937   subst.push_back("1i");
    938 
    939   std::string formatted =
    940       ReplaceStringPlaceholders(
    941           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
    942 
    943   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
    944 }
    945 
    946 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
    947   std::vector<std::string> subst;
    948   subst.push_back("a");
    949   subst.push_back("b");
    950   subst.push_back("c");
    951   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
    952             "$1 $$2 $$$3");
    953 }
    954 
    955 TEST(StringUtilTest, MatchPatternTest) {
    956   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
    957   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
    958   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
    959   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
    960   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
    961   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
    962   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
    963   EXPECT_FALSE(MatchPattern("", "*.*"));
    964   EXPECT_TRUE(MatchPattern("", "*"));
    965   EXPECT_TRUE(MatchPattern("", "?"));
    966   EXPECT_TRUE(MatchPattern("", ""));
    967   EXPECT_FALSE(MatchPattern("Hello", ""));
    968   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
    969   // Stop after a certain recursion depth.
    970   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
    971 
    972   // Test UTF8 matching.
    973   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
    974   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
    975   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
    976   // Invalid sequences should be handled as a single invalid character.
    977   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
    978   // If the pattern has invalid characters, it shouldn't match anything.
    979   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
    980 
    981   // Test UTF16 character matching.
    982   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
    983                            UTF8ToUTF16("*.com")));
    984   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
    985                            UTF8ToUTF16("He??o\\*1*")));
    986 
    987   // This test verifies that consecutive wild cards are collapsed into 1
    988   // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
    989   // recursion depth).
    990   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
    991                            UTF8ToUTF16("He********************************o")));
    992 }
    993 
    994 TEST(StringUtilTest, LcpyTest) {
    995   // Test the normal case where we fit in our buffer.
    996   {
    997     char dst[10];
    998     wchar_t wdst[10];
    999     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1000     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
   1001     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1002     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
   1003   }
   1004 
   1005   // Test dst_size == 0, nothing should be written to |dst| and we should
   1006   // have the equivalent of strlen(src).
   1007   {
   1008     char dst[2] = {1, 2};
   1009     wchar_t wdst[2] = {1, 2};
   1010     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
   1011     EXPECT_EQ(1, dst[0]);
   1012     EXPECT_EQ(2, dst[1]);
   1013     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
   1014     EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
   1015     EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
   1016   }
   1017 
   1018   // Test the case were we _just_ competely fit including the null.
   1019   {
   1020     char dst[8];
   1021     wchar_t wdst[8];
   1022     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1023     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
   1024     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1025     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
   1026   }
   1027 
   1028   // Test the case were we we are one smaller, so we can't fit the null.
   1029   {
   1030     char dst[7];
   1031     wchar_t wdst[7];
   1032     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1033     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
   1034     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1035     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
   1036   }
   1037 
   1038   // Test the case were we are just too small.
   1039   {
   1040     char dst[3];
   1041     wchar_t wdst[3];
   1042     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1043     EXPECT_EQ(0, memcmp(dst, "ab", 3));
   1044     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1045     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
   1046   }
   1047 }
   1048 
   1049 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
   1050   static const struct {
   1051     const wchar_t* input;
   1052     bool portable;
   1053   } cases[] = {
   1054     { L"%ls", true },
   1055     { L"%s", false },
   1056     { L"%S", false },
   1057     { L"%lS", false },
   1058     { L"Hello, %s", false },
   1059     { L"%lc", true },
   1060     { L"%c", false },
   1061     { L"%C", false },
   1062     { L"%lC", false },
   1063     { L"%ls %s", false },
   1064     { L"%s %ls", false },
   1065     { L"%s %ls %s", false },
   1066     { L"%f", true },
   1067     { L"%f %F", false },
   1068     { L"%d %D", false },
   1069     { L"%o %O", false },
   1070     { L"%u %U", false },
   1071     { L"%f %d %o %u", true },
   1072     { L"%-8d (%02.1f%)", true },
   1073     { L"% 10s", false },
   1074     { L"% 10ls", true }
   1075   };
   1076   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
   1077     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
   1078 }
   1079 
   1080 TEST(StringUtilTest, RemoveChars) {
   1081   const char* kRemoveChars = "-/+*";
   1082   std::string input = "A-+bc/d!*";
   1083   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
   1084   EXPECT_EQ("Abcd!", input);
   1085 
   1086   // No characters match kRemoveChars.
   1087   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1088   EXPECT_EQ("Abcd!", input);
   1089 
   1090   // Empty string.
   1091   input.clear();
   1092   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1093   EXPECT_EQ(std::string(), input);
   1094 }
   1095 
   1096 TEST(StringUtilTest, ReplaceChars) {
   1097   struct TestData {
   1098     const char* input;
   1099     const char* replace_chars;
   1100     const char* replace_with;
   1101     const char* output;
   1102     bool result;
   1103   } cases[] = {
   1104     { "", "", "", "", false },
   1105     { "test", "", "", "test", false },
   1106     { "test", "", "!", "test", false },
   1107     { "test", "z", "!", "test", false },
   1108     { "test", "e", "!", "t!st", true },
   1109     { "test", "e", "!?", "t!?st", true },
   1110     { "test", "ez", "!", "t!st", true },
   1111     { "test", "zed", "!?", "t!?st", true },
   1112     { "test", "t", "!?", "!?es!?", true },
   1113     { "test", "et", "!>", "!>!>s!>", true },
   1114     { "test", "zest", "!", "!!!!", true },
   1115     { "test", "szt", "!", "!e!!", true },
   1116     { "test", "t", "test", "testestest", true },
   1117   };
   1118 
   1119   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
   1120     std::string output;
   1121     bool result = ReplaceChars(cases[i].input,
   1122                                cases[i].replace_chars,
   1123                                cases[i].replace_with,
   1124                                &output);
   1125     EXPECT_EQ(cases[i].result, result);
   1126     EXPECT_EQ(cases[i].output, output);
   1127   }
   1128 }
   1129 
   1130 TEST(StringUtilTest, ContainsOnlyChars) {
   1131   // Providing an empty list of characters should return false but for the empty
   1132   // string.
   1133   EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
   1134   EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
   1135 
   1136   EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
   1137   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
   1138   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
   1139   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
   1140   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
   1141 
   1142   EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
   1143   EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
   1144   EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
   1145   EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
   1146   EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
   1147   EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));
   1148 
   1149   EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
   1150   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
   1151   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
   1152   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n  "), kWhitespaceUTF16));
   1153   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
   1154   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n  "),
   1155                                   kWhitespaceUTF16));
   1156 }
   1157 
   1158 class WriteIntoTest : public testing::Test {
   1159  protected:
   1160   static void WritesCorrectly(size_t num_chars) {
   1161     std::string buffer;
   1162     char kOriginal[] = "supercali";
   1163     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
   1164     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
   1165     // string at the first \0.
   1166     EXPECT_EQ(std::string(kOriginal,
   1167                           std::min(num_chars, arraysize(kOriginal) - 1)),
   1168               std::string(buffer.c_str()));
   1169     EXPECT_EQ(num_chars, buffer.size());
   1170   }
   1171 };
   1172 
   1173 TEST_F(WriteIntoTest, WriteInto) {
   1174   // Validate that WriteInto reserves enough space and
   1175   // sizes a string correctly.
   1176   WritesCorrectly(1);
   1177   WritesCorrectly(2);
   1178   WritesCorrectly(5000);
   1179 
   1180   // Validate that WriteInto doesn't modify other strings
   1181   // when using a Copy-on-Write implementation.
   1182   const char kLive[] = "live";
   1183   const char kDead[] = "dead";
   1184   const std::string live = kLive;
   1185   std::string dead = live;
   1186   strncpy(WriteInto(&dead, 5), kDead, 4);
   1187   EXPECT_EQ(kDead, dead);
   1188   EXPECT_EQ(4u, dead.size());
   1189   EXPECT_EQ(kLive, live);
   1190   EXPECT_EQ(4u, live.size());
   1191 }
   1192 
   1193 }  // namespace base
   1194