Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <math.h>
      6 #include <stdarg.h>
      7 
      8 #include <limits>
      9 #include <sstream>
     10 
     11 #include "base/basictypes.h"
     12 #include "base/string_util.h"
     13 #include "base/utf_string_conversions.h"
     14 #include "testing/gmock/include/gmock/gmock.h"
     15 #include "testing/gtest/include/gtest/gtest.h"
     16 
     17 using ::testing::ElementsAre;
     18 
     19 namespace base {
     20 
     21 static const struct trim_case {
     22   const wchar_t* input;
     23   const TrimPositions positions;
     24   const wchar_t* output;
     25   const TrimPositions return_value;
     26 } trim_cases[] = {
     27   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
     28   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
     29   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
     30   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
     31   {L"", TRIM_ALL, L"", TRIM_NONE},
     32   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
     33   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
     34   {L"  ", TRIM_ALL, L"", TRIM_ALL},
     35   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
     36   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
     37 };
     38 
     39 static const struct trim_case_ascii {
     40   const char* input;
     41   const TrimPositions positions;
     42   const char* output;
     43   const TrimPositions return_value;
     44 } trim_cases_ascii[] = {
     45   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
     46   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
     47   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
     48   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
     49   {"", TRIM_ALL, "", TRIM_NONE},
     50   {"  ", TRIM_LEADING, "", TRIM_LEADING},
     51   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
     52   {"  ", TRIM_ALL, "", TRIM_ALL},
     53   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
     54 };
     55 
     56 namespace {
     57 
     58 // Helper used to test TruncateUTF8ToByteSize.
     59 bool Truncated(const std::string& input, const size_t byte_size,
     60                std::string* output) {
     61     size_t prev = input.length();
     62     TruncateUTF8ToByteSize(input, byte_size, output);
     63     return prev != output->length();
     64 }
     65 
     66 }  // namespace
     67 
     68 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
     69   std::string output;
     70 
     71   // Empty strings and invalid byte_size arguments
     72   EXPECT_FALSE(Truncated("", 0, &output));
     73   EXPECT_EQ(output, "");
     74   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
     75   EXPECT_EQ(output, "");
     76   EXPECT_FALSE(Truncated("\xe1\x80\xbf", -1, &output));
     77   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
     78 
     79   // Testing the truncation of valid UTF8 correctly
     80   EXPECT_TRUE(Truncated("abc", 2, &output));
     81   EXPECT_EQ(output, "ab");
     82   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
     83   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     84   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
     85   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     86   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
     87   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
     88 
     89   {
     90     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
     91     const std::string array_string(array, arraysize(array));
     92     EXPECT_TRUE(Truncated(array_string, 4, &output));
     93     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
     94   }
     95 
     96   {
     97     const char array[] = "\x00\xc2\x81\xc2\x81";
     98     const std::string array_string(array, arraysize(array));
     99     EXPECT_TRUE(Truncated(array_string, 4, &output));
    100     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
    101   }
    102 
    103   // Testing invalid UTF8
    104   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
    105   EXPECT_EQ(output.compare(""), 0);
    106   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
    107   EXPECT_EQ(output.compare(""), 0);
    108   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
    109   EXPECT_EQ(output.compare(""), 0);
    110 
    111   // Testing invalid UTF8 mixed with valid UTF8
    112   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
    113   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
    114   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
    115   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
    116   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
    117               10, &output));
    118   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
    119   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
    120               10, &output));
    121   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
    122   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
    123   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
    124 
    125   // Overlong sequences
    126   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
    127   EXPECT_EQ(output.compare(""), 0);
    128   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
    129   EXPECT_EQ(output.compare(""), 0);
    130   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
    131   EXPECT_EQ(output.compare(""), 0);
    132   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
    133   EXPECT_EQ(output.compare(""), 0);
    134   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
    135   EXPECT_EQ(output.compare(""), 0);
    136   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
    137   EXPECT_EQ(output.compare(""), 0);
    138   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
    139   EXPECT_EQ(output.compare(""), 0);
    140   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
    141   EXPECT_EQ(output.compare(""), 0);
    142   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
    143   EXPECT_EQ(output.compare(""), 0);
    144   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
    145   EXPECT_EQ(output.compare(""), 0);
    146   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
    147   EXPECT_EQ(output.compare(""), 0);
    148 
    149   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    150   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
    151   EXPECT_EQ(output.compare(""), 0);
    152   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
    153   EXPECT_EQ(output.compare(""), 0);
    154   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
    155   EXPECT_EQ(output.compare(""), 0);
    156 
    157   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    158   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
    159   EXPECT_EQ(output.compare(""), 0);
    160   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
    161   EXPECT_EQ(output.compare(""), 0);
    162 
    163   {
    164     const char array[] = "\x00\x00\xfe\xff";
    165     const std::string array_string(array, arraysize(array));
    166     EXPECT_TRUE(Truncated(array_string, 4, &output));
    167     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
    168   }
    169 
    170   // Variants on the previous test
    171   {
    172     const char array[] = "\xff\xfe\x00\x00";
    173     const std::string array_string(array, 4);
    174     EXPECT_FALSE(Truncated(array_string, 4, &output));
    175     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
    176   }
    177   {
    178     const char array[] = "\xff\x00\x00\xfe";
    179     const std::string array_string(array, arraysize(array));
    180     EXPECT_TRUE(Truncated(array_string, 4, &output));
    181     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
    182   }
    183 
    184   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    185   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
    186   EXPECT_EQ(output.compare(""), 0);
    187   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
    188   EXPECT_EQ(output.compare(""), 0);
    189   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
    190   EXPECT_EQ(output.compare(""), 0);
    191   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
    192   EXPECT_EQ(output.compare(""), 0);
    193   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
    194   EXPECT_EQ(output.compare(""), 0);
    195 
    196   // Strings in legacy encodings that are valid in UTF-8, but
    197   // are invalid as UTF-8 in real data.
    198   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
    199   EXPECT_EQ(output.compare("caf"), 0);
    200   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
    201   EXPECT_EQ(output.compare(""), 0);
    202   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
    203   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    204   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
    205               &output));
    206   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    207 
    208   // Testing using the same string as input and output.
    209   EXPECT_FALSE(Truncated(output, 4, &output));
    210   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    211   EXPECT_TRUE(Truncated(output, 3, &output));
    212   EXPECT_EQ(output.compare("\xa7\x41"), 0);
    213 
    214   // "abc" with U+201[CD] in windows-125[0-8]
    215   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
    216   EXPECT_EQ(output.compare("\x93" "abc"), 0);
    217 
    218   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    219   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
    220   EXPECT_EQ(output.compare(""), 0);
    221 
    222   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    223   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
    224   EXPECT_EQ(output.compare(""), 0);
    225 }
    226 
    227 TEST(StringUtilTest, TrimWhitespace) {
    228   std::wstring output;  // Allow contents to carry over to next testcase
    229   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
    230     const trim_case& value = trim_cases[i];
    231     EXPECT_EQ(value.return_value,
    232               TrimWhitespace(value.input, value.positions, &output));
    233     EXPECT_EQ(value.output, output);
    234   }
    235 
    236   // Test that TrimWhitespace() can take the same string for input and output
    237   output = L"  This is a test \r\n";
    238   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    239   EXPECT_EQ(L"This is a test", output);
    240 
    241   // Once more, but with a string of whitespace
    242   output = L"  \r\n";
    243   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    244   EXPECT_EQ(L"", output);
    245 
    246   std::string output_ascii;
    247   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
    248     const trim_case_ascii& value = trim_cases_ascii[i];
    249     EXPECT_EQ(value.return_value,
    250               TrimWhitespace(value.input, value.positions, &output_ascii));
    251     EXPECT_EQ(value.output, output_ascii);
    252   }
    253 }
    254 
    255 static const struct collapse_case {
    256   const wchar_t* input;
    257   const bool trim;
    258   const wchar_t* output;
    259 } collapse_cases[] = {
    260   {L" Google Video ", false, L"Google Video"},
    261   {L"Google Video", false, L"Google Video"},
    262   {L"", false, L""},
    263   {L"  ", false, L""},
    264   {L"\t\rTest String\n", false, L"Test String"},
    265   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    266   {L"    Test     \n  \t String    ", false, L"Test String"},
    267   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    268   {L"   Test String", false, L"Test String"},
    269   {L"Test String    ", false, L"Test String"},
    270   {L"Test String", false, L"Test String"},
    271   {L"", true, L""},
    272   {L"\n", true, L""},
    273   {L"  \r  ", true, L""},
    274   {L"\nFoo", true, L"Foo"},
    275   {L"\r  Foo  ", true, L"Foo"},
    276   {L" Foo bar ", true, L"Foo bar"},
    277   {L"  \tFoo  bar  \n", true, L"Foo bar"},
    278   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
    279 };
    280 
    281 TEST(StringUtilTest, CollapseWhitespace) {
    282   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
    283     const collapse_case& value = collapse_cases[i];
    284     EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
    285   }
    286 }
    287 
    288 static const struct collapse_case_ascii {
    289   const char* input;
    290   const bool trim;
    291   const char* output;
    292 } collapse_cases_ascii[] = {
    293   {" Google Video ", false, "Google Video"},
    294   {"Google Video", false, "Google Video"},
    295   {"", false, ""},
    296   {"  ", false, ""},
    297   {"\t\rTest String\n", false, "Test String"},
    298   {"    Test     \n  \t String    ", false, "Test String"},
    299   {"   Test String", false, "Test String"},
    300   {"Test String    ", false, "Test String"},
    301   {"Test String", false, "Test String"},
    302   {"", true, ""},
    303   {"\n", true, ""},
    304   {"  \r  ", true, ""},
    305   {"\nFoo", true, "Foo"},
    306   {"\r  Foo  ", true, "Foo"},
    307   {" Foo bar ", true, "Foo bar"},
    308   {"  \tFoo  bar  \n", true, "Foo bar"},
    309   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
    310 };
    311 
    312 TEST(StringUtilTest, CollapseWhitespaceASCII) {
    313   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
    314     const collapse_case_ascii& value = collapse_cases_ascii[i];
    315     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
    316   }
    317 }
    318 
    319 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
    320   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
    321   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
    322   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
    323   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
    324   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
    325   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
    326 }
    327 
    328 TEST(StringUtilTest, ContainsOnlyWhitespace) {
    329   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("")));
    330   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
    331   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
    332   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
    333   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
    334   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
    335 }
    336 
    337 TEST(StringUtilTest, IsStringUTF8) {
    338   EXPECT_TRUE(IsStringUTF8("abc"));
    339   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
    340   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
    341   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
    342   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
    343   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
    344 
    345   // surrogate code points
    346   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
    347   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
    348   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
    349 
    350   // overlong sequences
    351   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
    352   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
    353   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
    354   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
    355   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
    356   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
    357   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
    358   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
    359   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
    360   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
    361   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
    362 
    363   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    364   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
    365   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
    366   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
    367 
    368   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    369   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
    370   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
    371   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
    372   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
    373 
    374   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    375   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
    376   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
    377   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
    378   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
    379   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
    380   // Strings in legacy encodings. We can certainly make up strings
    381   // in a legacy encoding that are valid in UTF-8, but in real data,
    382   // most of them are invalid as UTF-8.
    383   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
    384   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
    385   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
    386   // "abc" with U+201[CD] in windows-125[0-8]
    387   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
    388   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    389   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
    390   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    391   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
    392 
    393   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
    394   // representation, and the second uses a 2-byte sequence. The second version
    395   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
    396   // given codepoint must be used.
    397   static const char kEmbeddedNull[] = "embedded\0null";
    398   EXPECT_TRUE(IsStringUTF8(
    399       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
    400   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
    401 }
    402 
    403 TEST(StringUtilTest, ConvertASCII) {
    404   static const char* char_cases[] = {
    405     "Google Video",
    406     "Hello, world\n",
    407     "0123ABCDwxyz \a\b\t\r\n!+,.~"
    408   };
    409 
    410   static const wchar_t* const wchar_cases[] = {
    411     L"Google Video",
    412     L"Hello, world\n",
    413     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
    414   };
    415 
    416   for (size_t i = 0; i < arraysize(char_cases); ++i) {
    417     EXPECT_TRUE(IsStringASCII(char_cases[i]));
    418     std::wstring wide = ASCIIToWide(char_cases[i]);
    419     EXPECT_EQ(wchar_cases[i], wide);
    420 
    421     EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
    422     std::string ascii = WideToASCII(wchar_cases[i]);
    423     EXPECT_EQ(char_cases[i], ascii);
    424   }
    425 
    426   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
    427   EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
    428 
    429   // Convert empty strings.
    430   std::wstring wempty;
    431   std::string empty;
    432   EXPECT_EQ(empty, WideToASCII(wempty));
    433   EXPECT_EQ(wempty, ASCIIToWide(empty));
    434 
    435   // Convert strings with an embedded NUL character.
    436   const char chars_with_nul[] = "test\0string";
    437   const int length_with_nul = arraysize(chars_with_nul) - 1;
    438   std::string string_with_nul(chars_with_nul, length_with_nul);
    439   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
    440   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
    441             wide_with_nul.length());
    442   std::string narrow_with_nul = WideToASCII(wide_with_nul);
    443   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
    444             narrow_with_nul.length());
    445   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
    446 }
    447 
    448 TEST(StringUtilTest, ToUpperASCII) {
    449   EXPECT_EQ('C', ToUpperASCII('C'));
    450   EXPECT_EQ('C', ToUpperASCII('c'));
    451   EXPECT_EQ('2', ToUpperASCII('2'));
    452 
    453   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
    454   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
    455   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
    456 
    457   std::string in_place_a("Cc2");
    458   StringToUpperASCII(&in_place_a);
    459   EXPECT_EQ("CC2", in_place_a);
    460 
    461   std::wstring in_place_w(L"Cc2");
    462   StringToUpperASCII(&in_place_w);
    463   EXPECT_EQ(L"CC2", in_place_w);
    464 
    465   std::string original_a("Cc2");
    466   std::string upper_a = StringToUpperASCII(original_a);
    467   EXPECT_EQ("CC2", upper_a);
    468 
    469   std::wstring original_w(L"Cc2");
    470   std::wstring upper_w = StringToUpperASCII(original_w);
    471   EXPECT_EQ(L"CC2", upper_w);
    472 }
    473 
    474 static const struct {
    475   const wchar_t* src_w;
    476   const char*    src_a;
    477   const char*    dst;
    478 } lowercase_cases[] = {
    479   {L"FoO", "FoO", "foo"},
    480   {L"foo", "foo", "foo"},
    481   {L"FOO", "FOO", "foo"},
    482 };
    483 
    484 TEST(StringUtilTest, LowerCaseEqualsASCII) {
    485   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
    486     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
    487                                      lowercase_cases[i].dst));
    488     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
    489                                      lowercase_cases[i].dst));
    490   }
    491 }
    492 
    493 TEST(StringUtilTest, GetByteDisplayUnits) {
    494   static const struct {
    495     int64 bytes;
    496     DataUnits expected;
    497   } cases[] = {
    498     {0, DATA_UNITS_BYTE},
    499     {512, DATA_UNITS_BYTE},
    500     {10*1024, DATA_UNITS_KIBIBYTE},
    501     {10*1024*1024, DATA_UNITS_MEBIBYTE},
    502     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
    503     {~(1LL<<63), DATA_UNITS_GIBIBYTE},
    504 #ifdef NDEBUG
    505     {-1, DATA_UNITS_BYTE},
    506 #endif
    507   };
    508 
    509   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
    510     EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
    511 }
    512 
    513 TEST(StringUtilTest, FormatBytes) {
    514   static const struct {
    515     int64 bytes;
    516     DataUnits units;
    517     const char* expected;
    518     const char* expected_with_units;
    519   } cases[] = {
    520     // Expected behavior: we show one post-decimal digit when we have
    521     // under two pre-decimal digits, except in cases where it makes no
    522     // sense (zero or bytes).
    523     // Since we switch units once we cross the 1000 mark, this keeps
    524     // the display of file sizes or bytes consistently around three
    525     // digits.
    526     {0, DATA_UNITS_BYTE, "0", "0 B"},
    527     {512, DATA_UNITS_BYTE, "512", "512 B"},
    528     {512, DATA_UNITS_KIBIBYTE, "0.5", "0.5 kB"},
    529     {1024*1024, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
    530     {1024*1024, DATA_UNITS_MEBIBYTE, "1.0", "1.0 MB"},
    531     {1024*1024*1024, DATA_UNITS_GIBIBYTE, "1.0", "1.0 GB"},
    532     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
    533     {99LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "99.0", "99.0 GB"},
    534     {105LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "105", "105 GB"},
    535     {105LL*1024*1024*1024 + 500LL*1024*1024, DATA_UNITS_GIBIBYTE,
    536      "105", "105 GB"},
    537     {~(1LL<<63), DATA_UNITS_GIBIBYTE, "8589934592", "8589934592 GB"},
    538 
    539     {99*1024 + 103, DATA_UNITS_KIBIBYTE, "99.1", "99.1 kB"},
    540     {1024*1024 + 103, DATA_UNITS_KIBIBYTE, "1024", "1024 kB"},
    541     {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, "1.2", "1.2 MB"},
    542     {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
    543      "1.9", "1.9 GB"},
    544     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "10.0", "10.0 GB"},
    545     {100LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, "100", "100 GB"},
    546 #ifdef NDEBUG
    547     {-1, DATA_UNITS_BYTE, "", ""},
    548 #endif
    549   };
    550 
    551   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    552     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
    553               FormatBytes(cases[i].bytes, cases[i].units, false));
    554     EXPECT_EQ(ASCIIToUTF16(cases[i].expected_with_units),
    555               FormatBytes(cases[i].bytes, cases[i].units, true));
    556   }
    557 }
    558 
    559 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
    560   static const struct {
    561     const char* str;
    562     string16::size_type start_offset;
    563     const char* find_this;
    564     const char* replace_with;
    565     const char* expected;
    566   } cases[] = {
    567     {"aaa", 0, "a", "b", "bbb"},
    568     {"abb", 0, "ab", "a", "ab"},
    569     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
    570     {"Not found", 0, "x", "0", "Not found"},
    571     {"Not found again", 5, "x", "0", "Not found again"},
    572     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    573      "Four score and seven years agoMakingFour score and seven years agoit"
    574      "Four score and seven years agomuchFour score and seven years agolonger"
    575      "Four score and seven years ago"},
    576     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    577     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
    578     {"abababab", 2, "ab", "c", "abccc"},
    579   };
    580 
    581   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    582     string16 str = ASCIIToUTF16(cases[i].str);
    583     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
    584                                  ASCIIToUTF16(cases[i].find_this),
    585                                  ASCIIToUTF16(cases[i].replace_with));
    586     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    587   }
    588 }
    589 
    590 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
    591   static const struct {
    592     const char* str;
    593     string16::size_type start_offset;
    594     const char* find_this;
    595     const char* replace_with;
    596     const char* expected;
    597   } cases[] = {
    598     {"aaa", 0, "a", "b", "baa"},
    599     {"abb", 0, "ab", "a", "ab"},
    600     {"Removing some substrings inging", 0, "ing", "",
    601       "Remov some substrings inging"},
    602     {"Not found", 0, "x", "0", "Not found"},
    603     {"Not found again", 5, "x", "0", "Not found again"},
    604     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    605      "Four score and seven years agoMaking it much longer "},
    606     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    607     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    608     {"abababab", 2, "ab", "c", "abcabab"},
    609   };
    610 
    611   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    612     string16 str = ASCIIToUTF16(cases[i].str);
    613     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
    614                                      ASCIIToUTF16(cases[i].find_this),
    615                                      ASCIIToUTF16(cases[i].replace_with));
    616     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    617   }
    618 }
    619 
    620 TEST(StringUtilTest, HexDigitToInt) {
    621   EXPECT_EQ(0, HexDigitToInt('0'));
    622   EXPECT_EQ(1, HexDigitToInt('1'));
    623   EXPECT_EQ(2, HexDigitToInt('2'));
    624   EXPECT_EQ(3, HexDigitToInt('3'));
    625   EXPECT_EQ(4, HexDigitToInt('4'));
    626   EXPECT_EQ(5, HexDigitToInt('5'));
    627   EXPECT_EQ(6, HexDigitToInt('6'));
    628   EXPECT_EQ(7, HexDigitToInt('7'));
    629   EXPECT_EQ(8, HexDigitToInt('8'));
    630   EXPECT_EQ(9, HexDigitToInt('9'));
    631   EXPECT_EQ(10, HexDigitToInt('A'));
    632   EXPECT_EQ(11, HexDigitToInt('B'));
    633   EXPECT_EQ(12, HexDigitToInt('C'));
    634   EXPECT_EQ(13, HexDigitToInt('D'));
    635   EXPECT_EQ(14, HexDigitToInt('E'));
    636   EXPECT_EQ(15, HexDigitToInt('F'));
    637 
    638   // Verify the lower case as well.
    639   EXPECT_EQ(10, HexDigitToInt('a'));
    640   EXPECT_EQ(11, HexDigitToInt('b'));
    641   EXPECT_EQ(12, HexDigitToInt('c'));
    642   EXPECT_EQ(13, HexDigitToInt('d'));
    643   EXPECT_EQ(14, HexDigitToInt('e'));
    644   EXPECT_EQ(15, HexDigitToInt('f'));
    645 }
    646 
    647 // This checks where we can use the assignment operator for a va_list. We need
    648 // a way to do this since Visual C doesn't support va_copy, but assignment on
    649 // va_list is not guaranteed to be a copy. See StringAppendVT which uses this
    650 // capability.
    651 static void VariableArgsFunc(const char* format, ...) {
    652   va_list org;
    653   va_start(org, format);
    654 
    655   va_list dup;
    656   GG_VA_COPY(dup, org);
    657   int i1 = va_arg(org, int);
    658   int j1 = va_arg(org, int);
    659   char* s1 = va_arg(org, char*);
    660   double d1 = va_arg(org, double);
    661   va_end(org);
    662 
    663   int i2 = va_arg(dup, int);
    664   int j2 = va_arg(dup, int);
    665   char* s2 = va_arg(dup, char*);
    666   double d2 = va_arg(dup, double);
    667 
    668   EXPECT_EQ(i1, i2);
    669   EXPECT_EQ(j1, j2);
    670   EXPECT_STREQ(s1, s2);
    671   EXPECT_EQ(d1, d2);
    672 
    673   va_end(dup);
    674 }
    675 
    676 TEST(StringUtilTest, VAList) {
    677   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
    678 }
    679 
    680 // Test for Tokenize
    681 template <typename STR>
    682 void TokenizeTest() {
    683   std::vector<STR> r;
    684   size_t size;
    685 
    686   size = Tokenize(STR("This is a string"), STR(" "), &r);
    687   EXPECT_EQ(4U, size);
    688   ASSERT_EQ(4U, r.size());
    689   EXPECT_EQ(r[0], STR("This"));
    690   EXPECT_EQ(r[1], STR("is"));
    691   EXPECT_EQ(r[2], STR("a"));
    692   EXPECT_EQ(r[3], STR("string"));
    693   r.clear();
    694 
    695   size = Tokenize(STR("one,two,three"), STR(","), &r);
    696   EXPECT_EQ(3U, size);
    697   ASSERT_EQ(3U, r.size());
    698   EXPECT_EQ(r[0], STR("one"));
    699   EXPECT_EQ(r[1], STR("two"));
    700   EXPECT_EQ(r[2], STR("three"));
    701   r.clear();
    702 
    703   size = Tokenize(STR("one,two:three;four"), STR(",:"), &r);
    704   EXPECT_EQ(3U, size);
    705   ASSERT_EQ(3U, r.size());
    706   EXPECT_EQ(r[0], STR("one"));
    707   EXPECT_EQ(r[1], STR("two"));
    708   EXPECT_EQ(r[2], STR("three;four"));
    709   r.clear();
    710 
    711   size = Tokenize(STR("one,two:three;four"), STR(";,:"), &r);
    712   EXPECT_EQ(4U, size);
    713   ASSERT_EQ(4U, r.size());
    714   EXPECT_EQ(r[0], STR("one"));
    715   EXPECT_EQ(r[1], STR("two"));
    716   EXPECT_EQ(r[2], STR("three"));
    717   EXPECT_EQ(r[3], STR("four"));
    718   r.clear();
    719 
    720   size = Tokenize(STR("one, two, three"), STR(","), &r);
    721   EXPECT_EQ(3U, size);
    722   ASSERT_EQ(3U, r.size());
    723   EXPECT_EQ(r[0], STR("one"));
    724   EXPECT_EQ(r[1], STR(" two"));
    725   EXPECT_EQ(r[2], STR(" three"));
    726   r.clear();
    727 
    728   size = Tokenize(STR("one, two, three, "), STR(","), &r);
    729   EXPECT_EQ(4U, size);
    730   ASSERT_EQ(4U, r.size());
    731   EXPECT_EQ(r[0], STR("one"));
    732   EXPECT_EQ(r[1], STR(" two"));
    733   EXPECT_EQ(r[2], STR(" three"));
    734   EXPECT_EQ(r[3], STR(" "));
    735   r.clear();
    736 
    737   size = Tokenize(STR("one, two, three,"), STR(","), &r);
    738   EXPECT_EQ(3U, size);
    739   ASSERT_EQ(3U, r.size());
    740   EXPECT_EQ(r[0], STR("one"));
    741   EXPECT_EQ(r[1], STR(" two"));
    742   EXPECT_EQ(r[2], STR(" three"));
    743   r.clear();
    744 
    745   size = Tokenize(STR(""), STR(","), &r);
    746   EXPECT_EQ(0U, size);
    747   ASSERT_EQ(0U, r.size());
    748   r.clear();
    749 
    750   size = Tokenize(STR(","), STR(","), &r);
    751   EXPECT_EQ(0U, size);
    752   ASSERT_EQ(0U, r.size());
    753   r.clear();
    754 
    755   size = Tokenize(STR(",;:."), STR(".:;,"), &r);
    756   EXPECT_EQ(0U, size);
    757   ASSERT_EQ(0U, r.size());
    758   r.clear();
    759 
    760   size = Tokenize(STR("\t\ta\t"), STR("\t"), &r);
    761   EXPECT_EQ(1U, size);
    762   ASSERT_EQ(1U, r.size());
    763   EXPECT_EQ(r[0], STR("a"));
    764   r.clear();
    765 
    766   size = Tokenize(STR("\ta\t\nb\tcc"), STR("\n"), &r);
    767   EXPECT_EQ(2U, size);
    768   ASSERT_EQ(2U, r.size());
    769   EXPECT_EQ(r[0], STR("\ta\t"));
    770   EXPECT_EQ(r[1], STR("b\tcc"));
    771   r.clear();
    772 }
    773 
    774 TEST(StringUtilTest, TokenizeStdString) {
    775   TokenizeTest<std::string>();
    776 }
    777 
    778 TEST(StringUtilTest, TokenizeStringPiece) {
    779   TokenizeTest<base::StringPiece>();
    780 }
    781 
    782 // Test for JoinString
    783 TEST(StringUtilTest, JoinString) {
    784   std::vector<std::string> in;
    785   EXPECT_EQ("", JoinString(in, ','));
    786 
    787   in.push_back("a");
    788   EXPECT_EQ("a", JoinString(in, ','));
    789 
    790   in.push_back("b");
    791   in.push_back("c");
    792   EXPECT_EQ("a,b,c", JoinString(in, ','));
    793 
    794   in.push_back("");
    795   EXPECT_EQ("a,b,c,", JoinString(in, ','));
    796   in.push_back(" ");
    797   EXPECT_EQ("a|b|c|| ", JoinString(in, '|'));
    798 }
    799 
    800 TEST(StringUtilTest, StartsWith) {
    801   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
    802   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
    803   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
    804   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
    805   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));
    806   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
    807   EXPECT_FALSE(StartsWithASCII("", "javascript", false));
    808   EXPECT_FALSE(StartsWithASCII("", "javascript", true));
    809   EXPECT_TRUE(StartsWithASCII("java", "", false));
    810   EXPECT_TRUE(StartsWithASCII("java", "", true));
    811 
    812   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
    813   EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
    814   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
    815   EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
    816   EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
    817   EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
    818   EXPECT_FALSE(StartsWith(L"", L"javascript", false));
    819   EXPECT_FALSE(StartsWith(L"", L"javascript", true));
    820   EXPECT_TRUE(StartsWith(L"java", L"", false));
    821   EXPECT_TRUE(StartsWith(L"java", L"", true));
    822 }
    823 
    824 TEST(StringUtilTest, EndsWith) {
    825   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
    826   EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
    827   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
    828   EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
    829   EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));
    830   EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
    831   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));
    832   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
    833   EXPECT_FALSE(EndsWith(L"", L".plugin", false));
    834   EXPECT_FALSE(EndsWith(L"", L".plugin", true));
    835   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));
    836   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
    837   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));
    838   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
    839   EXPECT_TRUE(EndsWith(L"", L"", false));
    840   EXPECT_TRUE(EndsWith(L"", L"", true));
    841 }
    842 
    843 TEST(StringUtilTest, GetStringFWithOffsets) {
    844   std::vector<string16> subst;
    845   subst.push_back(ASCIIToUTF16("1"));
    846   subst.push_back(ASCIIToUTF16("2"));
    847   std::vector<size_t> offsets;
    848 
    849   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
    850                             subst,
    851                             &offsets);
    852   EXPECT_EQ(2U, offsets.size());
    853   EXPECT_EQ(7U, offsets[0]);
    854   EXPECT_EQ(25U, offsets[1]);
    855   offsets.clear();
    856 
    857   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
    858                             subst,
    859                             &offsets);
    860   EXPECT_EQ(2U, offsets.size());
    861   EXPECT_EQ(25U, offsets[0]);
    862   EXPECT_EQ(7U, offsets[1]);
    863   offsets.clear();
    864 }
    865 
    866 TEST(StringUtilTest, ReplaceStringPlaceholders) {
    867   std::vector<string16> subst;
    868   subst.push_back(ASCIIToUTF16("9a"));
    869   subst.push_back(ASCIIToUTF16("8b"));
    870   subst.push_back(ASCIIToUTF16("7c"));
    871   subst.push_back(ASCIIToUTF16("6d"));
    872   subst.push_back(ASCIIToUTF16("5e"));
    873   subst.push_back(ASCIIToUTF16("4f"));
    874   subst.push_back(ASCIIToUTF16("3g"));
    875   subst.push_back(ASCIIToUTF16("2h"));
    876   subst.push_back(ASCIIToUTF16("1i"));
    877 
    878   string16 formatted =
    879       ReplaceStringPlaceholders(
    880           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, NULL);
    881 
    882   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
    883 }
    884 
    885 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
    886   // Test whether replacestringplaceholders works as expected when there
    887   // are fewer inputs than outputs.
    888   std::vector<string16> subst;
    889   subst.push_back(ASCIIToUTF16("9a"));
    890   subst.push_back(ASCIIToUTF16("8b"));
    891   subst.push_back(ASCIIToUTF16("7c"));
    892 
    893   string16 formatted =
    894       ReplaceStringPlaceholders(
    895           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, NULL);
    896 
    897   EXPECT_EQ(formatted, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
    898 }
    899 
    900 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
    901   std::vector<std::string> subst;
    902   subst.push_back("9a");
    903   subst.push_back("8b");
    904   subst.push_back("7c");
    905   subst.push_back("6d");
    906   subst.push_back("5e");
    907   subst.push_back("4f");
    908   subst.push_back("3g");
    909   subst.push_back("2h");
    910   subst.push_back("1i");
    911 
    912   std::string formatted =
    913       ReplaceStringPlaceholders(
    914           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, NULL);
    915 
    916   EXPECT_EQ(formatted, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
    917 }
    918 
    919 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
    920   std::vector<std::string> subst;
    921   subst.push_back("a");
    922   subst.push_back("b");
    923   subst.push_back("c");
    924   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, NULL),
    925             "$1 $$2 $$$3");
    926 }
    927 
    928 TEST(StringUtilTest, MatchPatternTest) {
    929   EXPECT_TRUE(MatchPattern("www.google.com", "*.com"));
    930   EXPECT_TRUE(MatchPattern("www.google.com", "*"));
    931   EXPECT_FALSE(MatchPattern("www.google.com", "www*.g*.org"));
    932   EXPECT_TRUE(MatchPattern("Hello", "H?l?o"));
    933   EXPECT_FALSE(MatchPattern("www.google.com", "http://*)"));
    934   EXPECT_FALSE(MatchPattern("www.msn.com", "*.COM"));
    935   EXPECT_TRUE(MatchPattern("Hello*1234", "He??o\\*1*"));
    936   EXPECT_FALSE(MatchPattern("", "*.*"));
    937   EXPECT_TRUE(MatchPattern("", "*"));
    938   EXPECT_TRUE(MatchPattern("", "?"));
    939   EXPECT_TRUE(MatchPattern("", ""));
    940   EXPECT_FALSE(MatchPattern("Hello", ""));
    941   EXPECT_TRUE(MatchPattern("Hello*", "Hello*"));
    942   // Stop after a certain recursion depth.
    943   EXPECT_FALSE(MatchPattern("123456789012345678", "?????????????????*"));
    944 
    945   // Test UTF8 matching.
    946   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0", "*\xe2\x99\xa0"));
    947   EXPECT_TRUE(MatchPattern("heart: \xe2\x99\xa0.", "heart: ?."));
    948   EXPECT_TRUE(MatchPattern("hearts: \xe2\x99\xa0\xe2\x99\xa0", "*"));
    949   // Invalid sequences should be handled as a single invalid character.
    950   EXPECT_TRUE(MatchPattern("invalid: \xef\xbf\xbe", "invalid: ?"));
    951   // If the pattern has invalid characters, it shouldn't match anything.
    952   EXPECT_FALSE(MatchPattern("\xf4\x90\x80\x80", "\xf4\x90\x80\x80"));
    953 
    954   // Test UTF16 character matching.
    955   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("www.google.com"),
    956                            UTF8ToUTF16("*.com")));
    957   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello*1234"),
    958                            UTF8ToUTF16("He??o\\*1*")));
    959 
    960   // This test verifies that consecutive wild cards are collapsed into 1
    961   // wildcard (when this doesn't occur, MatchPattern reaches it's maximum
    962   // recursion depth).
    963   EXPECT_TRUE(MatchPattern(UTF8ToUTF16("Hello"),
    964                            UTF8ToUTF16("He********************************o")));
    965 }
    966 
    967 TEST(StringUtilTest, LcpyTest) {
    968   // Test the normal case where we fit in our buffer.
    969   {
    970     char dst[10];
    971     wchar_t wdst[10];
    972     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
    973     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
    974     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
    975     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
    976   }
    977 
    978   // Test dst_size == 0, nothing should be written to |dst| and we should
    979   // have the equivalent of strlen(src).
    980   {
    981     char dst[2] = {1, 2};
    982     wchar_t wdst[2] = {1, 2};
    983     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", 0));
    984     EXPECT_EQ(1, dst[0]);
    985     EXPECT_EQ(2, dst[1]);
    986     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", 0));
    987 #if defined(WCHAR_T_IS_UNSIGNED)
    988     EXPECT_EQ(1U, wdst[0]);
    989     EXPECT_EQ(2U, wdst[1]);
    990 #else
    991     EXPECT_EQ(1, wdst[0]);
    992     EXPECT_EQ(2, wdst[1]);
    993 #endif
    994   }
    995 
    996   // Test the case were we _just_ competely fit including the null.
    997   {
    998     char dst[8];
    999     wchar_t wdst[8];
   1000     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1001     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
   1002     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1003     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
   1004   }
   1005 
   1006   // Test the case were we we are one smaller, so we can't fit the null.
   1007   {
   1008     char dst[7];
   1009     wchar_t wdst[7];
   1010     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1011     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
   1012     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1013     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
   1014   }
   1015 
   1016   // Test the case were we are just too small.
   1017   {
   1018     char dst[3];
   1019     wchar_t wdst[3];
   1020     EXPECT_EQ(7U, base::strlcpy(dst, "abcdefg", arraysize(dst)));
   1021     EXPECT_EQ(0, memcmp(dst, "ab", 3));
   1022     EXPECT_EQ(7U, base::wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1023     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
   1024   }
   1025 }
   1026 
   1027 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
   1028   struct TestData {
   1029     const wchar_t* input;
   1030     bool portable;
   1031   } cases[] = {
   1032     { L"%ls", true },
   1033     { L"%s", false },
   1034     { L"%S", false },
   1035     { L"%lS", false },
   1036     { L"Hello, %s", false },
   1037     { L"%lc", true },
   1038     { L"%c", false },
   1039     { L"%C", false },
   1040     { L"%lC", false },
   1041     { L"%ls %s", false },
   1042     { L"%s %ls", false },
   1043     { L"%s %ls %s", false },
   1044     { L"%f", true },
   1045     { L"%f %F", false },
   1046     { L"%d %D", false },
   1047     { L"%o %O", false },
   1048     { L"%u %U", false },
   1049     { L"%f %d %o %u", true },
   1050     { L"%-8d (%02.1f%)", true },
   1051     { L"% 10s", false },
   1052     { L"% 10ls", true }
   1053   };
   1054   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
   1055     EXPECT_EQ(cases[i].portable, base::IsWprintfFormatPortable(cases[i].input));
   1056   }
   1057 }
   1058 
   1059 TEST(StringUtilTest, RemoveChars) {
   1060   const char* kRemoveChars = "-/+*";
   1061   std::string input = "A-+bc/d!*";
   1062   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
   1063   EXPECT_EQ("Abcd!", input);
   1064 
   1065   // No characters match kRemoveChars.
   1066   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1067   EXPECT_EQ("Abcd!", input);
   1068 
   1069   // Empty string.
   1070   input.clear();
   1071   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1072   EXPECT_EQ(std::string(), input);
   1073 }
   1074 
   1075 TEST(StringUtilTest, ContainsOnlyChars) {
   1076   // Providing an empty list of characters should return false but for the empty
   1077   // string.
   1078   EXPECT_TRUE(ContainsOnlyChars("", ""));
   1079   EXPECT_FALSE(ContainsOnlyChars("Hello", ""));
   1080 
   1081   EXPECT_TRUE(ContainsOnlyChars("", "1234"));
   1082   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
   1083   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
   1084   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
   1085   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
   1086 }
   1087 
   1088 }  // namespace base
   1089