Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <math.h>
      6 #include <stdarg.h>
      7 
      8 #include <limits>
      9 #include <sstream>
     10 
     11 #include "base/basictypes.h"
     12 #include "base/string_util.h"
     13 #include "testing/gtest/include/gtest/gtest.h"
     14 
     15 namespace base {
     16 
     17 namespace {
     18 
     19 // Given a null-terminated string of wchar_t with each wchar_t representing
     20 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
     21 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
     22 // should be represented as a surrogate pair (two UTF-16 units)
     23 // *even* where wchar_t is 32-bit (Linux and Mac).
     24 //
     25 // This is to help write tests for functions with string16 params until
     26 // the C++ 0x UTF-16 literal is well-supported by compilers.
     27 string16 BuildString16(const wchar_t* s) {
     28 #if defined(WCHAR_T_IS_UTF16)
     29   return string16(s);
     30 #elif defined(WCHAR_T_IS_UTF32)
     31   string16 u16;
     32   while (*s != 0) {
     33     DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);
     34     u16.push_back(*s++);
     35   }
     36   return u16;
     37 #endif
     38 }
     39 
     40 }  // namespace
     41 
     42 static const struct trim_case {
     43   const wchar_t* input;
     44   const TrimPositions positions;
     45   const wchar_t* output;
     46   const TrimPositions return_value;
     47 } trim_cases[] = {
     48   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
     49   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
     50   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
     51   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
     52   {L"", TRIM_ALL, L"", TRIM_NONE},
     53   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
     54   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
     55   {L"  ", TRIM_ALL, L"", TRIM_ALL},
     56   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
     57   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
     58 };
     59 
     60 static const struct trim_case_ascii {
     61   const char* input;
     62   const TrimPositions positions;
     63   const char* output;
     64   const TrimPositions return_value;
     65 } trim_cases_ascii[] = {
     66   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
     67   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
     68   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
     69   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
     70   {"", TRIM_ALL, "", TRIM_NONE},
     71   {"  ", TRIM_LEADING, "", TRIM_LEADING},
     72   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
     73   {"  ", TRIM_ALL, "", TRIM_ALL},
     74   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
     75 };
     76 
     77 TEST(StringUtilTest, TrimWhitespace) {
     78   std::wstring output;  // Allow contents to carry over to next testcase
     79   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
     80     const trim_case& value = trim_cases[i];
     81     EXPECT_EQ(value.return_value,
     82               TrimWhitespace(value.input, value.positions, &output));
     83     EXPECT_EQ(value.output, output);
     84   }
     85 
     86   // Test that TrimWhitespace() can take the same string for input and output
     87   output = L"  This is a test \r\n";
     88   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
     89   EXPECT_EQ(L"This is a test", output);
     90 
     91   // Once more, but with a string of whitespace
     92   output = L"  \r\n";
     93   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
     94   EXPECT_EQ(L"", output);
     95 
     96   std::string output_ascii;
     97   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
     98     const trim_case_ascii& value = trim_cases_ascii[i];
     99     EXPECT_EQ(value.return_value,
    100               TrimWhitespace(value.input, value.positions, &output_ascii));
    101     EXPECT_EQ(value.output, output_ascii);
    102   }
    103 }
    104 
    105 static const struct collapse_case {
    106   const wchar_t* input;
    107   const bool trim;
    108   const wchar_t* output;
    109 } collapse_cases[] = {
    110   {L" Google Video ", false, L"Google Video"},
    111   {L"Google Video", false, L"Google Video"},
    112   {L"", false, L""},
    113   {L"  ", false, L""},
    114   {L"\t\rTest String\n", false, L"Test String"},
    115   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    116   {L"    Test     \n  \t String    ", false, L"Test String"},
    117   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    118   {L"   Test String", false, L"Test String"},
    119   {L"Test String    ", false, L"Test String"},
    120   {L"Test String", false, L"Test String"},
    121   {L"", true, L""},
    122   {L"\n", true, L""},
    123   {L"  \r  ", true, L""},
    124   {L"\nFoo", true, L"Foo"},
    125   {L"\r  Foo  ", true, L"Foo"},
    126   {L" Foo bar ", true, L"Foo bar"},
    127   {L"  \tFoo  bar  \n", true, L"Foo bar"},
    128   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
    129 };
    130 
    131 TEST(StringUtilTest, CollapseWhitespace) {
    132   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
    133     const collapse_case& value = collapse_cases[i];
    134     EXPECT_EQ(value.output, CollapseWhitespace(value.input, value.trim));
    135   }
    136 }
    137 
    138 static const struct collapse_case_ascii {
    139   const char* input;
    140   const bool trim;
    141   const char* output;
    142 } collapse_cases_ascii[] = {
    143   {" Google Video ", false, "Google Video"},
    144   {"Google Video", false, "Google Video"},
    145   {"", false, ""},
    146   {"  ", false, ""},
    147   {"\t\rTest String\n", false, "Test String"},
    148   {"    Test     \n  \t String    ", false, "Test String"},
    149   {"   Test String", false, "Test String"},
    150   {"Test String    ", false, "Test String"},
    151   {"Test String", false, "Test String"},
    152   {"", true, ""},
    153   {"\n", true, ""},
    154   {"  \r  ", true, ""},
    155   {"\nFoo", true, "Foo"},
    156   {"\r  Foo  ", true, "Foo"},
    157   {" Foo bar ", true, "Foo bar"},
    158   {"  \tFoo  bar  \n", true, "Foo bar"},
    159   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
    160 };
    161 
    162 TEST(StringUtilTest, CollapseWhitespaceASCII) {
    163   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
    164     const collapse_case_ascii& value = collapse_cases_ascii[i];
    165     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
    166   }
    167 }
    168 
    169 TEST(StringUtilTest, ContainsOnlyWhitespaceASCII) {
    170   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(""));
    171   EXPECT_TRUE(ContainsOnlyWhitespaceASCII(" "));
    172   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t"));
    173   EXPECT_TRUE(ContainsOnlyWhitespaceASCII("\t \r \n  "));
    174   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("a"));
    175   EXPECT_FALSE(ContainsOnlyWhitespaceASCII("\thello\r \n  "));
    176 }
    177 
    178 TEST(StringUtilTest, ContainsOnlyWhitespace) {
    179   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("")));
    180   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16(" ")));
    181   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t")));
    182   EXPECT_TRUE(ContainsOnlyWhitespace(ASCIIToUTF16("\t \r \n  ")));
    183   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("a")));
    184   EXPECT_FALSE(ContainsOnlyWhitespace(ASCIIToUTF16("\thello\r \n  ")));
    185 }
    186 
    187 TEST(StringUtilTest, IsStringUTF8) {
    188   EXPECT_TRUE(IsStringUTF8("abc"));
    189   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
    190   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
    191   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
    192   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
    193   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
    194 
    195   // surrogate code points
    196   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
    197   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
    198   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
    199 
    200   // overlong sequences
    201   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
    202   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
    203   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
    204   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
    205   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
    206   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
    207   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
    208   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
    209   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
    210   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
    211   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
    212 
    213   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    214   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
    215   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
    216   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
    217 
    218   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    219   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
    220   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
    221   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
    222   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
    223 
    224   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    225   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
    226   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
    227   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
    228 
    229   // This should also be false, but currently we pass them through.
    230   // Disable them for now.
    231 #if 0
    232   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
    233   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
    234 #endif
    235 
    236   // Strings in legacy encodings. We can certainly make up strings
    237   // in a legacy encoding that are valid in UTF-8, but in real data,
    238   // most of them are invalid as UTF-8.
    239   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
    240   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
    241   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
    242   // "abc" with U+201[CD] in windows-125[0-8]
    243   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
    244   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    245   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
    246   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    247   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
    248 }
    249 
    250 TEST(StringUtilTest, ConvertASCII) {
    251   static const char* char_cases[] = {
    252     "Google Video",
    253     "Hello, world\n",
    254     "0123ABCDwxyz \a\b\t\r\n!+,.~"
    255   };
    256 
    257   static const wchar_t* const wchar_cases[] = {
    258     L"Google Video",
    259     L"Hello, world\n",
    260     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
    261   };
    262 
    263   for (size_t i = 0; i < arraysize(char_cases); ++i) {
    264     EXPECT_TRUE(IsStringASCII(char_cases[i]));
    265     std::wstring wide = ASCIIToWide(char_cases[i]);
    266     EXPECT_EQ(wchar_cases[i], wide);
    267 
    268     EXPECT_TRUE(IsStringASCII(wchar_cases[i]));
    269     std::string ascii = WideToASCII(wchar_cases[i]);
    270     EXPECT_EQ(char_cases[i], ascii);
    271   }
    272 
    273   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
    274   EXPECT_FALSE(IsStringASCII(L"Google \x80Video"));
    275 
    276   // Convert empty strings.
    277   std::wstring wempty;
    278   std::string empty;
    279   EXPECT_EQ(empty, WideToASCII(wempty));
    280   EXPECT_EQ(wempty, ASCIIToWide(empty));
    281 
    282   // Convert strings with an embedded NUL character.
    283   const char chars_with_nul[] = "test\0string";
    284   const int length_with_nul = arraysize(chars_with_nul) - 1;
    285   std::string string_with_nul(chars_with_nul, length_with_nul);
    286   std::wstring wide_with_nul = ASCIIToWide(string_with_nul);
    287   EXPECT_EQ(static_cast<std::wstring::size_type>(length_with_nul),
    288             wide_with_nul.length());
    289   std::string narrow_with_nul = WideToASCII(wide_with_nul);
    290   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
    291             narrow_with_nul.length());
    292   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
    293 }
    294 
    295 TEST(StringUtilTest, ToUpperASCII) {
    296   EXPECT_EQ('C', ToUpperASCII('C'));
    297   EXPECT_EQ('C', ToUpperASCII('c'));
    298   EXPECT_EQ('2', ToUpperASCII('2'));
    299 
    300   EXPECT_EQ(L'C', ToUpperASCII(L'C'));
    301   EXPECT_EQ(L'C', ToUpperASCII(L'c'));
    302   EXPECT_EQ(L'2', ToUpperASCII(L'2'));
    303 
    304   std::string in_place_a("Cc2");
    305   StringToUpperASCII(&in_place_a);
    306   EXPECT_EQ("CC2", in_place_a);
    307 
    308   std::wstring in_place_w(L"Cc2");
    309   StringToUpperASCII(&in_place_w);
    310   EXPECT_EQ(L"CC2", in_place_w);
    311 
    312   std::string original_a("Cc2");
    313   std::string upper_a = StringToUpperASCII(original_a);
    314   EXPECT_EQ("CC2", upper_a);
    315 
    316   std::wstring original_w(L"Cc2");
    317   std::wstring upper_w = StringToUpperASCII(original_w);
    318   EXPECT_EQ(L"CC2", upper_w);
    319 }
    320 
    321 static const struct {
    322   const wchar_t* src_w;
    323   const char*    src_a;
    324   const char*    dst;
    325 } lowercase_cases[] = {
    326   {L"FoO", "FoO", "foo"},
    327   {L"foo", "foo", "foo"},
    328   {L"FOO", "FOO", "foo"},
    329 };
    330 
    331 TEST(StringUtilTest, LowerCaseEqualsASCII) {
    332   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(lowercase_cases); ++i) {
    333     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_w,
    334                                      lowercase_cases[i].dst));
    335     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
    336                                      lowercase_cases[i].dst));
    337   }
    338 }
    339 
    340 TEST(StringUtilTest, GetByteDisplayUnits) {
    341   static const struct {
    342     int64 bytes;
    343     DataUnits expected;
    344   } cases[] = {
    345     {0, DATA_UNITS_BYTE},
    346     {512, DATA_UNITS_BYTE},
    347     {10*1024, DATA_UNITS_KIBIBYTE},
    348     {10*1024*1024, DATA_UNITS_MEBIBYTE},
    349     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE},
    350     {~(1LL<<63), DATA_UNITS_GIBIBYTE},
    351 #ifdef NDEBUG
    352     {-1, DATA_UNITS_BYTE},
    353 #endif
    354   };
    355 
    356   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
    357     EXPECT_EQ(cases[i].expected, GetByteDisplayUnits(cases[i].bytes));
    358 }
    359 
    360 TEST(StringUtilTest, FormatBytes) {
    361   static const struct {
    362     int64 bytes;
    363     DataUnits units;
    364     const wchar_t* expected;
    365     const wchar_t* expected_with_units;
    366   } cases[] = {
    367     {0, DATA_UNITS_BYTE, L"0", L"0 B"},
    368     {512, DATA_UNITS_BYTE, L"512", L"512 B"},
    369     {512, DATA_UNITS_KIBIBYTE, L"0.5", L"0.5 kB"},
    370     {1024*1024, DATA_UNITS_KIBIBYTE, L"1024", L"1024 kB"},
    371     {1024*1024, DATA_UNITS_MEBIBYTE, L"1", L"1 MB"},
    372     {1024*1024*1024, DATA_UNITS_GIBIBYTE, L"1", L"1 GB"},
    373     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"10", L"10 GB"},
    374     {~(1LL<<63), DATA_UNITS_GIBIBYTE, L"8589934592", L"8589934592 GB"},
    375     // Make sure the first digit of the fractional part works.
    376     {1024*1024 + 103, DATA_UNITS_KIBIBYTE, L"1024.1", L"1024.1 kB"},
    377     {1024*1024 + 205 * 1024, DATA_UNITS_MEBIBYTE, L"1.2", L"1.2 MB"},
    378     {1024*1024*1024 + (927 * 1024*1024), DATA_UNITS_GIBIBYTE,
    379      L"1.9", L"1.9 GB"},
    380     {10LL*1024*1024*1024, DATA_UNITS_GIBIBYTE, L"10", L"10 GB"},
    381 #ifdef NDEBUG
    382     {-1, DATA_UNITS_BYTE, L"", L""},
    383 #endif
    384   };
    385 
    386   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    387     EXPECT_EQ(cases[i].expected,
    388               FormatBytes(cases[i].bytes, cases[i].units, false));
    389     EXPECT_EQ(cases[i].expected_with_units,
    390               FormatBytes(cases[i].bytes, cases[i].units, true));
    391   }
    392 }
    393 
    394 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
    395   static const struct {
    396     const char* str;
    397     string16::size_type start_offset;
    398     const char* find_this;
    399     const char* replace_with;
    400     const char* expected;
    401   } cases[] = {
    402     {"aaa", 0, "a", "b", "bbb"},
    403     {"abb", 0, "ab", "a", "ab"},
    404     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
    405     {"Not found", 0, "x", "0", "Not found"},
    406     {"Not found again", 5, "x", "0", "Not found again"},
    407     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    408      "Four score and seven years agoMakingFour score and seven years agoit"
    409      "Four score and seven years agomuchFour score and seven years agolonger"
    410      "Four score and seven years ago"},
    411     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    412     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
    413     {"abababab", 2, "ab", "c", "abccc"},
    414   };
    415 
    416   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    417     string16 str = ASCIIToUTF16(cases[i].str);
    418     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
    419                                  ASCIIToUTF16(cases[i].find_this),
    420                                  ASCIIToUTF16(cases[i].replace_with));
    421     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    422   }
    423 }
    424 
    425 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
    426   static const struct {
    427     const char* str;
    428     string16::size_type start_offset;
    429     const char* find_this;
    430     const char* replace_with;
    431     const char* expected;
    432   } cases[] = {
    433     {"aaa", 0, "a", "b", "baa"},
    434     {"abb", 0, "ab", "a", "ab"},
    435     {"Removing some substrings inging", 0, "ing", "",
    436       "Remov some substrings inging"},
    437     {"Not found", 0, "x", "0", "Not found"},
    438     {"Not found again", 5, "x", "0", "Not found again"},
    439     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    440      "Four score and seven years agoMaking it much longer "},
    441     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    442     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    443     {"abababab", 2, "ab", "c", "abcabab"},
    444   };
    445 
    446   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); i++) {
    447     string16 str = ASCIIToUTF16(cases[i].str);
    448     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
    449                                      ASCIIToUTF16(cases[i].find_this),
    450                                      ASCIIToUTF16(cases[i].replace_with));
    451     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    452   }
    453 }
    454 
    455 namespace {
    456 
    457 template <typename INT>
    458 struct IntToStringTest {
    459   INT num;
    460   const char* sexpected;
    461   const char* uexpected;
    462 };
    463 
    464 }
    465 
    466 TEST(StringUtilTest, IntToString) {
    467   static const IntToStringTest<int> int_tests[] = {
    468       { 0, "0", "0" },
    469       { -1, "-1", "4294967295" },
    470       { std::numeric_limits<int>::max(), "2147483647", "2147483647" },
    471       { std::numeric_limits<int>::min(), "-2147483648", "2147483648" },
    472   };
    473   static const IntToStringTest<int64> int64_tests[] = {
    474       { 0, "0", "0" },
    475       { -1, "-1", "18446744073709551615" },
    476       { std::numeric_limits<int64>::max(),
    477         "9223372036854775807",
    478         "9223372036854775807", },
    479       { std::numeric_limits<int64>::min(),
    480         "-9223372036854775808",
    481         "9223372036854775808" },
    482   };
    483 
    484   for (size_t i = 0; i < arraysize(int_tests); ++i) {
    485     const IntToStringTest<int>* test = &int_tests[i];
    486     EXPECT_EQ(IntToString(test->num), test->sexpected);
    487     EXPECT_EQ(IntToWString(test->num), UTF8ToWide(test->sexpected));
    488     EXPECT_EQ(UintToString(test->num), test->uexpected);
    489     EXPECT_EQ(UintToWString(test->num), UTF8ToWide(test->uexpected));
    490   }
    491   for (size_t i = 0; i < arraysize(int64_tests); ++i) {
    492     const IntToStringTest<int64>* test = &int64_tests[i];
    493     EXPECT_EQ(Int64ToString(test->num), test->sexpected);
    494     EXPECT_EQ(Int64ToWString(test->num), UTF8ToWide(test->sexpected));
    495     EXPECT_EQ(Uint64ToString(test->num), test->uexpected);
    496     EXPECT_EQ(Uint64ToWString(test->num), UTF8ToWide(test->uexpected));
    497   }
    498 }
    499 
    500 TEST(StringUtilTest, Uint64ToString) {
    501   static const struct {
    502     uint64 input;
    503     std::string output;
    504   } cases[] = {
    505     {0, "0"},
    506     {42, "42"},
    507     {INT_MAX, "2147483647"},
    508     {kuint64max, "18446744073709551615"},
    509   };
    510 
    511   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i)
    512     EXPECT_EQ(cases[i].output, Uint64ToString(cases[i].input));
    513 }
    514 
    515 TEST(StringUtilTest, StringToInt) {
    516   static const struct {
    517     std::string input;
    518     int output;
    519     bool success;
    520   } cases[] = {
    521     {"0", 0, true},
    522     {"42", 42, true},
    523     {"-2147483648", INT_MIN, true},
    524     {"2147483647", INT_MAX, true},
    525     {"", 0, false},
    526     {" 42", 42, false},
    527     {"42 ", 42, false},
    528     {"\t\n\v\f\r 42", 42, false},
    529     {"blah42", 0, false},
    530     {"42blah", 42, false},
    531     {"blah42blah", 0, false},
    532     {"-273.15", -273, false},
    533     {"+98.6", 98, false},
    534     {"--123", 0, false},
    535     {"++123", 0, false},
    536     {"-+123", 0, false},
    537     {"+-123", 0, false},
    538     {"-", 0, false},
    539     {"-2147483649", INT_MIN, false},
    540     {"-99999999999", INT_MIN, false},
    541     {"2147483648", INT_MAX, false},
    542     {"99999999999", INT_MAX, false},
    543   };
    544 
    545   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    546     EXPECT_EQ(cases[i].output, StringToInt(cases[i].input));
    547     int output;
    548     EXPECT_EQ(cases[i].success, StringToInt(cases[i].input, &output));
    549     EXPECT_EQ(cases[i].output, output);
    550 
    551     std::wstring wide_input = ASCIIToWide(cases[i].input);
    552     EXPECT_EQ(cases[i].output, StringToInt(WideToUTF16Hack(wide_input)));
    553     EXPECT_EQ(cases[i].success, StringToInt(WideToUTF16Hack(wide_input),
    554                                             &output));
    555     EXPECT_EQ(cases[i].output, output);
    556   }
    557 
    558   // One additional test to verify that conversion of numbers in strings with
    559   // embedded NUL characters.  The NUL and extra data after it should be
    560   // interpreted as junk after the number.
    561   const char input[] = "6\06";
    562   std::string input_string(input, arraysize(input) - 1);
    563   int output;
    564   EXPECT_FALSE(StringToInt(input_string, &output));
    565   EXPECT_EQ(6, output);
    566 
    567   std::wstring wide_input = ASCIIToWide(input_string);
    568   EXPECT_FALSE(StringToInt(WideToUTF16Hack(wide_input), &output));
    569   EXPECT_EQ(6, output);
    570 }
    571 
    572 TEST(StringUtilTest, StringToInt64) {
    573   static const struct {
    574     std::string input;
    575     int64 output;
    576     bool success;
    577   } cases[] = {
    578     {"0", 0, true},
    579     {"42", 42, true},
    580     {"-2147483648", INT_MIN, true},
    581     {"2147483647", INT_MAX, true},
    582     {"-2147483649", GG_INT64_C(-2147483649), true},
    583     {"-99999999999", GG_INT64_C(-99999999999), true},
    584     {"2147483648", GG_INT64_C(2147483648), true},
    585     {"99999999999", GG_INT64_C(99999999999), true},
    586     {"9223372036854775807", kint64max, true},
    587     {"-9223372036854775808", kint64min, true},
    588     {"09", 9, true},
    589     {"-09", -9, true},
    590     {"", 0, false},
    591     {" 42", 42, false},
    592     {"42 ", 42, false},
    593     {"\t\n\v\f\r 42", 42, false},
    594     {"blah42", 0, false},
    595     {"42blah", 42, false},
    596     {"blah42blah", 0, false},
    597     {"-273.15", -273, false},
    598     {"+98.6", 98, false},
    599     {"--123", 0, false},
    600     {"++123", 0, false},
    601     {"-+123", 0, false},
    602     {"+-123", 0, false},
    603     {"-", 0, false},
    604     {"-9223372036854775809", kint64min, false},
    605     {"-99999999999999999999", kint64min, false},
    606     {"9223372036854775808", kint64max, false},
    607     {"99999999999999999999", kint64max, false},
    608   };
    609 
    610   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    611     EXPECT_EQ(cases[i].output, StringToInt64(cases[i].input));
    612     int64 output;
    613     EXPECT_EQ(cases[i].success, StringToInt64(cases[i].input, &output));
    614     EXPECT_EQ(cases[i].output, output);
    615 
    616     std::wstring wide_input = ASCIIToWide(cases[i].input);
    617     EXPECT_EQ(cases[i].output, StringToInt64(WideToUTF16Hack(wide_input)));
    618     EXPECT_EQ(cases[i].success, StringToInt64(WideToUTF16Hack(wide_input),
    619                                               &output));
    620     EXPECT_EQ(cases[i].output, output);
    621   }
    622 
    623   // One additional test to verify that conversion of numbers in strings with
    624   // embedded NUL characters.  The NUL and extra data after it should be
    625   // interpreted as junk after the number.
    626   const char input[] = "6\06";
    627   std::string input_string(input, arraysize(input) - 1);
    628   int64 output;
    629   EXPECT_FALSE(StringToInt64(input_string, &output));
    630   EXPECT_EQ(6, output);
    631 
    632   std::wstring wide_input = ASCIIToWide(input_string);
    633   EXPECT_FALSE(StringToInt64(WideToUTF16Hack(wide_input), &output));
    634   EXPECT_EQ(6, output);
    635 }
    636 
    637 TEST(StringUtilTest, HexStringToInt) {
    638   static const struct {
    639     std::string input;
    640     int output;
    641     bool success;
    642   } cases[] = {
    643     {"0", 0, true},
    644     {"42", 66, true},
    645     {"-42", -66, true},
    646     {"+42", 66, true},
    647     {"7fffffff", INT_MAX, true},
    648     {"80000000", INT_MIN, true},
    649     {"ffffffff", -1, true},
    650     {"DeadBeef", 0xdeadbeef, true},
    651     {"0x42", 66, true},
    652     {"-0x42", -66, true},
    653     {"+0x42", 66, true},
    654     {"0x7fffffff", INT_MAX, true},
    655     {"0x80000000", INT_MIN, true},
    656     {"0xffffffff", -1, true},
    657     {"0XDeadBeef", 0xdeadbeef, true},
    658     {"0x0f", 15, true},
    659     {"0f", 15, true},
    660     {" 45", 0x45, false},
    661     {"\t\n\v\f\r 0x45", 0x45, false},
    662     {" 45", 0x45, false},
    663     {"45 ", 0x45, false},
    664     {"efgh", 0xef, false},
    665     {"0xefgh", 0xef, false},
    666     {"hgfe", 0, false},
    667     {"100000000", -1, false},  // don't care about |output|, just |success|
    668     {"-", 0, false},
    669     {"", 0, false},
    670   };
    671 
    672   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    673     EXPECT_EQ(cases[i].output, HexStringToInt(cases[i].input));
    674     int output;
    675     EXPECT_EQ(cases[i].success, HexStringToInt(cases[i].input, &output));
    676     EXPECT_EQ(cases[i].output, output);
    677 
    678     std::wstring wide_input = ASCIIToWide(cases[i].input);
    679     EXPECT_EQ(cases[i].output, HexStringToInt(WideToUTF16Hack(wide_input)));
    680     EXPECT_EQ(cases[i].success, HexStringToInt(WideToUTF16Hack(wide_input),
    681                                                &output));
    682     EXPECT_EQ(cases[i].output, output);
    683   }
    684   // One additional test to verify that conversion of numbers in strings with
    685   // embedded NUL characters.  The NUL and extra data after it should be
    686   // interpreted as junk after the number.
    687   const char input[] = "0xc0ffee\09";
    688   std::string input_string(input, arraysize(input) - 1);
    689   int output;
    690   EXPECT_FALSE(HexStringToInt(input_string, &output));
    691   EXPECT_EQ(0xc0ffee, output);
    692 
    693   std::wstring wide_input = ASCIIToWide(input_string);
    694   EXPECT_FALSE(HexStringToInt(WideToUTF16Hack(wide_input), &output));
    695   EXPECT_EQ(0xc0ffee, output);
    696 }
    697 
    698 TEST(StringUtilTest, HexStringToBytes) {
    699   static const struct {
    700     const std::string input;  // Hex text handed to HexStringToBytes().
    701     const char* output;       // Bytes expected in the result vector.
    702     size_t output_len;        // How many bytes of |output| to compare.
    703     bool success;             // Expected return value.
    704   } cases[] = {
    705     {"0", "", 0, false},  // odd number of characters fails
    706     {"00", "\0", 1, true},
    707     {"42", "\x42", 1, true},
    708     {"-42", "", 0, false},  // any non-hex value fails
    709     {"+42", "", 0, false},
    710     {"7fffffff", "\x7f\xff\xff\xff", 4, true},
    711     {"80000000", "\x80\0\0\0", 4, true},
    712     {"deadbeef", "\xde\xad\xbe\xef", 4, true},
    713     {"DeadBeef", "\xde\xad\xbe\xef", 4, true},
    714     {"0x42", "", 0, false},  // leading 0x fails (x is not hex)
    715     {"0f", "\xf", 1, true},
    716     {"45  ", "\x45", 1, false},
    717     {"efgh", "\xef", 1, false},
    718     {"", "", 0, false},
    719     {"0123456789ABCDEF", "\x01\x23\x45\x67\x89\xAB\xCD\xEF", 8, true},
    720     {"0123456789ABCDEF012345",
    721      "\x01\x23\x45\x67\x89\xAB\xCD\xEF\x01\x23\x45", 11, true},
    722   };
    723 
    724   // Each row is checked twice: as given, and after WideToUTF16Hack().
    725   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    726     const std::string& hex = cases[i].input;
    727     // The expected bytes are the same for both runs, so build them once.
    728     std::vector<uint8> expected;
    729     for (size_t j = 0; j < cases[i].output_len; ++j)
    730       expected.push_back(static_cast<uint8>(cases[i].output[j]));
    731 
    732     // First run: the input exactly as written in the table.
    733     std::vector<uint8> bytes;
    734     EXPECT_EQ(cases[i].success, HexStringToBytes(hex, &bytes)) <<
    735         i << ": " << hex;
    736     ASSERT_EQ(bytes.size(), expected.size()) << i << ": " << hex;
    737     EXPECT_TRUE(std::equal(bytes.begin(), bytes.end(), expected.begin())) <<
    738         i << ": " << hex;
    739 
    740     // Second run: same text widened and converted, into an emptied vector.
    741     bytes.clear();
    742     std::wstring wide = ASCIIToWide(hex);
    743     EXPECT_EQ(cases[i].success,
    744               HexStringToBytes(WideToUTF16Hack(wide), &bytes)) << i << ": " << hex;
    745     ASSERT_EQ(bytes.size(), expected.size()) << i << ": " << hex;
    746     EXPECT_TRUE(std::equal(bytes.begin(), bytes.end(), expected.begin())) <<
    747         i << ": " << hex;
    748   }
    749 }
    750 
    751 TEST(StringUtilTest, StringToDouble) {
    752   static const struct {
    753     std::string input;  // Text to parse.
    754     double output;      // Value expected from both StringToDouble() forms.
    755     bool success;       // Expected return of the two-argument form.
    756   } cases[] = {
    757     {"0", 0.0, true},
    758     {"42", 42.0, true},
    759     {"-42", -42.0, true},
    760     {"123.45", 123.45, true},
    761     {"-123.45", -123.45, true},
    762     {"+123.45", 123.45, true},
    763     {"2.99792458e8", 299792458.0, true},
    764     {"149597870.691E+3", 149597870691.0, true},
    765     {"6.", 6.0, true},
    766     {"9e99999999999999999999", HUGE_VAL, false},
    767     {"-9e99999999999999999999", -HUGE_VAL, false},
    768     {"1e-2", 0.01, true},
    769     {" 1e-2", 0.01, false},
    770     {"1e-2 ", 0.01, false},
    771     {"-1E-7", -0.0000001, true},
    772     {"01e02", 100, true},
    773     {"2.3e15", 2.3e15, true},
    774     {"\t\n\v\f\r -123.45e2", -12345.0, false},
    775     {"+123 e4", 123.0, false},
    776     {"123e ", 123.0, false},
    777     {"123e", 123.0, false},
    778     {" 2.99", 2.99, false},
    779     {"1e3.4", 1000.0, false},
    780     {"nothing", 0.0, false},
    781     {"-", 0.0, false},
    782     {"+", 0.0, false},
    783     {"", 0.0, false},
    784   };
    785   // Each row is parsed as given and again after WideToUTF16Hack().
    786   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
    787     EXPECT_DOUBLE_EQ(cases[i].output, StringToDouble(cases[i].input));
    788     double output;
    789     EXPECT_EQ(cases[i].success, StringToDouble(cases[i].input, &output));
    790     EXPECT_DOUBLE_EQ(cases[i].output, output);
    791 
    792     std::wstring wide_input = ASCIIToWide(cases[i].input);
    793     EXPECT_DOUBLE_EQ(cases[i].output,
    794                      StringToDouble(WideToUTF16Hack(wide_input)));
    795     EXPECT_EQ(cases[i].success, StringToDouble(WideToUTF16Hack(wide_input),
    796                                                &output));
    797     EXPECT_DOUBLE_EQ(cases[i].output, output);
    798   }
    799 
    800   // Also verify numbers followed by an embedded NUL: the NUL and the data
    801   // after it must be treated as junk after the number.  The literal is split
    802   // so that "\0" is not lexed together with "15" as the octal escape \015.
    803   const char input[] = "3.14\0" "159";
    804   std::string input_string(input, arraysize(input) - 1);  // Keep the NUL.
    805   double output;
    806   EXPECT_FALSE(StringToDouble(input_string, &output));
    807   EXPECT_DOUBLE_EQ(3.14, output);
    808 
    809   std::wstring wide_input = ASCIIToWide(input_string);
    810   EXPECT_FALSE(StringToDouble(WideToUTF16Hack(wide_input), &output));
    811   EXPECT_DOUBLE_EQ(3.14, output);
    812 }
    813 
    814 // Checks that GG_VA_COPY yields an independent copy of a va_list. Plain
    815 // assignment cannot be relied on for that (it is not guaranteed to copy),
    816 // and Visual C doesn't support va_copy. See StringAppendVT, which depends on
    817 // this capability.
    818 static void VariableArgsFunc(const char* format, ...) {
    819   va_list original;
    820   va_start(original, format);
    821 
    822   va_list copy;
    823   GG_VA_COPY(copy, original);
    824   int first_int = va_arg(original, int);  // Drain the original list first.
    825   int second_int = va_arg(original, int);
    826   char* text = va_arg(original, char*);
    827   double real = va_arg(original, double);
    828   va_end(original);
    829 
    830   int copied_first_int = va_arg(copy, int);  // Then read the copy afresh.
    831   int copied_second_int = va_arg(copy, int);
    832   char* copied_text = va_arg(copy, char*);
    833   double copied_real = va_arg(copy, double);
    834 
    835   EXPECT_EQ(first_int, copied_first_int);
    836   EXPECT_EQ(second_int, copied_second_int);
    837   EXPECT_STREQ(text, copied_text);
    838   EXPECT_EQ(real, copied_real);
    839 
    840   va_end(copy);
    841 }
    842 
    843 TEST(StringUtilTest, VAList) {  // Passes int, int, C string, double varargs.
    844   VariableArgsFunc("%d %d %s %lf", 45, 92, "This is interesting", 9.21);
    845 }
    846 
    847 TEST(StringUtilTest, StringPrintfEmpty) {  // "%s" with "" must give "".
    848   EXPECT_EQ("", StringPrintf("%s", ""));
    849 }
    850 
    851 TEST(StringUtilTest, StringPrintfMisc) {  // Field widths, narrow then wide.
    852   EXPECT_EQ("123hello w", StringPrintf("%3d%2s %1c", 123, "hello", 'w'));
    853   EXPECT_EQ(L"123hello w", StringPrintf(L"%3d%2ls %1lc", 123, L"hello", 'w'));
    854 }
    855 
    856 TEST(StringUtilTest, StringAppendfEmptyString) {
    857   std::string narrow = "Hello";  // Appending "" must leave this unchanged.
    858   StringAppendF(&narrow, "%s", "");
    859   EXPECT_EQ("Hello", narrow);
    860 
    861   std::wstring wide = L"Hello";  // Same check for the wide overload.
    862   StringAppendF(&wide, L"%ls", L"");
    863   EXPECT_EQ(L"Hello", wide);
    864 }
    865 
    866 TEST(StringUtilTest, StringAppendfString) {
    867   std::string narrow = "Hello";  // A formatted string is appended in place.
    868   StringAppendF(&narrow, " %s", "World");
    869   EXPECT_EQ("Hello World", narrow);
    870 
    871   std::wstring wide = L"Hello";  // Same check for the wide overload.
    872   StringAppendF(&wide, L" %ls", L"World");
    873   EXPECT_EQ(L"Hello World", wide);
    874 }
    875 
    876 TEST(StringUtilTest, StringAppendfInt) {
    877   std::string narrow = "Hello";  // A formatted integer is appended in place.
    878   StringAppendF(&narrow, " %d", 123);
    879   EXPECT_EQ("Hello 123", narrow);
    880 
    881   std::wstring wide = L"Hello";  // Same check for the wide overload.
    882   StringAppendF(&wide, L" %d", 123);
    883   EXPECT_EQ(L"Hello 123", wide);
    884 }
    885 
    886 // Exercises output lengths right at the edge of the initial buffer size:
    887 // the source strings are 1025 and then 1024 characters long.
    888 TEST(StringUtilTest, StringPrintfBounds) {
    889   const int kSrcLen = 1026;
    890   char narrow_src[kSrcLen];
    891   for (size_t pos = 0; pos < arraysize(narrow_src); ++pos)
    892     narrow_src[pos] = 'A';
    893 
    894   wchar_t wide_src[kSrcLen];
    895   for (size_t pos = 0; pos < arraysize(wide_src); ++pos)
    896     wide_src[pos] = L'A';
    897 
    898   for (int cut = 1; cut <= 2; ++cut) {
    899     narrow_src[kSrcLen - cut] = 0;  // Terminate one slot earlier each pass.
    900     std::string narrow_out;
    901     SStringPrintf(&narrow_out, "%s", narrow_src);
    902     EXPECT_STREQ(narrow_src, narrow_out.c_str());
    903 
    904     wide_src[kSrcLen - cut] = 0;
    905     std::wstring wide_out;
    906     SStringPrintf(&wide_out, L"%ls", wide_src);
    907     EXPECT_STREQ(wide_src, wide_out.c_str());
    908   }
    909 }
    910 
    911 // Formats output large enough that the buffer will have to grow.
    912 TEST(StringUtilTest, Grow) {
    913   char seg[1026];
    914   for (size_t pos = 0; pos + 1 < arraysize(seg); ++pos)
    915     seg[pos] = 'A';
    916   seg[arraysize(seg) - 1] = 0;  // 1025 'A's followed by the terminator.
    917 
    918   const char* fmt = "%sB%sB%sB%sB%sB%sB%s";
    919 
    920   std::string got;
    921   SStringPrintf(&got, fmt, seg, seg, seg, seg, seg, seg, seg);
    922 
    923   const int kRefSize = 320000;  // Reference output comes from the C library.
    924   char* want = new char[kRefSize];
    925 #if defined(OS_WIN)
    926   sprintf_s(want, kRefSize, fmt, seg, seg, seg, seg, seg, seg, seg);
    927 #elif defined(OS_POSIX)
    928   snprintf(want, kRefSize, fmt, seg, seg, seg, seg, seg, seg, seg);
    929 #endif
    930 
    931   EXPECT_STREQ(want, got.c_str());
    932   delete[] want;
    933 }
    934 
    935 // Variadic shim for the StringAppendV test that follows: packs its arguments
    936 // into a va_list and hands them straight to StringAppendV.
    937 static void StringAppendVTestHelper(std::string* out,
    938                                     const char* format,
    939                                     ...) PRINTF_FORMAT(2, 3);
    940 
    941 static void StringAppendVTestHelper(std::string* out, const char* format, ...) {
    942   va_list args;
    943   va_start(args, format);
    944   StringAppendV(out, format, args);
    945   va_end(args);
    946 }
    947 
    948 TEST(StringUtilTest, StringAppendV) {
    949   std::string appended;  // Starts empty; the helper appends into it.
    950   StringAppendVTestHelper(&appended, "%d foo %s", 1, "bar");
    951   EXPECT_EQ("1 foo bar", appended);
    952 }
    953 
    954 // Checks the boundary condition at the size of string_util's internal
    955 // buffer.
    956 TEST(StringUtilTest, GrowBoundary) {
    957   const int kStackBufLen = 1024;
    958   // The payload should be one larger than the size of StringAppendVT's stack
    959   // buffer.
    960   const int kPayloadLen = kStackBufLen + 1;
    961   char payload[kPayloadLen + 1];  // One extra slot for the terminator.
    962   for (int pos = 0; pos < kPayloadLen; ++pos)
    963     payload[pos] = 'a';
    964   payload[kPayloadLen] = 0;
    965 
    966   std::string formatted;
    967   SStringPrintf(&formatted, "%s", payload);
    968 
    969   EXPECT_STREQ(payload, formatted.c_str());
    970 }
    971 
    972 // TODO(evanm): what's the proper cross-platform test here?
    973 #if defined(OS_WIN)
    974 // sprintf in Visual Studio fails when given U+FFFF. This tests that the
    975 // failure case is gracefully handled.
    976 TEST(StringUtilTest, Invalid) {
    977   wchar_t unprintable[2];
    978   unprintable[0] = 0xffff;
    979   unprintable[1] = 0;
    980 
    981   std::wstring formatted;
    982   SStringPrintf(&formatted, L"%ls", unprintable);
    983   EXPECT_STREQ(L"", formatted.c_str());  // Expect empty output on failure.
    984 }
    985 #endif
    986 
    987 // Covers SplitString and SplitStringDontTrim on single-character delimiters.
    988 TEST(StringUtilTest, SplitString) {
    989   std::vector<std::wstring> pieces;
    990 
    991   SplitString(L"a,b,c", L',', &pieces);
    992   ASSERT_EQ(3U, pieces.size());
    993   EXPECT_EQ(pieces[0], L"a");
    994   EXPECT_EQ(pieces[1], L"b");
    995   EXPECT_EQ(pieces[2], L"c");
    996   pieces.clear();
    997 
    998   SplitString(L"a, b, c", L',', &pieces);  // Pieces come back trimmed.
    999   ASSERT_EQ(3U, pieces.size());
   1000   EXPECT_EQ(pieces[0], L"a");
   1001   EXPECT_EQ(pieces[1], L"b");
   1002   EXPECT_EQ(pieces[2], L"c");
   1003   pieces.clear();
   1004 
   1005   SplitString(L"a,,c", L',', &pieces);  // Empty middle piece is kept.
   1006   ASSERT_EQ(3U, pieces.size());
   1007   EXPECT_EQ(pieces[0], L"a");
   1008   EXPECT_EQ(pieces[1], L"");
   1009   EXPECT_EQ(pieces[2], L"c");
   1010   pieces.clear();
   1011 
   1012   SplitString(L"", L'*', &pieces);  // Empty input gives one empty piece.
   1013   ASSERT_EQ(1U, pieces.size());
   1014   EXPECT_EQ(pieces[0], L"");
   1015   pieces.clear();
   1016 
   1017   SplitString(L"foo", L'*', &pieces);  // Delimiter absent from the input.
   1018   ASSERT_EQ(1U, pieces.size());
   1019   EXPECT_EQ(pieces[0], L"foo");
   1020   pieces.clear();
   1021 
   1022   SplitString(L"foo ,", L',', &pieces);
   1023   ASSERT_EQ(2U, pieces.size());
   1024   EXPECT_EQ(pieces[0], L"foo");
   1025   EXPECT_EQ(pieces[1], L"");
   1026   pieces.clear();
   1027 
   1028   SplitString(L",", L',', &pieces);
   1029   ASSERT_EQ(2U, pieces.size());
   1030   EXPECT_EQ(pieces[0], L"");
   1031   EXPECT_EQ(pieces[1], L"");
   1032   pieces.clear();
   1033 
   1034   SplitString(L"\t\ta\t", L'\t', &pieces);
   1035   ASSERT_EQ(4U, pieces.size());
   1036   EXPECT_EQ(pieces[0], L"");
   1037   EXPECT_EQ(pieces[1], L"");
   1038   EXPECT_EQ(pieces[2], L"a");
   1039   EXPECT_EQ(pieces[3], L"");
   1040   pieces.clear();
   1041 
   1042   SplitStringDontTrim(L"\t\ta\t", L'\t', &pieces);
   1043   ASSERT_EQ(4U, pieces.size());
   1044   EXPECT_EQ(pieces[0], L"");
   1045   EXPECT_EQ(pieces[1], L"");
   1046   EXPECT_EQ(pieces[2], L"a");
   1047   EXPECT_EQ(pieces[3], L"");
   1048   pieces.clear();
   1049 
   1050   SplitString(L"\ta\t\nb\tcc", L'\n', &pieces);  // Outer tabs are trimmed.
   1051   ASSERT_EQ(2U, pieces.size());
   1052   EXPECT_EQ(pieces[0], L"a");
   1053   EXPECT_EQ(pieces[1], L"b\tcc");
   1054   pieces.clear();
   1055 
   1056   SplitStringDontTrim(L"\ta\t\nb\tcc", L'\n', &pieces);  // Tabs are kept.
   1057   ASSERT_EQ(2U, pieces.size());
   1058   EXPECT_EQ(pieces[0], L"\ta\t");
   1059   EXPECT_EQ(pieces[1], L"b\tcc");
   1060   pieces.clear();
   1061 }
   1062 
   1063 // Covers Tokenize: both its return value and the tokens it produces.
   1064 TEST(StringUtilTest, Tokenize) {
   1065   std::vector<std::string> tokens;
   1066   size_t count;
   1067 
   1068   count = Tokenize("This is a string", " ", &tokens);
   1069   EXPECT_EQ(4U, count);
   1070   ASSERT_EQ(4U, tokens.size());
   1071   EXPECT_EQ(tokens[0], "This");
   1072   EXPECT_EQ(tokens[1], "is");
   1073   EXPECT_EQ(tokens[2], "a");
   1074   EXPECT_EQ(tokens[3], "string");
   1075   tokens.clear();
   1076 
   1077   count = Tokenize("one,two,three", ",", &tokens);
   1078   EXPECT_EQ(3U, count);
   1079   ASSERT_EQ(3U, tokens.size());
   1080   EXPECT_EQ(tokens[0], "one");
   1081   EXPECT_EQ(tokens[1], "two");
   1082   EXPECT_EQ(tokens[2], "three");
   1083   tokens.clear();
   1084 
   1085   count = Tokenize("one,two:three;four", ",:", &tokens);  // ';' not a delim.
   1086   EXPECT_EQ(3U, count);
   1087   ASSERT_EQ(3U, tokens.size());
   1088   EXPECT_EQ(tokens[0], "one");
   1089   EXPECT_EQ(tokens[1], "two");
   1090   EXPECT_EQ(tokens[2], "three;four");
   1091   tokens.clear();
   1092 
   1093   count = Tokenize("one,two:three;four", ";,:", &tokens);
   1094   EXPECT_EQ(4U, count);
   1095   ASSERT_EQ(4U, tokens.size());
   1096   EXPECT_EQ(tokens[0], "one");
   1097   EXPECT_EQ(tokens[1], "two");
   1098   EXPECT_EQ(tokens[2], "three");
   1099   EXPECT_EQ(tokens[3], "four");
   1100   tokens.clear();
   1101 
   1102   count = Tokenize("one, two, three", ",", &tokens);  // Spaces are kept.
   1103   EXPECT_EQ(3U, count);
   1104   ASSERT_EQ(3U, tokens.size());
   1105   EXPECT_EQ(tokens[0], "one");
   1106   EXPECT_EQ(tokens[1], " two");
   1107   EXPECT_EQ(tokens[2], " three");
   1108   tokens.clear();
   1109 
   1110   count = Tokenize("one, two, three, ", ",", &tokens);
   1111   EXPECT_EQ(4U, count);
   1112   ASSERT_EQ(4U, tokens.size());
   1113   EXPECT_EQ(tokens[0], "one");
   1114   EXPECT_EQ(tokens[1], " two");
   1115   EXPECT_EQ(tokens[2], " three");
   1116   EXPECT_EQ(tokens[3], " ");
   1117   tokens.clear();
   1118 
   1119   count = Tokenize("one, two, three,", ",", &tokens);  // No trailing token.
   1120   EXPECT_EQ(3U, count);
   1121   ASSERT_EQ(3U, tokens.size());
   1122   EXPECT_EQ(tokens[0], "one");
   1123   EXPECT_EQ(tokens[1], " two");
   1124   EXPECT_EQ(tokens[2], " three");
   1125   tokens.clear();
   1126 
   1127   count = Tokenize("", ",", &tokens);
   1128   EXPECT_EQ(0U, count);
   1129   ASSERT_EQ(0U, tokens.size());
   1130   tokens.clear();
   1131 
   1132   count = Tokenize(",", ",", &tokens);
   1133   EXPECT_EQ(0U, count);
   1134   ASSERT_EQ(0U, tokens.size());
   1135   tokens.clear();
   1136 
   1137   count = Tokenize(",;:.", ".:;,", &tokens);  // Input is delimiters only.
   1138   EXPECT_EQ(0U, count);
   1139   ASSERT_EQ(0U, tokens.size());
   1140   tokens.clear();
   1141 
   1142   count = Tokenize("\t\ta\t", "\t", &tokens);
   1143   EXPECT_EQ(1U, count);
   1144   ASSERT_EQ(1U, tokens.size());
   1145   EXPECT_EQ(tokens[0], "a");
   1146   tokens.clear();
   1147 
   1148   count = Tokenize("\ta\t\nb\tcc", "\n", &tokens);
   1149   EXPECT_EQ(2U, count);
   1150   ASSERT_EQ(2U, tokens.size());
   1151   EXPECT_EQ(tokens[0], "\ta\t");
   1152   EXPECT_EQ(tokens[1], "b\tcc");
   1153   tokens.clear();
   1154 }
   1155 
   1156 // Covers JoinString as the input vector grows from empty to five entries.
   1157 TEST(StringUtilTest, JoinString) {
   1158   std::vector<std::string> parts;
   1159   EXPECT_EQ("", JoinString(parts, ','));
   1160 
   1161   parts.push_back("a");
   1162   EXPECT_EQ("a", JoinString(parts, ','));
   1163 
   1164   parts.push_back("b");
   1165   parts.push_back("c");
   1166   EXPECT_EQ("a,b,c", JoinString(parts, ','));
   1167 
   1168   parts.push_back("");  // An empty entry still gets its separator.
   1169   EXPECT_EQ("a,b,c,", JoinString(parts, ','));
   1170   parts.push_back(" ");
   1171   EXPECT_EQ("a|b|c|| ", JoinString(parts, '|'));
   1172 }
   1173 
   1174 TEST(StringUtilTest, StartsWith) {  // Last argument: true = case-sensitive.
   1175   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", true));
   1176   EXPECT_FALSE(StartsWithASCII("JavaScript:url", "javascript", true));
   1177   EXPECT_TRUE(StartsWithASCII("javascript:url", "javascript", false));
   1178   EXPECT_TRUE(StartsWithASCII("JavaScript:url", "javascript", false));
   1179   EXPECT_FALSE(StartsWithASCII("java", "javascript", true));  // Prefix longer.
   1180   EXPECT_FALSE(StartsWithASCII("java", "javascript", false));
   1181   EXPECT_FALSE(StartsWithASCII("", "javascript", false));
   1182   EXPECT_FALSE(StartsWithASCII("", "javascript", true));
   1183   EXPECT_TRUE(StartsWithASCII("java", "", false));  // Empty prefix matches.
   1184   EXPECT_TRUE(StartsWithASCII("java", "", true));
   1185   // The same matrix, run against the wide-string StartsWith().
   1186   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", true));
   1187   EXPECT_FALSE(StartsWith(L"JavaScript:url", L"javascript", true));
   1188   EXPECT_TRUE(StartsWith(L"javascript:url", L"javascript", false));
   1189   EXPECT_TRUE(StartsWith(L"JavaScript:url", L"javascript", false));
   1190   EXPECT_FALSE(StartsWith(L"java", L"javascript", true));
   1191   EXPECT_FALSE(StartsWith(L"java", L"javascript", false));
   1192   EXPECT_FALSE(StartsWith(L"", L"javascript", false));
   1193   EXPECT_FALSE(StartsWith(L"", L"javascript", true));
   1194   EXPECT_TRUE(StartsWith(L"java", L"", false));
   1195   EXPECT_TRUE(StartsWith(L"java", L"", true));
   1196 }
   1197 
   1198 TEST(StringUtilTest, EndsWith) {  // Last argument: true = case-sensitive.
   1199   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", true));
   1200   EXPECT_FALSE(EndsWith(L"Foo.Plugin", L".plugin", true));
   1201   EXPECT_TRUE(EndsWith(L"Foo.plugin", L".plugin", false));
   1202   EXPECT_TRUE(EndsWith(L"Foo.Plugin", L".plugin", false));
   1203   EXPECT_FALSE(EndsWith(L".plug", L".plugin", true));  // Suffix is longer.
   1204   EXPECT_FALSE(EndsWith(L".plug", L".plugin", false));
   1205   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", true));  // Mid-string.
   1206   EXPECT_FALSE(EndsWith(L"Foo.plugin Bar", L".plugin", false));
   1207   EXPECT_FALSE(EndsWith(L"", L".plugin", false));
   1208   EXPECT_FALSE(EndsWith(L"", L".plugin", true));
   1209   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", false));  // Empty suffix matches.
   1210   EXPECT_TRUE(EndsWith(L"Foo.plugin", L"", true));
   1211   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", false));  // Whole string.
   1212   EXPECT_TRUE(EndsWith(L".plugin", L".plugin", true));
   1213   EXPECT_TRUE(EndsWith(L"", L"", false));
   1214   EXPECT_TRUE(EndsWith(L"", L"", true));
   1215 }
   1216 
   1217 TEST(StringUtilTest, GetStringFWithOffsets) {
   1218   std::vector<string16> subst;  // Replacements for $1 and $2.
   1219   subst.push_back(ASCIIToUTF16("1"));
   1220   subst.push_back(ASCIIToUTF16("2"));
   1221   std::vector<size_t> offsets;
   1222   // Placeholders in ascending order: offsets 7 and 25 are expected.
   1223   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
   1224                             subst,
   1225                             &offsets);
   1226   ASSERT_EQ(2U, offsets.size());  // Fatal: the indexing below needs 2 items.
   1227   EXPECT_EQ(7U, offsets[0]);
   1228   EXPECT_EQ(25U, offsets[1]);
   1229   offsets.clear();
   1230   // Placeholders swapped in the pattern: the expected offsets swap as well.
   1231   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
   1232                             subst,
   1233                             &offsets);
   1234   ASSERT_EQ(2U, offsets.size());  // Fatal: the indexing below needs 2 items.
   1235   EXPECT_EQ(25U, offsets[0]);
   1236   EXPECT_EQ(7U, offsets[1]);
   1237   offsets.clear();
   1238 }
   1239 
   1240 TEST(StringUtilTest, ReplaceStringPlaceholders) {
   1241   std::vector<string16> args;  // One replacement for each of $1 through $9.
   1242   args.push_back(ASCIIToUTF16("9a"));
   1243   args.push_back(ASCIIToUTF16("8b"));
   1244   args.push_back(ASCIIToUTF16("7c"));
   1245   args.push_back(ASCIIToUTF16("6d"));
   1246   args.push_back(ASCIIToUTF16("5e"));
   1247   args.push_back(ASCIIToUTF16("4f"));
   1248   args.push_back(ASCIIToUTF16("3g"));
   1249   args.push_back(ASCIIToUTF16("2h"));
   1250   args.push_back(ASCIIToUTF16("1i"));
   1251 
   1252   const string16 pattern = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i");
   1253   // No offsets are requested here, hence the NULL third argument.
   1254   string16 result = ReplaceStringPlaceholders(pattern, args, NULL);
   1255 
   1256   EXPECT_EQ(result, ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"));
   1257 }
   1258 
   1259 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
   1260   // The pattern refers to $4..$6 but only three replacements are supplied;
   1261   // the expected output shows those placeholders replaced by nothing.
   1262   std::vector<string16> args;
   1263   args.push_back(ASCIIToUTF16("9a"));
   1264   args.push_back(ASCIIToUTF16("8b"));
   1265   args.push_back(ASCIIToUTF16("7c"));
   1266 
   1267   const string16 pattern = ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i");
   1268   // No offsets are requested here, hence the NULL third argument.
   1269   string16 result = ReplaceStringPlaceholders(pattern, args, NULL);
   1270 
   1271   EXPECT_EQ(result, ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"));
   1272 }
   1273 
   1274 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
   1275   std::vector<std::string> args;  // One replacement for each of $1..$9.
   1276   args.push_back("9a");
   1277   args.push_back("8b");
   1278   args.push_back("7c");
   1279   args.push_back("6d");
   1280   args.push_back("5e");
   1281   args.push_back("4f");
   1282   args.push_back("3g");
   1283   args.push_back("2h");
   1284   args.push_back("1i");
   1285 
   1286   // std::string flavour of the call; no offsets requested (NULL).
   1287   std::string result = ReplaceStringPlaceholders(
   1288       "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", args, NULL);
   1289 
   1290   EXPECT_EQ(result, "9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii");
   1291 }
   1292 
   1293 TEST(StringUtilTest, SplitStringAlongWhitespace) {
   1294   struct TestData {
   1295     const std::wstring input;            // Text to split.
   1296     const size_t expected_result_count;  // Number of words expected.
   1297     const std::wstring output1;          // First word, if any.
   1298     const std::wstring output2;          // Second word, if any.
   1299   } data[] = {
   1300     { L"a",       1, L"a",  L""   },
   1301     { L" ",       0, L"",   L""   },
   1302     { L" a",      1, L"a",  L""   },
   1303     { L" ab ",    1, L"ab", L""   },
   1304     { L" ab c",   2, L"ab", L"c"  },
   1305     { L" ab c ",  2, L"ab", L"c"  },
   1306     { L" ab cd",  2, L"ab", L"cd" },
   1307     { L" ab cd ", 2, L"ab", L"cd" },
   1308     { L" \ta\t",  1, L"a",  L""   },
   1309     { L" b\ta\t", 2, L"b",  L"a"  },
   1310     { L" b\tat",  2, L"b",  L"at" },
   1311     { L"b\tat",   2, L"b",  L"at" },
   1312     { L"b\t at",  2, L"b",  L"at" },
   1313   };
   1314   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(data); ++i) {
   1315     const TestData& row = data[i];
   1316     std::vector<std::wstring> words;
   1317     SplitStringAlongWhitespace(row.input, &words);
   1318     ASSERT_EQ(row.expected_result_count, words.size());
   1319     // Only compare as many words as the row says should exist.
   1320     if (row.expected_result_count > 0) ASSERT_EQ(row.output1, words[0]);
   1321     if (row.expected_result_count > 1) ASSERT_EQ(row.output2, words[1]);
   1322   }
   1323 }
   1324 
   1325 TEST(StringUtilTest, MatchPatternTest) {  // '*' and '?' wildcards, '\\' escape.
   1326   EXPECT_TRUE(MatchPatternASCII("www.google.com", "*.com"));
   1327   EXPECT_TRUE(MatchPatternASCII("www.google.com", "*"));
   1328   EXPECT_FALSE(MatchPatternASCII("www.google.com", "www*.g*.org"));
   1329   EXPECT_TRUE(MatchPatternASCII("Hello", "H?l?o"));
   1330   EXPECT_FALSE(MatchPatternASCII("www.google.com", "http://*)"));
   1331   EXPECT_FALSE(MatchPatternASCII("www.msn.com", "*.COM"));
   1332   EXPECT_TRUE(MatchPatternASCII("Hello*1234", "He??o\\*1*"));
   1333   EXPECT_FALSE(MatchPatternASCII("", "*.*"));
   1334   EXPECT_TRUE(MatchPatternASCII("", "*"));
   1335   EXPECT_TRUE(MatchPatternASCII("", "?"));
   1336   EXPECT_TRUE(MatchPatternASCII("", ""));
   1337   EXPECT_FALSE(MatchPatternASCII("Hello", ""));
   1338   EXPECT_TRUE(MatchPatternASCII("Hello*", "Hello*"));
   1339   // Matching stops after a certain recursion depth.
   1340   EXPECT_FALSE(MatchPatternASCII("12345678901234567890",
   1341                                  "???????????????????*"));
   1342 }
   1343 
   1344 TEST(StringUtilTest, LcpyTest) {
   1345   // Normal case: the source fits in the buffer with room to spare.
   1346   {
   1347     char buf[10];
   1348     wchar_t wbuf[10];
   1349     EXPECT_EQ(7U, base::strlcpy(buf, "abcdefg", arraysize(buf)));
   1350     EXPECT_EQ(0, memcmp(buf, "abcdefg", 8));
   1351     EXPECT_EQ(7U, base::wcslcpy(wbuf, L"abcdefg", arraysize(wbuf)));
   1352     EXPECT_EQ(0, memcmp(wbuf, L"abcdefg", sizeof(wchar_t) * 8));
   1353   }
   1354 
   1355   // dst_size == 0: nothing should be written to the buffer, and the return
   1356   // value should still be the equivalent of strlen(src).
   1357   {
   1358     char buf[2] = {1, 2};
   1359     wchar_t wbuf[2] = {1, 2};
   1360     EXPECT_EQ(7U, base::strlcpy(buf, "abcdefg", 0));
   1361     EXPECT_EQ(1, buf[0]);
   1362     EXPECT_EQ(2, buf[1]);
   1363     EXPECT_EQ(7U, base::wcslcpy(wbuf, L"abcdefg", 0));
   1364 #if defined(WCHAR_T_IS_UNSIGNED)
   1365     EXPECT_EQ(1U, wbuf[0]);
   1366     EXPECT_EQ(2U, wbuf[1]);
   1367 #else
   1368     EXPECT_EQ(1, wbuf[0]);
   1369     EXPECT_EQ(2, wbuf[1]);
   1370 #endif
   1371   }
   1372 
   1373   // The source _just_ fits completely, including the null.
   1374   {
   1375     char buf[8];
   1376     wchar_t wbuf[8];
   1377     EXPECT_EQ(7U, base::strlcpy(buf, "abcdefg", arraysize(buf)));
   1378     EXPECT_EQ(0, memcmp(buf, "abcdefg", 8));
   1379     EXPECT_EQ(7U, base::wcslcpy(wbuf, L"abcdefg", arraysize(wbuf)));
   1380     EXPECT_EQ(0, memcmp(wbuf, L"abcdefg", sizeof(wchar_t) * 8));
   1381   }
   1382 
   1383   // The buffer is one smaller, so the source's own null cannot fit.
   1384   {
   1385     char buf[7];
   1386     wchar_t wbuf[7];
   1387     EXPECT_EQ(7U, base::strlcpy(buf, "abcdefg", arraysize(buf)));
   1388     EXPECT_EQ(0, memcmp(buf, "abcdef", 7));
   1389     EXPECT_EQ(7U, base::wcslcpy(wbuf, L"abcdefg", arraysize(wbuf)));
   1390     EXPECT_EQ(0, memcmp(wbuf, L"abcdef", sizeof(wchar_t) * 7));
   1391   }
   1392 
   1393   // The buffer is far too small: expect a truncated, terminated copy.
   1394   {
   1395     char buf[3];
   1396     wchar_t wbuf[3];
   1397     EXPECT_EQ(7U, base::strlcpy(buf, "abcdefg", arraysize(buf)));
   1398     EXPECT_EQ(0, memcmp(buf, "ab", 3));
   1399     EXPECT_EQ(7U, base::wcslcpy(wbuf, L"abcdefg", arraysize(wbuf)));
   1400     EXPECT_EQ(0, memcmp(wbuf, L"ab", sizeof(wchar_t) * 3));
   1401   }
   1402 }
   1403 
   1404 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
   1405   struct TestData {
   1406     const wchar_t* input;  // Wide format string under test.
   1407     bool portable;         // Expected IsWprintfFormatPortable() verdict.
   1408   } formats[] = {
   1409     { L"%ls", true },
   1410     { L"%s", false },
   1411     { L"%S", false },
   1412     { L"%lS", false },
   1413     { L"Hello, %s", false },
   1414     { L"%lc", true },
   1415     { L"%c", false },
   1416     { L"%C", false },
   1417     { L"%lC", false },
   1418     { L"%ls %s", false },
   1419     { L"%s %ls", false },
   1420     { L"%s %ls %s", false },
   1421     { L"%f", true },
   1422     { L"%f %F", false },
   1423     { L"%d %D", false },
   1424     { L"%o %O", false },
   1425     { L"%u %U", false },
   1426     { L"%f %d %o %u", true },
   1427     { L"%-8d (%02.1f%)", true },
   1428     { L"% 10s", false },
   1429     { L"% 10ls", true }
   1430   };
   1431   // Every format must be classified exactly as its row says.
   1432   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(formats); ++i)
   1433     EXPECT_EQ(formats[i].portable, base::IsWprintfFormatPortable(formats[i].input));
   1434 }
   1435 
   1436 TEST(StringUtilTest, ElideString) {
   1437   struct TestData {
   1438     const wchar_t* input;   // Text to elide.
   1439     int max_len;            // Length limit passed to ElideString().
   1440     bool result;            // Expected return value.
   1441     const wchar_t* output;  // Expected contents of the output string.
   1442   } cases[] = {
   1443     { L"Hello", 0, true, L"" },
   1444     { L"", 0, false, L"" },
   1445     { L"Hello, my name is Tom", 1, true, L"H" },
   1446     { L"Hello, my name is Tom", 2, true, L"He" },
   1447     { L"Hello, my name is Tom", 3, true, L"H.m" },
   1448     { L"Hello, my name is Tom", 4, true, L"H..m" },
   1449     { L"Hello, my name is Tom", 5, true, L"H...m" },
   1450     { L"Hello, my name is Tom", 6, true, L"He...m" },
   1451     { L"Hello, my name is Tom", 7, true, L"He...om" },
   1452     { L"Hello, my name is Tom", 10, true, L"Hell...Tom" },
   1453     { L"Hello, my name is Tom", 100, false, L"Hello, my name is Tom" }
   1454   };
   1455   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(cases); ++i) {
   1456     const TestData& row = cases[i];
   1457     std::wstring elided;
   1458     EXPECT_EQ(row.result, ElideString(row.input, row.max_len, &elided));
   1459     EXPECT_TRUE(elided == row.output);
   1460   }
   1461 }
   1462 
   1463 TEST(StringUtilTest, HexEncode) {
   1464   std::string encoded = HexEncode(NULL, 0);  // No input bytes, no output.
   1465   EXPECT_EQ(encoded.length(), 0U);
   1466   unsigned char raw[] = {0x01, 0xff, 0x02, 0xfe, 0x03, 0x80, 0x81};
   1467   encoded = HexEncode(raw, sizeof(raw));  // Expected digits are upper case.
   1468   EXPECT_EQ(encoded.compare("01FF02FE038081"), 0);
   1469 }
   1470 
   1471 }  // namespace base
   1472