Home | History | Annotate | Download | only in strings
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_util.h"
      6 
      7 #include <math.h>
      8 #include <stdarg.h>
      9 #include <stddef.h>
     10 #include <stdint.h>
     11 
     12 #include <algorithm>
     13 
     14 #include "base/macros.h"
     15 #include "base/strings/string16.h"
     16 #include "base/strings/utf_string_conversions.h"
     17 #include "testing/gmock/include/gmock/gmock.h"
     18 #include "testing/gtest/include/gtest/gtest.h"
     19 
     20 using ::testing::ElementsAre;
     21 
     22 namespace base {
     23 
     24 static const struct trim_case {
     25   const wchar_t* input;
     26   const TrimPositions positions;
     27   const wchar_t* output;
     28   const TrimPositions return_value;
     29 } trim_cases[] = {
     30   {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING},
     31   {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING},
     32   {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL},
     33   {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE},
     34   {L"", TRIM_ALL, L"", TRIM_NONE},
     35   {L"  ", TRIM_LEADING, L"", TRIM_LEADING},
     36   {L"  ", TRIM_TRAILING, L"", TRIM_TRAILING},
     37   {L"  ", TRIM_ALL, L"", TRIM_ALL},
     38   {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL},
     39   {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL},
     40 };
     41 
     42 static const struct trim_case_ascii {
     43   const char* input;
     44   const TrimPositions positions;
     45   const char* output;
     46   const TrimPositions return_value;
     47 } trim_cases_ascii[] = {
     48   {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING},
     49   {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING},
     50   {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL},
     51   {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE},
     52   {"", TRIM_ALL, "", TRIM_NONE},
     53   {"  ", TRIM_LEADING, "", TRIM_LEADING},
     54   {"  ", TRIM_TRAILING, "", TRIM_TRAILING},
     55   {"  ", TRIM_ALL, "", TRIM_ALL},
     56   {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL},
     57 };
     58 
     59 namespace {
     60 
     61 // Helper used to test TruncateUTF8ToByteSize.
     62 bool Truncated(const std::string& input,
     63                const size_t byte_size,
     64                std::string* output) {
     65     size_t prev = input.length();
     66     TruncateUTF8ToByteSize(input, byte_size, output);
     67     return prev != output->length();
     68 }
     69 
     70 }  // namespace
     71 
     72 TEST(StringUtilTest, TruncateUTF8ToByteSize) {
     73   std::string output;
     74 
     75   // Empty strings and invalid byte_size arguments
     76   EXPECT_FALSE(Truncated(std::string(), 0, &output));
     77   EXPECT_EQ(output, "");
     78   EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output));
     79   EXPECT_EQ(output, "");
     80   EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output));
     81   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output));
     82 
     83   // Testing the truncation of valid UTF8 correctly
     84   EXPECT_TRUE(Truncated("abc", 2, &output));
     85   EXPECT_EQ(output, "ab");
     86   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output));
     87   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     88   EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output));
     89   EXPECT_EQ(output.compare("\xc2\x81"), 0);
     90   EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output));
     91   EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0);
     92 
     93   {
     94     const char array[] = "\x00\x00\xc2\x81\xc2\x81";
     95     const std::string array_string(array, arraysize(array));
     96     EXPECT_TRUE(Truncated(array_string, 4, &output));
     97     EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0);
     98   }
     99 
    100   {
    101     const char array[] = "\x00\xc2\x81\xc2\x81";
    102     const std::string array_string(array, arraysize(array));
    103     EXPECT_TRUE(Truncated(array_string, 4, &output));
    104     EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0);
    105   }
    106 
    107   // Testing invalid UTF8
    108   EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output));
    109   EXPECT_EQ(output.compare(""), 0);
    110   EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output));
    111   EXPECT_EQ(output.compare(""), 0);
    112   EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output));
    113   EXPECT_EQ(output.compare(""), 0);
    114 
    115   // Testing invalid UTF8 mixed with valid UTF8
    116   EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output));
    117   EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0);
    118   EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output));
    119   EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0);
    120   EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf",
    121               10, &output));
    122   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0);
    123   EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0",
    124               10, &output));
    125   EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0);
    126   EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output));
    127   EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0);
    128 
    129   // Overlong sequences
    130   EXPECT_TRUE(Truncated("\xc0\x80", 2, &output));
    131   EXPECT_EQ(output.compare(""), 0);
    132   EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output));
    133   EXPECT_EQ(output.compare(""), 0);
    134   EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output));
    135   EXPECT_EQ(output.compare(""), 0);
    136   EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output));
    137   EXPECT_EQ(output.compare(""), 0);
    138   EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output));
    139   EXPECT_EQ(output.compare(""), 0);
    140   EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output));
    141   EXPECT_EQ(output.compare(""), 0);
    142   EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output));
    143   EXPECT_EQ(output.compare(""), 0);
    144   EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output));
    145   EXPECT_EQ(output.compare(""), 0);
    146   EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output));
    147   EXPECT_EQ(output.compare(""), 0);
    148   EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output));
    149   EXPECT_EQ(output.compare(""), 0);
    150   EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output));
    151   EXPECT_EQ(output.compare(""), 0);
    152 
    153   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    154   EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output));
    155   EXPECT_EQ(output.compare(""), 0);
    156   EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output));
    157   EXPECT_EQ(output.compare(""), 0);
    158   EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output));
    159   EXPECT_EQ(output.compare(""), 0);
    160 
    161   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    162   EXPECT_TRUE(Truncated("\xfe\xff", 2, &output));
    163   EXPECT_EQ(output.compare(""), 0);
    164   EXPECT_TRUE(Truncated("\xff\xfe", 2, &output));
    165   EXPECT_EQ(output.compare(""), 0);
    166 
    167   {
    168     const char array[] = "\x00\x00\xfe\xff";
    169     const std::string array_string(array, arraysize(array));
    170     EXPECT_TRUE(Truncated(array_string, 4, &output));
    171     EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0);
    172   }
    173 
    174   // Variants on the previous test
    175   {
    176     const char array[] = "\xff\xfe\x00\x00";
    177     const std::string array_string(array, 4);
    178     EXPECT_FALSE(Truncated(array_string, 4, &output));
    179     EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0);
    180   }
    181   {
    182     const char array[] = "\xff\x00\x00\xfe";
    183     const std::string array_string(array, arraysize(array));
    184     EXPECT_TRUE(Truncated(array_string, 4, &output));
    185     EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0);
    186   }
    187 
    188   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    189   EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output));
    190   EXPECT_EQ(output.compare(""), 0);
    191   EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output));
    192   EXPECT_EQ(output.compare(""), 0);
    193   EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output));
    194   EXPECT_EQ(output.compare(""), 0);
    195   EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output));
    196   EXPECT_EQ(output.compare(""), 0);
    197   EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output));
    198   EXPECT_EQ(output.compare(""), 0);
    199 
    200   // Strings in legacy encodings that are valid in UTF-8, but
    201   // are invalid as UTF-8 in real data.
    202   EXPECT_TRUE(Truncated("caf\xe9", 4, &output));
    203   EXPECT_EQ(output.compare("caf"), 0);
    204   EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output));
    205   EXPECT_EQ(output.compare(""), 0);
    206   EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output));
    207   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    208   EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7,
    209               &output));
    210   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    211 
    212   // Testing using the same string as input and output.
    213   EXPECT_FALSE(Truncated(output, 4, &output));
    214   EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0);
    215   EXPECT_TRUE(Truncated(output, 3, &output));
    216   EXPECT_EQ(output.compare("\xa7\x41"), 0);
    217 
    218   // "abc" with U+201[CD] in windows-125[0-8]
    219   EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output));
    220   EXPECT_EQ(output.compare("\x93" "abc"), 0);
    221 
    222   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    223   EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output));
    224   EXPECT_EQ(output.compare(""), 0);
    225 
    226   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    227   EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output));
    228   EXPECT_EQ(output.compare(""), 0);
    229 }
    230 
    231 TEST(StringUtilTest, TrimWhitespace) {
    232   string16 output;  // Allow contents to carry over to next testcase
    233   for (size_t i = 0; i < arraysize(trim_cases); ++i) {
    234     const trim_case& value = trim_cases[i];
    235     EXPECT_EQ(value.return_value,
    236               TrimWhitespace(WideToUTF16(value.input), value.positions,
    237                              &output));
    238     EXPECT_EQ(WideToUTF16(value.output), output);
    239   }
    240 
    241   // Test that TrimWhitespace() can take the same string for input and output
    242   output = ASCIIToUTF16("  This is a test \r\n");
    243   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    244   EXPECT_EQ(ASCIIToUTF16("This is a test"), output);
    245 
    246   // Once more, but with a string of whitespace
    247   output = ASCIIToUTF16("  \r\n");
    248   EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output));
    249   EXPECT_EQ(string16(), output);
    250 
    251   std::string output_ascii;
    252   for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
    253     const trim_case_ascii& value = trim_cases_ascii[i];
    254     EXPECT_EQ(value.return_value,
    255               TrimWhitespaceASCII(value.input, value.positions, &output_ascii));
    256     EXPECT_EQ(value.output, output_ascii);
    257   }
    258 }
    259 
    260 static const struct collapse_case {
    261   const wchar_t* input;
    262   const bool trim;
    263   const wchar_t* output;
    264 } collapse_cases[] = {
    265   {L" Google Video ", false, L"Google Video"},
    266   {L"Google Video", false, L"Google Video"},
    267   {L"", false, L""},
    268   {L"  ", false, L""},
    269   {L"\t\rTest String\n", false, L"Test String"},
    270   {L"\x2002Test String\x00A0\x3000", false, L"Test String"},
    271   {L"    Test     \n  \t String    ", false, L"Test String"},
    272   {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"},
    273   {L"   Test String", false, L"Test String"},
    274   {L"Test String    ", false, L"Test String"},
    275   {L"Test String", false, L"Test String"},
    276   {L"", true, L""},
    277   {L"\n", true, L""},
    278   {L"  \r  ", true, L""},
    279   {L"\nFoo", true, L"Foo"},
    280   {L"\r  Foo  ", true, L"Foo"},
    281   {L" Foo bar ", true, L"Foo bar"},
    282   {L"  \tFoo  bar  \n", true, L"Foo bar"},
    283   {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"},
    284 };
    285 
    286 TEST(StringUtilTest, CollapseWhitespace) {
    287   for (size_t i = 0; i < arraysize(collapse_cases); ++i) {
    288     const collapse_case& value = collapse_cases[i];
    289     EXPECT_EQ(WideToUTF16(value.output),
    290               CollapseWhitespace(WideToUTF16(value.input), value.trim));
    291   }
    292 }
    293 
    294 static const struct collapse_case_ascii {
    295   const char* input;
    296   const bool trim;
    297   const char* output;
    298 } collapse_cases_ascii[] = {
    299   {" Google Video ", false, "Google Video"},
    300   {"Google Video", false, "Google Video"},
    301   {"", false, ""},
    302   {"  ", false, ""},
    303   {"\t\rTest String\n", false, "Test String"},
    304   {"    Test     \n  \t String    ", false, "Test String"},
    305   {"   Test String", false, "Test String"},
    306   {"Test String    ", false, "Test String"},
    307   {"Test String", false, "Test String"},
    308   {"", true, ""},
    309   {"\n", true, ""},
    310   {"  \r  ", true, ""},
    311   {"\nFoo", true, "Foo"},
    312   {"\r  Foo  ", true, "Foo"},
    313   {" Foo bar ", true, "Foo bar"},
    314   {"  \tFoo  bar  \n", true, "Foo bar"},
    315   {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"},
    316 };
    317 
    318 TEST(StringUtilTest, CollapseWhitespaceASCII) {
    319   for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) {
    320     const collapse_case_ascii& value = collapse_cases_ascii[i];
    321     EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim));
    322   }
    323 }
    324 
    325 TEST(StringUtilTest, IsStringUTF8) {
    326   EXPECT_TRUE(IsStringUTF8("abc"));
    327   EXPECT_TRUE(IsStringUTF8("\xc2\x81"));
    328   EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf"));
    329   EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf"));
    330   EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"));
    331   EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc"));  // UTF-8 BOM
    332 
    333   // surrogate code points
    334   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf"));
    335   EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f"));
    336   EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf"));
    337 
    338   // overlong sequences
    339   EXPECT_FALSE(IsStringUTF8("\xc0\x80"));  // U+0000
    340   EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81"));  // "AB"
    341   EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80"));  // U+0000
    342   EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80"));  // U+0080
    343   EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf"));  // U+07ff
    344   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D"));  // U+000D
    345   EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91"));  // U+0091
    346   EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80"));  // U+0800
    347   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf"));  // U+FEFF (BOM)
    348   EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf"));  // U+003F
    349   EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5"));  // U+00A5
    350 
    351   // Beyond U+10FFFF (the upper limit of Unicode codespace)
    352   EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80"));  // U+110000
    353   EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf"));  // 5 bytes
    354   EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80"));  // 6 bytes
    355 
    356   // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE)
    357   EXPECT_FALSE(IsStringUTF8("\xfe\xff"));
    358   EXPECT_FALSE(IsStringUTF8("\xff\xfe"));
    359   EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4)));
    360   EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00"));
    361 
    362   // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF>
    363   EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe"));  // U+FFFE)
    364   EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe"));  // U+1FFFE
    365   EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf"));  // U+10FFFF
    366   EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90"));  // U+FDD0
    367   EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf"));  // U+FDEF
    368   // Strings in legacy encodings. We can certainly make up strings
    369   // in a legacy encoding that are valid in UTF-8, but in real data,
    370   // most of them are invalid as UTF-8.
    371   EXPECT_FALSE(IsStringUTF8("caf\xe9"));  // cafe with U+00E9 in ISO-8859-1
    372   EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2"));  // U+AC00, U+AC001 in EUC-KR
    373   EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e"));  // U+4F60 U+597D in Big5
    374   // "abc" with U+201[CD] in windows-125[0-8]
    375   EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94"));
    376   // U+0639 U+064E U+0644 U+064E in ISO-8859-6
    377   EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee"));
    378   // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7
    379   EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC"));
    380 
    381   // Check that we support Embedded Nulls. The first uses the canonical UTF-8
    382   // representation, and the second uses a 2-byte sequence. The second version
    383   // is invalid UTF-8 since UTF-8 states that the shortest encoding for a
    384   // given codepoint must be used.
    385   static const char kEmbeddedNull[] = "embedded\0null";
    386   EXPECT_TRUE(IsStringUTF8(
    387       std::string(kEmbeddedNull, sizeof(kEmbeddedNull))));
    388   EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000"));
    389 }
    390 
    391 TEST(StringUtilTest, IsStringASCII) {
    392   static char char_ascii[] =
    393       "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF";
    394   static char16 char16_ascii[] = {
    395       '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A',
    396       'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6',
    397       '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 };
    398   static std::wstring wchar_ascii(
    399       L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF");
    400 
    401   // Test a variety of the fragment start positions and lengths in order to make
    402   // sure that bit masking in IsStringASCII works correctly.
    403   // Also, test that a non-ASCII character will be detected regardless of its
    404   // position inside the string.
    405   {
    406     const size_t string_length = arraysize(char_ascii) - 1;
    407     for (size_t offset = 0; offset < 8; ++offset) {
    408       for (size_t len = 0, max_len = string_length - offset; len < max_len;
    409            ++len) {
    410         EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len)));
    411         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
    412           char_ascii[char_pos] |= '\x80';
    413           EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len)));
    414           char_ascii[char_pos] &= ~'\x80';
    415         }
    416       }
    417     }
    418   }
    419 
    420   {
    421     const size_t string_length = arraysize(char16_ascii) - 1;
    422     for (size_t offset = 0; offset < 4; ++offset) {
    423       for (size_t len = 0, max_len = string_length - offset; len < max_len;
    424            ++len) {
    425         EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len)));
    426         for (size_t char_pos = offset; char_pos < len; ++char_pos) {
    427           char16_ascii[char_pos] |= 0x80;
    428           EXPECT_FALSE(
    429               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
    430           char16_ascii[char_pos] &= ~0x80;
    431           // Also test when the upper half is non-zero.
    432           char16_ascii[char_pos] |= 0x100;
    433           EXPECT_FALSE(
    434               IsStringASCII(StringPiece16(char16_ascii + offset, len)));
    435           char16_ascii[char_pos] &= ~0x100;
    436         }
    437       }
    438     }
    439   }
    440 
    441   {
    442     const size_t string_length = wchar_ascii.length();
    443     for (size_t len = 0; len < string_length; ++len) {
    444       EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len)));
    445       for (size_t char_pos = 0; char_pos < len; ++char_pos) {
    446         wchar_ascii[char_pos] |= 0x80;
    447         EXPECT_FALSE(
    448             IsStringASCII(wchar_ascii.substr(0, len)));
    449         wchar_ascii[char_pos] &= ~0x80;
    450         wchar_ascii[char_pos] |= 0x100;
    451         EXPECT_FALSE(
    452             IsStringASCII(wchar_ascii.substr(0, len)));
    453         wchar_ascii[char_pos] &= ~0x100;
    454 #if defined(WCHAR_T_IS_UTF32)
    455         wchar_ascii[char_pos] |= 0x10000;
    456         EXPECT_FALSE(
    457             IsStringASCII(wchar_ascii.substr(0, len)));
    458         wchar_ascii[char_pos] &= ~0x10000;
    459 #endif  // WCHAR_T_IS_UTF32
    460       }
    461     }
    462   }
    463 }
    464 
    465 TEST(StringUtilTest, ConvertASCII) {
    466   static const char* const char_cases[] = {
    467     "Google Video",
    468     "Hello, world\n",
    469     "0123ABCDwxyz \a\b\t\r\n!+,.~"
    470   };
    471 
    472   static const wchar_t* const wchar_cases[] = {
    473     L"Google Video",
    474     L"Hello, world\n",
    475     L"0123ABCDwxyz \a\b\t\r\n!+,.~"
    476   };
    477 
    478   for (size_t i = 0; i < arraysize(char_cases); ++i) {
    479     EXPECT_TRUE(IsStringASCII(char_cases[i]));
    480     string16 utf16 = ASCIIToUTF16(char_cases[i]);
    481     EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16);
    482 
    483     std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i]));
    484     EXPECT_EQ(char_cases[i], ascii);
    485   }
    486 
    487   EXPECT_FALSE(IsStringASCII("Google \x80Video"));
    488 
    489   // Convert empty strings.
    490   string16 empty16;
    491   std::string empty;
    492   EXPECT_EQ(empty, UTF16ToASCII(empty16));
    493   EXPECT_EQ(empty16, ASCIIToUTF16(empty));
    494 
    495   // Convert strings with an embedded NUL character.
    496   const char chars_with_nul[] = "test\0string";
    497   const int length_with_nul = arraysize(chars_with_nul) - 1;
    498   std::string string_with_nul(chars_with_nul, length_with_nul);
    499   string16 string16_with_nul = ASCIIToUTF16(string_with_nul);
    500   EXPECT_EQ(static_cast<string16::size_type>(length_with_nul),
    501             string16_with_nul.length());
    502   std::string narrow_with_nul = UTF16ToASCII(string16_with_nul);
    503   EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul),
    504             narrow_with_nul.length());
    505   EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul));
    506 }
    507 
    508 TEST(StringUtilTest, ToLowerASCII) {
    509   EXPECT_EQ('c', ToLowerASCII('C'));
    510   EXPECT_EQ('c', ToLowerASCII('c'));
    511   EXPECT_EQ('2', ToLowerASCII('2'));
    512 
    513   EXPECT_EQ(static_cast<char16>('c'), ToLowerASCII(static_cast<char16>('C')));
    514   EXPECT_EQ(static_cast<char16>('c'), ToLowerASCII(static_cast<char16>('c')));
    515   EXPECT_EQ(static_cast<char16>('2'), ToLowerASCII(static_cast<char16>('2')));
    516 
    517   EXPECT_EQ("cc2", ToLowerASCII("Cc2"));
    518   EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2")));
    519 }
    520 
    521 TEST(StringUtilTest, ToUpperASCII) {
    522   EXPECT_EQ('C', ToUpperASCII('C'));
    523   EXPECT_EQ('C', ToUpperASCII('c'));
    524   EXPECT_EQ('2', ToUpperASCII('2'));
    525 
    526   EXPECT_EQ(static_cast<char16>('C'), ToUpperASCII(static_cast<char16>('C')));
    527   EXPECT_EQ(static_cast<char16>('C'), ToUpperASCII(static_cast<char16>('c')));
    528   EXPECT_EQ(static_cast<char16>('2'), ToUpperASCII(static_cast<char16>('2')));
    529 
    530   EXPECT_EQ("CC2", ToUpperASCII("Cc2"));
    531   EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2")));
    532 }
    533 
    534 TEST(StringUtilTest, LowerCaseEqualsASCII) {
    535   static const struct {
    536     const char*    src_a;
    537     const char*    dst;
    538   } lowercase_cases[] = {
    539     { "FoO", "foo" },
    540     { "foo", "foo" },
    541     { "FOO", "foo" },
    542   };
    543 
    544   for (size_t i = 0; i < arraysize(lowercase_cases); ++i) {
    545     EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a),
    546                                      lowercase_cases[i].dst));
    547     EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a,
    548                                      lowercase_cases[i].dst));
    549   }
    550 }
    551 
    552 TEST(StringUtilTest, FormatBytesUnlocalized) {
    553   static const struct {
    554     int64_t bytes;
    555     const char* expected;
    556   } cases[] = {
    557     // Expected behavior: we show one post-decimal digit when we have
    558     // under two pre-decimal digits, except in cases where it makes no
    559     // sense (zero or bytes).
    560     // Since we switch units once we cross the 1000 mark, this keeps
    561     // the display of file sizes or bytes consistently around three
    562     // digits.
    563     {0, "0 B"},
    564     {512, "512 B"},
    565     {1024*1024, "1.0 MB"},
    566     {1024*1024*1024, "1.0 GB"},
    567     {10LL*1024*1024*1024, "10.0 GB"},
    568     {99LL*1024*1024*1024, "99.0 GB"},
    569     {105LL*1024*1024*1024, "105 GB"},
    570     {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"},
    571     {~(1LL << 63), "8192 PB"},
    572 
    573     {99*1024 + 103, "99.1 kB"},
    574     {1024*1024 + 103, "1.0 MB"},
    575     {1024*1024 + 205 * 1024, "1.2 MB"},
    576     {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"},
    577     {10LL*1024*1024*1024, "10.0 GB"},
    578     {100LL*1024*1024*1024, "100 GB"},
    579   };
    580 
    581   for (size_t i = 0; i < arraysize(cases); ++i) {
    582     EXPECT_EQ(ASCIIToUTF16(cases[i].expected),
    583               FormatBytesUnlocalized(cases[i].bytes));
    584   }
    585 }
    586 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) {
    587   static const struct {
    588     const char* str;
    589     string16::size_type start_offset;
    590     const char* find_this;
    591     const char* replace_with;
    592     const char* expected;
    593   } cases[] = {
    594     {"aaa", 0, "a", "b", "bbb"},
    595     {"abb", 0, "ab", "a", "ab"},
    596     {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "},
    597     {"Not found", 0, "x", "0", "Not found"},
    598     {"Not found again", 5, "x", "0", "Not found again"},
    599     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    600      "Four score and seven years agoMakingFour score and seven years agoit"
    601      "Four score and seven years agomuchFour score and seven years agolonger"
    602      "Four score and seven years ago"},
    603     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    604     {"Replace me only me once", 9, "me ", "", "Replace me only once"},
    605     {"abababab", 2, "ab", "c", "abccc"},
    606   };
    607 
    608   for (size_t i = 0; i < arraysize(cases); i++) {
    609     string16 str = ASCIIToUTF16(cases[i].str);
    610     ReplaceSubstringsAfterOffset(&str, cases[i].start_offset,
    611                                  ASCIIToUTF16(cases[i].find_this),
    612                                  ASCIIToUTF16(cases[i].replace_with));
    613     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    614   }
    615 }
    616 
    617 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) {
    618   static const struct {
    619     const char* str;
    620     string16::size_type start_offset;
    621     const char* find_this;
    622     const char* replace_with;
    623     const char* expected;
    624   } cases[] = {
    625     {"aaa", 0, "a", "b", "baa"},
    626     {"abb", 0, "ab", "a", "ab"},
    627     {"Removing some substrings inging", 0, "ing", "",
    628       "Remov some substrings inging"},
    629     {"Not found", 0, "x", "0", "Not found"},
    630     {"Not found again", 5, "x", "0", "Not found again"},
    631     {" Making it much longer ", 0, " ", "Four score and seven years ago",
    632      "Four score and seven years agoMaking it much longer "},
    633     {"Invalid offset", 9999, "t", "foobar", "Invalid offset"},
    634     {"Replace me only me once", 4, "me ", "", "Replace only me once"},
    635     {"abababab", 2, "ab", "c", "abcabab"},
    636   };
    637 
    638   for (size_t i = 0; i < arraysize(cases); i++) {
    639     string16 str = ASCIIToUTF16(cases[i].str);
    640     ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset,
    641                                      ASCIIToUTF16(cases[i].find_this),
    642                                      ASCIIToUTF16(cases[i].replace_with));
    643     EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str);
    644   }
    645 }
    646 
    647 TEST(StringUtilTest, HexDigitToInt) {
    648   EXPECT_EQ(0, HexDigitToInt('0'));
    649   EXPECT_EQ(1, HexDigitToInt('1'));
    650   EXPECT_EQ(2, HexDigitToInt('2'));
    651   EXPECT_EQ(3, HexDigitToInt('3'));
    652   EXPECT_EQ(4, HexDigitToInt('4'));
    653   EXPECT_EQ(5, HexDigitToInt('5'));
    654   EXPECT_EQ(6, HexDigitToInt('6'));
    655   EXPECT_EQ(7, HexDigitToInt('7'));
    656   EXPECT_EQ(8, HexDigitToInt('8'));
    657   EXPECT_EQ(9, HexDigitToInt('9'));
    658   EXPECT_EQ(10, HexDigitToInt('A'));
    659   EXPECT_EQ(11, HexDigitToInt('B'));
    660   EXPECT_EQ(12, HexDigitToInt('C'));
    661   EXPECT_EQ(13, HexDigitToInt('D'));
    662   EXPECT_EQ(14, HexDigitToInt('E'));
    663   EXPECT_EQ(15, HexDigitToInt('F'));
    664 
    665   // Verify the lower case as well.
    666   EXPECT_EQ(10, HexDigitToInt('a'));
    667   EXPECT_EQ(11, HexDigitToInt('b'));
    668   EXPECT_EQ(12, HexDigitToInt('c'));
    669   EXPECT_EQ(13, HexDigitToInt('d'));
    670   EXPECT_EQ(14, HexDigitToInt('e'));
    671   EXPECT_EQ(15, HexDigitToInt('f'));
    672 }
    673 
    674 TEST(StringUtilTest, JoinString) {
    675   std::string separator(", ");
    676   std::vector<std::string> parts;
    677   EXPECT_EQ(std::string(), JoinString(parts, separator));
    678 
    679   parts.push_back(std::string());
    680   EXPECT_EQ(std::string(), JoinString(parts, separator));
    681   parts.clear();
    682 
    683   parts.push_back("a");
    684   EXPECT_EQ("a", JoinString(parts, separator));
    685 
    686   parts.push_back("b");
    687   parts.push_back("c");
    688   EXPECT_EQ("a, b, c", JoinString(parts, separator));
    689 
    690   parts.push_back(std::string());
    691   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
    692   parts.push_back(" ");
    693   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
    694 }
    695 
    696 TEST(StringUtilTest, JoinString16) {
    697   string16 separator = ASCIIToUTF16(", ");
    698   std::vector<string16> parts;
    699   EXPECT_EQ(string16(), JoinString(parts, separator));
    700 
    701   parts.push_back(string16());
    702   EXPECT_EQ(string16(), JoinString(parts, separator));
    703   parts.clear();
    704 
    705   parts.push_back(ASCIIToUTF16("a"));
    706   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
    707 
    708   parts.push_back(ASCIIToUTF16("b"));
    709   parts.push_back(ASCIIToUTF16("c"));
    710   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
    711 
    712   parts.push_back(ASCIIToUTF16(""));
    713   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
    714   parts.push_back(ASCIIToUTF16(" "));
    715   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
    716 }
    717 
    718 TEST(StringUtilTest, JoinStringPiece) {
    719   std::string separator(", ");
    720   std::vector<StringPiece> parts;
    721   EXPECT_EQ(std::string(), JoinString(parts, separator));
    722 
    723   // Test empty first part (https://crbug.com/698073).
    724   parts.push_back(StringPiece());
    725   EXPECT_EQ(std::string(), JoinString(parts, separator));
    726   parts.clear();
    727 
    728   parts.push_back("a");
    729   EXPECT_EQ("a", JoinString(parts, separator));
    730 
    731   parts.push_back("b");
    732   parts.push_back("c");
    733   EXPECT_EQ("a, b, c", JoinString(parts, separator));
    734 
    735   parts.push_back(StringPiece());
    736   EXPECT_EQ("a, b, c, ", JoinString(parts, separator));
    737   parts.push_back(" ");
    738   EXPECT_EQ("a|b|c|| ", JoinString(parts, "|"));
    739 }
    740 
    741 TEST(StringUtilTest, JoinStringPiece16) {
    742   string16 separator = ASCIIToUTF16(", ");
    743   std::vector<StringPiece16> parts;
    744   EXPECT_EQ(string16(), JoinString(parts, separator));
    745 
    746   // Test empty first part (https://crbug.com/698073).
    747   parts.push_back(StringPiece16());
    748   EXPECT_EQ(string16(), JoinString(parts, separator));
    749   parts.clear();
    750 
    751   const string16 kA = ASCIIToUTF16("a");
    752   parts.push_back(kA);
    753   EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator));
    754 
    755   const string16 kB = ASCIIToUTF16("b");
    756   parts.push_back(kB);
    757   const string16 kC = ASCIIToUTF16("c");
    758   parts.push_back(kC);
    759   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator));
    760 
    761   parts.push_back(StringPiece16());
    762   EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator));
    763   const string16 kSpace = ASCIIToUTF16(" ");
    764   parts.push_back(kSpace);
    765   EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|")));
    766 }
    767 
    768 TEST(StringUtilTest, JoinStringInitializerList) {
    769   std::string separator(", ");
    770   EXPECT_EQ(std::string(), JoinString({}, separator));
    771 
    772   // Test empty first part (https://crbug.com/698073).
    773   EXPECT_EQ(std::string(), JoinString({StringPiece()}, separator));
    774 
    775   // With const char*s.
    776   EXPECT_EQ("a", JoinString({"a"}, separator));
    777   EXPECT_EQ("a, b, c", JoinString({"a", "b", "c"}, separator));
    778   EXPECT_EQ("a, b, c, ", JoinString({"a", "b", "c", StringPiece()}, separator));
    779   EXPECT_EQ("a|b|c|| ", JoinString({"a", "b", "c", StringPiece(), " "}, "|"));
    780 
    781   // With std::strings.
    782   const std::string kA = "a";
    783   const std::string kB = "b";
    784   EXPECT_EQ("a, b", JoinString({kA, kB}, separator));
    785 
    786   // With StringPieces.
    787   const StringPiece kPieceA = kA;
    788   const StringPiece kPieceB = kB;
    789   EXPECT_EQ("a, b", JoinString({kPieceA, kPieceB}, separator));
    790 }
    791 
    792 TEST(StringUtilTest, JoinStringInitializerList16) {
    793   string16 separator = ASCIIToUTF16(", ");
    794   EXPECT_EQ(string16(), JoinString({}, separator));
    795 
    796   // Test empty first part (https://crbug.com/698073).
    797   EXPECT_EQ(string16(), JoinString({StringPiece16()}, separator));
    798 
    799   // With string16s.
    800   const string16 kA = ASCIIToUTF16("a");
    801   EXPECT_EQ(ASCIIToUTF16("a"), JoinString({kA}, separator));
    802 
    803   const string16 kB = ASCIIToUTF16("b");
    804   const string16 kC = ASCIIToUTF16("c");
    805   EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString({kA, kB, kC}, separator));
    806 
    807   EXPECT_EQ(ASCIIToUTF16("a, b, c, "),
    808             JoinString({kA, kB, kC, StringPiece16()}, separator));
    809   const string16 kSpace = ASCIIToUTF16(" ");
    810   EXPECT_EQ(
    811       ASCIIToUTF16("a|b|c|| "),
    812       JoinString({kA, kB, kC, StringPiece16(), kSpace}, ASCIIToUTF16("|")));
    813 
    814   // With StringPiece16s.
    815   const StringPiece16 kPieceA = kA;
    816   const StringPiece16 kPieceB = kB;
    817   EXPECT_EQ(ASCIIToUTF16("a, b"), JoinString({kPieceA, kPieceB}, separator));
    818 }
    819 
    820 TEST(StringUtilTest, StartsWith) {
    821   EXPECT_TRUE(StartsWith("javascript:url", "javascript",
    822                          base::CompareCase::SENSITIVE));
    823   EXPECT_FALSE(StartsWith("JavaScript:url", "javascript",
    824                           base::CompareCase::SENSITIVE));
    825   EXPECT_TRUE(StartsWith("javascript:url", "javascript",
    826                          base::CompareCase::INSENSITIVE_ASCII));
    827   EXPECT_TRUE(StartsWith("JavaScript:url", "javascript",
    828                          base::CompareCase::INSENSITIVE_ASCII));
    829   EXPECT_FALSE(StartsWith("java", "javascript", base::CompareCase::SENSITIVE));
    830   EXPECT_FALSE(StartsWith("java", "javascript",
    831                           base::CompareCase::INSENSITIVE_ASCII));
    832   EXPECT_FALSE(StartsWith(std::string(), "javascript",
    833                           base::CompareCase::INSENSITIVE_ASCII));
    834   EXPECT_FALSE(StartsWith(std::string(), "javascript",
    835                           base::CompareCase::SENSITIVE));
    836   EXPECT_TRUE(StartsWith("java", std::string(),
    837                          base::CompareCase::INSENSITIVE_ASCII));
    838   EXPECT_TRUE(StartsWith("java", std::string(), base::CompareCase::SENSITIVE));
    839 
    840   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
    841                          ASCIIToUTF16("javascript"),
    842                          base::CompareCase::SENSITIVE));
    843   EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"),
    844                           ASCIIToUTF16("javascript"),
    845                           base::CompareCase::SENSITIVE));
    846   EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"),
    847                          ASCIIToUTF16("javascript"),
    848                          base::CompareCase::INSENSITIVE_ASCII));
    849   EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"),
    850                          ASCIIToUTF16("javascript"),
    851                          base::CompareCase::INSENSITIVE_ASCII));
    852   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"),
    853                           base::CompareCase::SENSITIVE));
    854   EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"),
    855                           base::CompareCase::INSENSITIVE_ASCII));
    856   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"),
    857                           base::CompareCase::INSENSITIVE_ASCII));
    858   EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"),
    859                           base::CompareCase::SENSITIVE));
    860   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(),
    861                          base::CompareCase::INSENSITIVE_ASCII));
    862   EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(),
    863                          base::CompareCase::SENSITIVE));
    864 }
    865 
    866 TEST(StringUtilTest, EndsWith) {
    867   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"),
    868                        base::CompareCase::SENSITIVE));
    869   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"),
    870                         base::CompareCase::SENSITIVE));
    871   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"),
    872                        base::CompareCase::INSENSITIVE_ASCII));
    873   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"),
    874                        base::CompareCase::INSENSITIVE_ASCII));
    875   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"),
    876                         base::CompareCase::SENSITIVE));
    877   EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"),
    878                         base::CompareCase::INSENSITIVE_ASCII));
    879   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"),
    880                         base::CompareCase::SENSITIVE));
    881   EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"),
    882                         base::CompareCase::INSENSITIVE_ASCII));
    883   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"),
    884                         base::CompareCase::INSENSITIVE_ASCII));
    885   EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"),
    886                         base::CompareCase::SENSITIVE));
    887   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(),
    888                        base::CompareCase::INSENSITIVE_ASCII));
    889   EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(),
    890                        base::CompareCase::SENSITIVE));
    891   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"),
    892                        base::CompareCase::INSENSITIVE_ASCII));
    893   EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"),
    894                        base::CompareCase::SENSITIVE));
    895   EXPECT_TRUE(
    896       EndsWith(string16(), string16(), base::CompareCase::INSENSITIVE_ASCII));
    897   EXPECT_TRUE(EndsWith(string16(), string16(), base::CompareCase::SENSITIVE));
    898 }
    899 
    900 TEST(StringUtilTest, GetStringFWithOffsets) {
    901   std::vector<string16> subst;
    902   subst.push_back(ASCIIToUTF16("1"));
    903   subst.push_back(ASCIIToUTF16("2"));
    904   std::vector<size_t> offsets;
    905 
    906   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."),
    907                             subst,
    908                             &offsets);
    909   EXPECT_EQ(2U, offsets.size());
    910   EXPECT_EQ(7U, offsets[0]);
    911   EXPECT_EQ(25U, offsets[1]);
    912   offsets.clear();
    913 
    914   ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."),
    915                             subst,
    916                             &offsets);
    917   EXPECT_EQ(2U, offsets.size());
    918   EXPECT_EQ(25U, offsets[0]);
    919   EXPECT_EQ(7U, offsets[1]);
    920   offsets.clear();
    921 }
    922 
    923 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) {
    924   // Test whether replacestringplaceholders works as expected when there
    925   // are fewer inputs than outputs.
    926   std::vector<string16> subst;
    927   subst.push_back(ASCIIToUTF16("9a"));
    928   subst.push_back(ASCIIToUTF16("8b"));
    929   subst.push_back(ASCIIToUTF16("7c"));
    930 
    931   string16 formatted =
    932       ReplaceStringPlaceholders(
    933           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, nullptr);
    934 
    935   EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), formatted);
    936 }
    937 
    938 TEST(StringUtilTest, ReplaceStringPlaceholders) {
    939   std::vector<string16> subst;
    940   subst.push_back(ASCIIToUTF16("9a"));
    941   subst.push_back(ASCIIToUTF16("8b"));
    942   subst.push_back(ASCIIToUTF16("7c"));
    943   subst.push_back(ASCIIToUTF16("6d"));
    944   subst.push_back(ASCIIToUTF16("5e"));
    945   subst.push_back(ASCIIToUTF16("4f"));
    946   subst.push_back(ASCIIToUTF16("3g"));
    947   subst.push_back(ASCIIToUTF16("2h"));
    948   subst.push_back(ASCIIToUTF16("1i"));
    949 
    950   string16 formatted =
    951       ReplaceStringPlaceholders(
    952           ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, nullptr);
    953 
    954   EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), formatted);
    955 }
    956 
    957 TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) {
    958   std::vector<string16> subst;
    959   subst.push_back(ASCIIToUTF16("1a"));
    960   string16 formatted =
    961       ReplaceStringPlaceholders(ASCIIToUTF16(" $16 "), subst, nullptr);
    962   EXPECT_EQ(ASCIIToUTF16(" 1a6 "), formatted);
    963 }
    964 
    965 TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) {
    966   std::vector<string16> subst;
    967   subst.push_back(ASCIIToUTF16("1a"));
    968   string16 formatted =
    969       ReplaceStringPlaceholders(ASCIIToUTF16("+$-+$A+$1+"), subst, nullptr);
    970   EXPECT_EQ(ASCIIToUTF16("+++1a+"), formatted);
    971 }
    972 
    973 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) {
    974   std::vector<std::string> subst;
    975   subst.push_back("9a");
    976   subst.push_back("8b");
    977   subst.push_back("7c");
    978   subst.push_back("6d");
    979   subst.push_back("5e");
    980   subst.push_back("4f");
    981   subst.push_back("3g");
    982   subst.push_back("2h");
    983   subst.push_back("1i");
    984 
    985   std::string formatted =
    986       ReplaceStringPlaceholders(
    987           "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr);
    988 
    989   EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted);
    990 }
    991 
    992 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) {
    993   std::vector<std::string> subst;
    994   subst.push_back("a");
    995   subst.push_back("b");
    996   subst.push_back("c");
    997   EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, nullptr),
    998             "$1 $$2 $$$3");
    999 }
   1000 
   1001 TEST(StringUtilTest, LcpyTest) {
   1002   // Test the normal case where we fit in our buffer.
   1003   {
   1004     char dst[10];
   1005     wchar_t wdst[10];
   1006     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
   1007     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
   1008     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1009     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
   1010   }
   1011 
   1012   // Test dst_size == 0, nothing should be written to |dst| and we should
   1013   // have the equivalent of strlen(src).
   1014   {
   1015     char dst[2] = {1, 2};
   1016     wchar_t wdst[2] = {1, 2};
   1017     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", 0));
   1018     EXPECT_EQ(1, dst[0]);
   1019     EXPECT_EQ(2, dst[1]);
   1020     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", 0));
   1021     EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]);
   1022     EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]);
   1023   }
   1024 
   1025   // Test the case were we _just_ competely fit including the null.
   1026   {
   1027     char dst[8];
   1028     wchar_t wdst[8];
   1029     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
   1030     EXPECT_EQ(0, memcmp(dst, "abcdefg", 8));
   1031     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1032     EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8));
   1033   }
   1034 
   1035   // Test the case were we we are one smaller, so we can't fit the null.
   1036   {
   1037     char dst[7];
   1038     wchar_t wdst[7];
   1039     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
   1040     EXPECT_EQ(0, memcmp(dst, "abcdef", 7));
   1041     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1042     EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7));
   1043   }
   1044 
   1045   // Test the case were we are just too small.
   1046   {
   1047     char dst[3];
   1048     wchar_t wdst[3];
   1049     EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst)));
   1050     EXPECT_EQ(0, memcmp(dst, "ab", 3));
   1051     EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst)));
   1052     EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3));
   1053   }
   1054 }
   1055 
   1056 TEST(StringUtilTest, WprintfFormatPortabilityTest) {
   1057   static const struct {
   1058     const wchar_t* input;
   1059     bool portable;
   1060   } cases[] = {
   1061     { L"%ls", true },
   1062     { L"%s", false },
   1063     { L"%S", false },
   1064     { L"%lS", false },
   1065     { L"Hello, %s", false },
   1066     { L"%lc", true },
   1067     { L"%c", false },
   1068     { L"%C", false },
   1069     { L"%lC", false },
   1070     { L"%ls %s", false },
   1071     { L"%s %ls", false },
   1072     { L"%s %ls %s", false },
   1073     { L"%f", true },
   1074     { L"%f %F", false },
   1075     { L"%d %D", false },
   1076     { L"%o %O", false },
   1077     { L"%u %U", false },
   1078     { L"%f %d %o %u", true },
   1079     { L"%-8d (%02.1f%)", true },
   1080     { L"% 10s", false },
   1081     { L"% 10ls", true }
   1082   };
   1083   for (size_t i = 0; i < arraysize(cases); ++i)
   1084     EXPECT_EQ(cases[i].portable, IsWprintfFormatPortable(cases[i].input));
   1085 }
   1086 
   1087 TEST(StringUtilTest, RemoveChars) {
   1088   const char kRemoveChars[] = "-/+*";
   1089   std::string input = "A-+bc/d!*";
   1090   EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input));
   1091   EXPECT_EQ("Abcd!", input);
   1092 
   1093   // No characters match kRemoveChars.
   1094   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1095   EXPECT_EQ("Abcd!", input);
   1096 
   1097   // Empty string.
   1098   input.clear();
   1099   EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input));
   1100   EXPECT_EQ(std::string(), input);
   1101 }
   1102 
   1103 TEST(StringUtilTest, ReplaceChars) {
   1104   struct TestData {
   1105     const char* input;
   1106     const char* replace_chars;
   1107     const char* replace_with;
   1108     const char* output;
   1109     bool result;
   1110   } cases[] = {
   1111     { "", "", "", "", false },
   1112     { "test", "", "", "test", false },
   1113     { "test", "", "!", "test", false },
   1114     { "test", "z", "!", "test", false },
   1115     { "test", "e", "!", "t!st", true },
   1116     { "test", "e", "!?", "t!?st", true },
   1117     { "test", "ez", "!", "t!st", true },
   1118     { "test", "zed", "!?", "t!?st", true },
   1119     { "test", "t", "!?", "!?es!?", true },
   1120     { "test", "et", "!>", "!>!>s!>", true },
   1121     { "test", "zest", "!", "!!!!", true },
   1122     { "test", "szt", "!", "!e!!", true },
   1123     { "test", "t", "test", "testestest", true },
   1124   };
   1125 
   1126   for (size_t i = 0; i < arraysize(cases); ++i) {
   1127     std::string output;
   1128     bool result = ReplaceChars(cases[i].input,
   1129                                cases[i].replace_chars,
   1130                                cases[i].replace_with,
   1131                                &output);
   1132     EXPECT_EQ(cases[i].result, result);
   1133     EXPECT_EQ(cases[i].output, output);
   1134   }
   1135 }
   1136 
   1137 TEST(StringUtilTest, ContainsOnlyChars) {
   1138   // Providing an empty list of characters should return false but for the empty
   1139   // string.
   1140   EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string()));
   1141   EXPECT_FALSE(ContainsOnlyChars("Hello", std::string()));
   1142 
   1143   EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234"));
   1144   EXPECT_TRUE(ContainsOnlyChars("1", "1234"));
   1145   EXPECT_TRUE(ContainsOnlyChars("1", "4321"));
   1146   EXPECT_TRUE(ContainsOnlyChars("123", "4321"));
   1147   EXPECT_FALSE(ContainsOnlyChars("123a", "4321"));
   1148 
   1149   EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII));
   1150   EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII));
   1151   EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII));
   1152   EXPECT_TRUE(ContainsOnlyChars("\t \r \n  ", kWhitespaceASCII));
   1153   EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII));
   1154   EXPECT_FALSE(ContainsOnlyChars("\thello\r \n  ", kWhitespaceASCII));
   1155 
   1156   EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16));
   1157   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16));
   1158   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16));
   1159   EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n  "), kWhitespaceUTF16));
   1160   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16));
   1161   EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n  "),
   1162                                   kWhitespaceUTF16));
   1163 }
   1164 
   1165 TEST(StringUtilTest, CompareCaseInsensitiveASCII) {
   1166   EXPECT_EQ(0, CompareCaseInsensitiveASCII("", ""));
   1167   EXPECT_EQ(0, CompareCaseInsensitiveASCII("Asdf", "aSDf"));
   1168 
   1169   // Differing lengths.
   1170   EXPECT_EQ(-1, CompareCaseInsensitiveASCII("Asdf", "aSDfA"));
   1171   EXPECT_EQ(1, CompareCaseInsensitiveASCII("AsdfA", "aSDf"));
   1172 
   1173   // Differing values.
   1174   EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AsdfA", "aSDfb"));
   1175   EXPECT_EQ(1, CompareCaseInsensitiveASCII("Asdfb", "aSDfA"));
   1176 }
   1177 
   1178 TEST(StringUtilTest, EqualsCaseInsensitiveASCII) {
   1179   EXPECT_TRUE(EqualsCaseInsensitiveASCII("", ""));
   1180   EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", "aSDF"));
   1181   EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", "aSDF"));
   1182   EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", "aSDFz"));
   1183 }
   1184 
   1185 TEST(StringUtilTest, IsUnicodeWhitespace) {
   1186   // NOT unicode white space.
   1187   EXPECT_FALSE(IsUnicodeWhitespace(L'\0'));
   1188   EXPECT_FALSE(IsUnicodeWhitespace(L'A'));
   1189   EXPECT_FALSE(IsUnicodeWhitespace(L'0'));
   1190   EXPECT_FALSE(IsUnicodeWhitespace(L'.'));
   1191   EXPECT_FALSE(IsUnicodeWhitespace(L';'));
   1192   EXPECT_FALSE(IsUnicodeWhitespace(L'\x4100'));
   1193 
   1194   // Actual unicode whitespace.
   1195   EXPECT_TRUE(IsUnicodeWhitespace(L' '));
   1196   EXPECT_TRUE(IsUnicodeWhitespace(L'\xa0'));
   1197   EXPECT_TRUE(IsUnicodeWhitespace(L'\x3000'));
   1198   EXPECT_TRUE(IsUnicodeWhitespace(L'\t'));
   1199   EXPECT_TRUE(IsUnicodeWhitespace(L'\r'));
   1200   EXPECT_TRUE(IsUnicodeWhitespace(L'\v'));
   1201   EXPECT_TRUE(IsUnicodeWhitespace(L'\f'));
   1202   EXPECT_TRUE(IsUnicodeWhitespace(L'\n'));
   1203 }
   1204 
   1205 class WriteIntoTest : public testing::Test {
   1206  protected:
   1207   static void WritesCorrectly(size_t num_chars) {
   1208     std::string buffer;
   1209     char kOriginal[] = "supercali";
   1210     strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars);
   1211     // Using std::string(buffer.c_str()) instead of |buffer| truncates the
   1212     // string at the first \0.
   1213     EXPECT_EQ(std::string(kOriginal,
   1214                           std::min(num_chars, arraysize(kOriginal) - 1)),
   1215               std::string(buffer.c_str()));
   1216     EXPECT_EQ(num_chars, buffer.size());
   1217   }
   1218 };
   1219 
   1220 TEST_F(WriteIntoTest, WriteInto) {
   1221   // Validate that WriteInto reserves enough space and
   1222   // sizes a string correctly.
   1223   WritesCorrectly(1);
   1224   WritesCorrectly(2);
   1225   WritesCorrectly(5000);
   1226 
   1227   // Validate that WriteInto doesn't modify other strings
   1228   // when using a Copy-on-Write implementation.
   1229   const char kLive[] = "live";
   1230   const char kDead[] = "dead";
   1231   const std::string live = kLive;
   1232   std::string dead = live;
   1233   strncpy(WriteInto(&dead, 5), kDead, 4);
   1234   EXPECT_EQ(kDead, dead);
   1235   EXPECT_EQ(4u, dead.size());
   1236   EXPECT_EQ(kLive, live);
   1237   EXPECT_EQ(4u, live.size());
   1238 }
   1239 
   1240 }  // namespace base
   1241