Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_split.h"
      6 
      7 #include <stddef.h>
      8 
      9 #include "base/macros.h"
     10 #include "base/strings/string_util.h"
     11 #include "base/strings/utf_string_conversions.h"
     12 #include "testing/gmock/include/gmock/gmock.h"
     13 #include "testing/gtest/include/gtest/gtest.h"
     14 
     15 using ::testing::ElementsAre;
     16 
     17 namespace base {
     18 
     19 class SplitStringIntoKeyValuePairsTest : public testing::Test {
     20  protected:
     21   base::StringPairs kv_pairs;
     22 };
     23 
     24 TEST_F(SplitStringIntoKeyValuePairsTest, EmptyString) {
     25   EXPECT_TRUE(SplitStringIntoKeyValuePairs(std::string(),
     26                                            ':',  // Key-value delimiter
     27                                            ',',  // Key-value pair delimiter
     28                                            &kv_pairs));
     29   EXPECT_TRUE(kv_pairs.empty());
     30 }
     31 
     32 TEST_F(SplitStringIntoKeyValuePairsTest, MissingKeyValueDelimiter) {
     33   EXPECT_FALSE(SplitStringIntoKeyValuePairs("key1,key2:value2",
     34                                             ':',  // Key-value delimiter
     35                                             ',',  // Key-value pair delimiter
     36                                             &kv_pairs));
     37   ASSERT_EQ(2U, kv_pairs.size());
     38   EXPECT_TRUE(kv_pairs[0].first.empty());
     39   EXPECT_TRUE(kv_pairs[0].second.empty());
     40   EXPECT_EQ("key2", kv_pairs[1].first);
     41   EXPECT_EQ("value2", kv_pairs[1].second);
     42 }
     43 
     44 TEST_F(SplitStringIntoKeyValuePairsTest, EmptyKeyWithKeyValueDelimiter) {
     45   EXPECT_TRUE(SplitStringIntoKeyValuePairs(":value1,key2:value2",
     46                                            ':',  // Key-value delimiter
     47                                            ',',  // Key-value pair delimiter
     48                                            &kv_pairs));
     49   ASSERT_EQ(2U, kv_pairs.size());
     50   EXPECT_TRUE(kv_pairs[0].first.empty());
     51   EXPECT_EQ("value1", kv_pairs[0].second);
     52   EXPECT_EQ("key2", kv_pairs[1].first);
     53   EXPECT_EQ("value2", kv_pairs[1].second);
     54 }
     55 
     56 TEST_F(SplitStringIntoKeyValuePairsTest, TrailingAndLeadingPairDelimiter) {
     57   EXPECT_TRUE(SplitStringIntoKeyValuePairs(",key1:value1,key2:value2,",
     58                                            ':',   // Key-value delimiter
     59                                            ',',   // Key-value pair delimiter
     60                                            &kv_pairs));
     61   ASSERT_EQ(2U, kv_pairs.size());
     62   EXPECT_EQ("key1", kv_pairs[0].first);
     63   EXPECT_EQ("value1", kv_pairs[0].second);
     64   EXPECT_EQ("key2", kv_pairs[1].first);
     65   EXPECT_EQ("value2", kv_pairs[1].second);
     66 }
     67 
     68 TEST_F(SplitStringIntoKeyValuePairsTest, EmptyPair) {
     69   EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:value1,,key3:value3",
     70                                            ':',   // Key-value delimiter
     71                                            ',',   // Key-value pair delimiter
     72                                            &kv_pairs));
     73   ASSERT_EQ(2U, kv_pairs.size());
     74   EXPECT_EQ("key1", kv_pairs[0].first);
     75   EXPECT_EQ("value1", kv_pairs[0].second);
     76   EXPECT_EQ("key3", kv_pairs[1].first);
     77   EXPECT_EQ("value3", kv_pairs[1].second);
     78 }
     79 
     80 TEST_F(SplitStringIntoKeyValuePairsTest, EmptyValue) {
     81   EXPECT_FALSE(SplitStringIntoKeyValuePairs("key1:,key2:value2",
     82                                             ':',   // Key-value delimiter
     83                                             ',',   // Key-value pair delimiter
     84                                             &kv_pairs));
     85   ASSERT_EQ(2U, kv_pairs.size());
     86   EXPECT_EQ("key1", kv_pairs[0].first);
     87   EXPECT_EQ("", kv_pairs[0].second);
     88   EXPECT_EQ("key2", kv_pairs[1].first);
     89   EXPECT_EQ("value2", kv_pairs[1].second);
     90 }
     91 
     92 TEST_F(SplitStringIntoKeyValuePairsTest, UntrimmedWhitespace) {
     93   EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1 : value1",
     94                                            ':',  // Key-value delimiter
     95                                            ',',  // Key-value pair delimiter
     96                                            &kv_pairs));
     97   ASSERT_EQ(1U, kv_pairs.size());
     98   EXPECT_EQ("key1 ", kv_pairs[0].first);
     99   EXPECT_EQ(" value1", kv_pairs[0].second);
    100 }
    101 
    102 TEST_F(SplitStringIntoKeyValuePairsTest, TrimmedWhitespace) {
    103   EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:value1 , key2:value2",
    104                                            ':',   // Key-value delimiter
    105                                            ',',   // Key-value pair delimiter
    106                                            &kv_pairs));
    107   ASSERT_EQ(2U, kv_pairs.size());
    108   EXPECT_EQ("key1", kv_pairs[0].first);
    109   EXPECT_EQ("value1", kv_pairs[0].second);
    110   EXPECT_EQ("key2", kv_pairs[1].first);
    111   EXPECT_EQ("value2", kv_pairs[1].second);
    112 }
    113 
    114 TEST_F(SplitStringIntoKeyValuePairsTest, MultipleKeyValueDelimiters) {
    115   EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:::value1,key2:value2",
    116                                            ':',   // Key-value delimiter
    117                                            ',',   // Key-value pair delimiter
    118                                            &kv_pairs));
    119   ASSERT_EQ(2U, kv_pairs.size());
    120   EXPECT_EQ("key1", kv_pairs[0].first);
    121   EXPECT_EQ("value1", kv_pairs[0].second);
    122   EXPECT_EQ("key2", kv_pairs[1].first);
    123   EXPECT_EQ("value2", kv_pairs[1].second);
    124 }
    125 
    126 TEST_F(SplitStringIntoKeyValuePairsTest, OnlySplitAtGivenSeparator) {
    127   std::string a("a ?!@#$%^&*()_+:/{}\\\t\nb");
    128   EXPECT_TRUE(SplitStringIntoKeyValuePairs(a + "X" + a + "Y" + a + "X" + a,
    129                                            'X',  // Key-value delimiter
    130                                            'Y',  // Key-value pair delimiter
    131                                            &kv_pairs));
    132   ASSERT_EQ(2U, kv_pairs.size());
    133   EXPECT_EQ(a, kv_pairs[0].first);
    134   EXPECT_EQ(a, kv_pairs[0].second);
    135   EXPECT_EQ(a, kv_pairs[1].first);
    136   EXPECT_EQ(a, kv_pairs[1].second);
    137 }
    138 
    139 
    140 TEST_F(SplitStringIntoKeyValuePairsTest, DelimiterInValue) {
    141   EXPECT_TRUE(SplitStringIntoKeyValuePairs("key1:va:ue1,key2:value2",
    142                                            ':',   // Key-value delimiter
    143                                            ',',   // Key-value pair delimiter
    144                                            &kv_pairs));
    145   ASSERT_EQ(2U, kv_pairs.size());
    146   EXPECT_EQ("key1", kv_pairs[0].first);
    147   EXPECT_EQ("va:ue1", kv_pairs[0].second);
    148   EXPECT_EQ("key2", kv_pairs[1].first);
    149   EXPECT_EQ("value2", kv_pairs[1].second);
    150 }
    151 
    152 TEST(SplitStringUsingSubstrTest, EmptyString) {
    153   std::vector<std::string> results;
    154   SplitStringUsingSubstr(std::string(), "DELIMITER", &results);
    155   ASSERT_EQ(1u, results.size());
    156   EXPECT_THAT(results, ElementsAre(""));
    157 }
    158 
    159 TEST(StringUtilTest, SplitString_Basics) {
    160   std::vector<std::string> r;
    161 
    162   r = SplitString(std::string(), ",:;", KEEP_WHITESPACE, SPLIT_WANT_ALL);
    163   EXPECT_TRUE(r.empty());
    164 
    165   // Empty separator list
    166   r = SplitString("hello, world", "", KEEP_WHITESPACE, SPLIT_WANT_ALL);
    167   ASSERT_EQ(1u, r.size());
    168   EXPECT_EQ("hello, world", r[0]);
    169 
    170   // Should split on any of the separators.
    171   r = SplitString("::,,;;", ",:;", KEEP_WHITESPACE, SPLIT_WANT_ALL);
    172   ASSERT_EQ(7u, r.size());
    173   for (auto str : r)
    174     ASSERT_TRUE(str.empty());
    175 
    176   r = SplitString("red, green; blue:", ",:;", TRIM_WHITESPACE,
    177                   SPLIT_WANT_NONEMPTY);
    178   ASSERT_EQ(3u, r.size());
    179   EXPECT_EQ("red", r[0]);
    180   EXPECT_EQ("green", r[1]);
    181   EXPECT_EQ("blue", r[2]);
    182 
    183   // Want to split a string along whitespace sequences.
    184   r = SplitString("  red green   \tblue\n", " \t\n", TRIM_WHITESPACE,
    185                   SPLIT_WANT_NONEMPTY);
    186   ASSERT_EQ(3u, r.size());
    187   EXPECT_EQ("red", r[0]);
    188   EXPECT_EQ("green", r[1]);
    189   EXPECT_EQ("blue", r[2]);
    190 
    191   // Weird case of splitting on spaces but not trimming.
    192   r = SplitString(" red ", " ", TRIM_WHITESPACE, SPLIT_WANT_ALL);
    193   ASSERT_EQ(3u, r.size());
    194   EXPECT_EQ("", r[0]);  // Before the first space.
    195   EXPECT_EQ("red", r[1]);
    196   EXPECT_EQ("", r[2]);  // After the last space.
    197 }
    198 
    199 TEST(StringUtilTest, SplitString_WhitespaceAndResultType) {
    200   std::vector<std::string> r;
    201 
    202   // Empty input handling.
    203   r = SplitString(std::string(), ",", KEEP_WHITESPACE, SPLIT_WANT_ALL);
    204   EXPECT_TRUE(r.empty());
    205   r = SplitString(std::string(), ",", KEEP_WHITESPACE, SPLIT_WANT_NONEMPTY);
    206   EXPECT_TRUE(r.empty());
    207 
    208   // Input string is space and we're trimming.
    209   r = SplitString(" ", ",", TRIM_WHITESPACE, SPLIT_WANT_ALL);
    210   ASSERT_EQ(1u, r.size());
    211   EXPECT_EQ("", r[0]);
    212   r = SplitString(" ", ",", TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
    213   EXPECT_TRUE(r.empty());
    214 
    215   // Test all 4 combinations of flags on ", ,".
    216   r = SplitString(", ,", ",", KEEP_WHITESPACE, SPLIT_WANT_ALL);
    217   ASSERT_EQ(3u, r.size());
    218   EXPECT_EQ("", r[0]);
    219   EXPECT_EQ(" ", r[1]);
    220   EXPECT_EQ("", r[2]);
    221   r = SplitString(", ,", ",", KEEP_WHITESPACE, SPLIT_WANT_NONEMPTY);
    222   ASSERT_EQ(1u, r.size());
    223   ASSERT_EQ(" ", r[0]);
    224   r = SplitString(", ,", ",", TRIM_WHITESPACE, SPLIT_WANT_ALL);
    225   ASSERT_EQ(3u, r.size());
    226   EXPECT_EQ("", r[0]);
    227   EXPECT_EQ("", r[1]);
    228   EXPECT_EQ("", r[2]);
    229   r = SplitString(", ,", ",", TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY);
    230   ASSERT_TRUE(r.empty());
    231 }
    232 
    233 TEST(SplitStringUsingSubstrTest, StringWithNoDelimiter) {
    234   std::vector<std::string> results;
    235   SplitStringUsingSubstr("alongwordwithnodelimiter", "DELIMITER", &results);
    236   ASSERT_EQ(1u, results.size());
    237   EXPECT_THAT(results, ElementsAre("alongwordwithnodelimiter"));
    238 }
    239 
    240 TEST(SplitStringUsingSubstrTest, LeadingDelimitersSkipped) {
    241   std::vector<std::string> results;
    242   SplitStringUsingSubstr(
    243       "DELIMITERDELIMITERDELIMITERoneDELIMITERtwoDELIMITERthree",
    244       "DELIMITER",
    245       &results);
    246   ASSERT_EQ(6u, results.size());
    247   EXPECT_THAT(results, ElementsAre("", "", "", "one", "two", "three"));
    248 }
    249 
    250 TEST(SplitStringUsingSubstrTest, ConsecutiveDelimitersSkipped) {
    251   std::vector<std::string> results;
    252   SplitStringUsingSubstr(
    253       "unoDELIMITERDELIMITERDELIMITERdosDELIMITERtresDELIMITERDELIMITERcuatro",
    254       "DELIMITER",
    255       &results);
    256   ASSERT_EQ(7u, results.size());
    257   EXPECT_THAT(results, ElementsAre("uno", "", "", "dos", "tres", "", "cuatro"));
    258 }
    259 
    260 TEST(SplitStringUsingSubstrTest, TrailingDelimitersSkipped) {
    261   std::vector<std::string> results;
    262   SplitStringUsingSubstr(
    263       "unDELIMITERdeuxDELIMITERtroisDELIMITERquatreDELIMITERDELIMITERDELIMITER",
    264       "DELIMITER",
    265       &results);
    266   ASSERT_EQ(7u, results.size());
    267   EXPECT_THAT(
    268       results, ElementsAre("un", "deux", "trois", "quatre", "", "", ""));
    269 }
    270 
    271 TEST(SplitStringPieceUsingSubstrTest, StringWithNoDelimiter) {
    272   std::vector<base::StringPiece> results =
    273       SplitStringPieceUsingSubstr("alongwordwithnodelimiter", "DELIMITER",
    274                                   base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
    275   ASSERT_EQ(1u, results.size());
    276   EXPECT_THAT(results, ElementsAre("alongwordwithnodelimiter"));
    277 }
    278 
    279 TEST(SplitStringPieceUsingSubstrTest, LeadingDelimitersSkipped) {
    280   std::vector<base::StringPiece> results = SplitStringPieceUsingSubstr(
    281       "DELIMITERDELIMITERDELIMITERoneDELIMITERtwoDELIMITERthree", "DELIMITER",
    282       base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
    283   ASSERT_EQ(6u, results.size());
    284   EXPECT_THAT(results, ElementsAre("", "", "", "one", "two", "three"));
    285 }
    286 
    287 TEST(SplitStringPieceUsingSubstrTest, ConsecutiveDelimitersSkipped) {
    288   std::vector<base::StringPiece> results = SplitStringPieceUsingSubstr(
    289       "unoDELIMITERDELIMITERDELIMITERdosDELIMITERtresDELIMITERDELIMITERcuatro",
    290       "DELIMITER", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
    291   ASSERT_EQ(7u, results.size());
    292   EXPECT_THAT(results, ElementsAre("uno", "", "", "dos", "tres", "", "cuatro"));
    293 }
    294 
    295 TEST(SplitStringPieceUsingSubstrTest, TrailingDelimitersSkipped) {
    296   std::vector<base::StringPiece> results = SplitStringPieceUsingSubstr(
    297       "unDELIMITERdeuxDELIMITERtroisDELIMITERquatreDELIMITERDELIMITERDELIMITER",
    298       "DELIMITER", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
    299   ASSERT_EQ(7u, results.size());
    300   EXPECT_THAT(results,
    301               ElementsAre("un", "deux", "trois", "quatre", "", "", ""));
    302 }
    303 
    304 TEST(SplitStringPieceUsingSubstrTest, KeepWhitespace) {
    305   std::vector<base::StringPiece> results = SplitStringPieceUsingSubstr(
    306       "un DELIMITERdeux\tDELIMITERtrois\nDELIMITERquatre", "DELIMITER",
    307       base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
    308   ASSERT_EQ(4u, results.size());
    309   EXPECT_THAT(results, ElementsAre("un ", "deux\t", "trois\n", "quatre"));
    310 }
    311 
    312 TEST(SplitStringPieceUsingSubstrTest, TrimWhitespace) {
    313   std::vector<base::StringPiece> results = SplitStringPieceUsingSubstr(
    314       "un DELIMITERdeux\tDELIMITERtrois\nDELIMITERquatre", "DELIMITER",
    315       base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
    316   ASSERT_EQ(4u, results.size());
    317   EXPECT_THAT(results, ElementsAre("un", "deux", "trois", "quatre"));
    318 }
    319 
    320 TEST(SplitStringPieceUsingSubstrTest, SplitWantAll) {
    321   std::vector<base::StringPiece> results = SplitStringPieceUsingSubstr(
    322       "unDELIMITERdeuxDELIMITERtroisDELIMITERDELIMITER", "DELIMITER",
    323       base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL);
    324   ASSERT_EQ(5u, results.size());
    325   EXPECT_THAT(results, ElementsAre("un", "deux", "trois", "", ""));
    326 }
    327 
    328 TEST(SplitStringPieceUsingSubstrTest, SplitWantNonEmpty) {
    329   std::vector<base::StringPiece> results = SplitStringPieceUsingSubstr(
    330       "unDELIMITERdeuxDELIMITERtroisDELIMITERDELIMITER", "DELIMITER",
    331       base::TRIM_WHITESPACE, base::SPLIT_WANT_NONEMPTY);
    332   ASSERT_EQ(3u, results.size());
    333   EXPECT_THAT(results, ElementsAre("un", "deux", "trois"));
    334 }
    335 
    336 TEST(StringSplitTest, StringSplitKeepWhitespace) {
    337   std::vector<std::string> r;
    338 
    339   r = SplitString("   ", "*", base::KEEP_WHITESPACE, base::SPLIT_WANT_ALL);
    340   ASSERT_EQ(1U, r.size());
    341   EXPECT_EQ(r[0], "   ");
    342 
    343   r = SplitString("\t  \ta\t ", "\t", base::KEEP_WHITESPACE,
    344                   base::SPLIT_WANT_ALL);
    345   ASSERT_EQ(4U, r.size());
    346   EXPECT_EQ(r[0], "");
    347   EXPECT_EQ(r[1], "  ");
    348   EXPECT_EQ(r[2], "a");
    349   EXPECT_EQ(r[3], " ");
    350 
    351   r = SplitString("\ta\t\nb\tcc", "\n", base::KEEP_WHITESPACE,
    352                   base::SPLIT_WANT_ALL);
    353   ASSERT_EQ(2U, r.size());
    354   EXPECT_EQ(r[0], "\ta\t");
    355   EXPECT_EQ(r[1], "b\tcc");
    356 }
    357 
    358 TEST(StringSplitTest, SplitStringAlongWhitespace) {
    359   struct TestData {
    360     const char* input;
    361     const size_t expected_result_count;
    362     const char* output1;
    363     const char* output2;
    364   } data[] = {
    365     { "a",       1, "a",  ""   },
    366     { " ",       0, "",   ""   },
    367     { " a",      1, "a",  ""   },
    368     { " ab ",    1, "ab", ""   },
    369     { " ab c",   2, "ab", "c"  },
    370     { " ab c ",  2, "ab", "c"  },
    371     { " ab cd",  2, "ab", "cd" },
    372     { " ab cd ", 2, "ab", "cd" },
    373     { " \ta\t",  1, "a",  ""   },
    374     { " b\ta\t", 2, "b",  "a"  },
    375     { " b\tat",  2, "b",  "at" },
    376     { "b\tat",   2, "b",  "at" },
    377     { "b\t at",  2, "b",  "at" },
    378   };
    379   for (size_t i = 0; i < arraysize(data); ++i) {
    380     std::vector<std::string> results = base::SplitString(
    381         data[i].input, kWhitespaceASCII, base::KEEP_WHITESPACE,
    382         base::SPLIT_WANT_NONEMPTY);
    383     ASSERT_EQ(data[i].expected_result_count, results.size());
    384     if (data[i].expected_result_count > 0)
    385       ASSERT_EQ(data[i].output1, results[0]);
    386     if (data[i].expected_result_count > 1)
    387       ASSERT_EQ(data[i].output2, results[1]);
    388   }
    389 }
    390 
    391 }  // namespace base
    392