Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_split.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/strings/string_util.h"
      9 #include "base/strings/utf_string_conversions.h"
     10 #include "base/third_party/icu/icu_utf.h"
     11 
     12 namespace base {
     13 
     14 template<typename STR>
     15 static void SplitStringT(const STR& str,
     16                          const typename STR::value_type s,
     17                          bool trim_whitespace,
     18                          std::vector<STR>* r) {
     19   r->clear();
     20   size_t last = 0;
     21   size_t c = str.size();
     22   for (size_t i = 0; i <= c; ++i) {
     23     if (i == c || str[i] == s) {
     24       STR tmp(str, last, i - last);
     25       if (trim_whitespace)
     26         TrimWhitespace(tmp, TRIM_ALL, &tmp);
     27       // Avoid converting an empty or all-whitespace source string into a vector
     28       // of one empty string.
     29       if (i != c || !r->empty() || !tmp.empty())
     30         r->push_back(tmp);
     31       last = i + 1;
     32     }
     33   }
     34 }
     35 
     36 void SplitString(const string16& str,
     37                  char16 c,
     38                  std::vector<string16>* r) {
     39   DCHECK(CBU16_IS_SINGLE(c));
     40   SplitStringT(str, c, true, r);
     41 }
     42 
     43 void SplitString(const std::string& str,
     44                  char c,
     45                  std::vector<std::string>* r) {
     46 #if CHAR_MIN < 0
     47   DCHECK(c >= 0);
     48 #endif
     49   DCHECK(c < 0x7F);
     50   SplitStringT(str, c, true, r);
     51 }
     52 
     53 bool SplitStringIntoKeyValues(
     54     const std::string& line,
     55     char key_value_delimiter,
     56     std::string* key, std::vector<std::string>* values) {
     57   key->clear();
     58   values->clear();
     59 
     60   // Find the key string.
     61   size_t end_key_pos = line.find_first_of(key_value_delimiter);
     62   if (end_key_pos == std::string::npos) {
     63     DVLOG(1) << "cannot parse key from line: " << line;
     64     return false;    // no key
     65   }
     66   key->assign(line, 0, end_key_pos);
     67 
     68   // Find the values string.
     69   std::string remains(line, end_key_pos, line.size() - end_key_pos);
     70   size_t begin_values_pos = remains.find_first_not_of(key_value_delimiter);
     71   if (begin_values_pos == std::string::npos) {
     72     DVLOG(1) << "cannot parse value from line: " << line;
     73     return false;   // no value
     74   }
     75   std::string values_string(remains, begin_values_pos,
     76                             remains.size() - begin_values_pos);
     77 
     78   // Construct the values vector.
     79   values->push_back(values_string);
     80   return true;
     81 }
     82 
     83 bool SplitStringIntoKeyValuePairs(const std::string& line,
     84                                   char key_value_delimiter,
     85                                   char key_value_pair_delimiter,
     86                                   StringPairs* key_value_pairs) {
     87   key_value_pairs->clear();
     88 
     89   std::vector<std::string> pairs;
     90   SplitString(line, key_value_pair_delimiter, &pairs);
     91 
     92   bool success = true;
     93   for (size_t i = 0; i < pairs.size(); ++i) {
     94     // Empty pair. SplitStringIntoKeyValues is more strict about an empty pair
     95     // line, so continue with the next pair.
     96     if (pairs[i].empty())
     97       continue;
     98 
     99     std::string key;
    100     std::vector<std::string> value;
    101     if (!SplitStringIntoKeyValues(pairs[i],
    102                                   key_value_delimiter,
    103                                   &key, &value)) {
    104       // Don't return here, to allow for keys without associated
    105       // values; just record that our split failed.
    106       success = false;
    107     }
    108     DCHECK_LE(value.size(), 1U);
    109     key_value_pairs->push_back(
    110         make_pair(key, value.empty() ? std::string() : value[0]));
    111   }
    112   return success;
    113 }
    114 
    115 template <typename STR>
    116 static void SplitStringUsingSubstrT(const STR& str,
    117                                     const STR& s,
    118                                     std::vector<STR>* r) {
    119   r->clear();
    120   typename STR::size_type begin_index = 0;
    121   while (true) {
    122     const typename STR::size_type end_index = str.find(s, begin_index);
    123     if (end_index == STR::npos) {
    124       const STR term = str.substr(begin_index);
    125       STR tmp;
    126       TrimWhitespace(term, TRIM_ALL, &tmp);
    127       r->push_back(tmp);
    128       return;
    129     }
    130     const STR term = str.substr(begin_index, end_index - begin_index);
    131     STR tmp;
    132     TrimWhitespace(term, TRIM_ALL, &tmp);
    133     r->push_back(tmp);
    134     begin_index = end_index + s.size();
    135   }
    136 }
    137 
    138 void SplitStringUsingSubstr(const string16& str,
    139                             const string16& s,
    140                             std::vector<string16>* r) {
    141   SplitStringUsingSubstrT(str, s, r);
    142 }
    143 
    144 void SplitStringUsingSubstr(const std::string& str,
    145                             const std::string& s,
    146                             std::vector<std::string>* r) {
    147   SplitStringUsingSubstrT(str, s, r);
    148 }
    149 
    150 void SplitStringDontTrim(const string16& str,
    151                          char16 c,
    152                          std::vector<string16>* r) {
    153   DCHECK(CBU16_IS_SINGLE(c));
    154   SplitStringT(str, c, false, r);
    155 }
    156 
    157 void SplitStringDontTrim(const std::string& str,
    158                          char c,
    159                          std::vector<std::string>* r) {
    160   DCHECK(IsStringUTF8(str));
    161 #if CHAR_MIN < 0
    162   DCHECK(c >= 0);
    163 #endif
    164   DCHECK(c < 0x7F);
    165   SplitStringT(str, c, false, r);
    166 }
    167 
    168 template<typename STR>
    169 void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
    170   result->clear();
    171   const size_t length = str.length();
    172   if (!length)
    173     return;
    174 
    175   bool last_was_ws = false;
    176   size_t last_non_ws_start = 0;
    177   for (size_t i = 0; i < length; ++i) {
    178     switch (str[i]) {
    179       // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
    180       case L' ':
    181       case L'\t':
    182       case L'\xA':
    183       case L'\xB':
    184       case L'\xC':
    185       case L'\xD':
    186         if (!last_was_ws) {
    187           if (i > 0) {
    188             result->push_back(
    189                 str.substr(last_non_ws_start, i - last_non_ws_start));
    190           }
    191           last_was_ws = true;
    192         }
    193         break;
    194 
    195       default:  // Not a space character.
    196         if (last_was_ws) {
    197           last_was_ws = false;
    198           last_non_ws_start = i;
    199         }
    200         break;
    201     }
    202   }
    203   if (!last_was_ws) {
    204     result->push_back(
    205         str.substr(last_non_ws_start, length - last_non_ws_start));
    206   }
    207 }
    208 
    209 void SplitStringAlongWhitespace(const string16& str,
    210                                 std::vector<string16>* result) {
    211   SplitStringAlongWhitespaceT(str, result);
    212 }
    213 
    214 void SplitStringAlongWhitespace(const std::string& str,
    215                                 std::vector<std::string>* result) {
    216   SplitStringAlongWhitespaceT(str, result);
    217 }
    218 
    219 }  // namespace base
    220