Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_split.h"
      6 
      7 #include "base/logging.h"
      8 #include "base/strings/string_util.h"
      9 #include "base/strings/utf_string_conversions.h"
     10 #include "base/third_party/icu/icu_utf.h"
     11 
     12 namespace base {
     13 
     14 namespace {
     15 
     16 template <typename STR>
     17 void SplitStringT(const STR& str,
     18                   const typename STR::value_type s,
     19                   bool trim_whitespace,
     20                   std::vector<STR>* r) {
     21   r->clear();
     22   size_t last = 0;
     23   size_t c = str.size();
     24   for (size_t i = 0; i <= c; ++i) {
     25     if (i == c || str[i] == s) {
     26       STR tmp(str, last, i - last);
     27       if (trim_whitespace)
     28         TrimWhitespace(tmp, TRIM_ALL, &tmp);
     29       // Avoid converting an empty or all-whitespace source string into a vector
     30       // of one empty string.
     31       if (i != c || !r->empty() || !tmp.empty())
     32         r->push_back(tmp);
     33       last = i + 1;
     34     }
     35   }
     36 }
     37 
     38 bool SplitStringIntoKeyValue(const std::string& line,
     39                              char key_value_delimiter,
     40                              std::string* key,
     41                              std::string* value) {
     42   key->clear();
     43   value->clear();
     44 
     45   // Find the delimiter.
     46   size_t end_key_pos = line.find_first_of(key_value_delimiter);
     47   if (end_key_pos == std::string::npos) {
     48     DVLOG(1) << "cannot find delimiter in: " << line;
     49     return false;    // no delimiter
     50   }
     51   key->assign(line, 0, end_key_pos);
     52 
     53   // Find the value string.
     54   std::string remains(line, end_key_pos, line.size() - end_key_pos);
     55   size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter);
     56   if (begin_value_pos == std::string::npos) {
     57     DVLOG(1) << "cannot parse value from line: " << line;
     58     return false;   // no value
     59   }
     60   value->assign(remains, begin_value_pos, remains.size() - begin_value_pos);
     61   return true;
     62 }
     63 
     64 template <typename STR>
     65 void SplitStringUsingSubstrT(const STR& str,
     66                                     const STR& s,
     67                                     std::vector<STR>* r) {
     68   r->clear();
     69   typename STR::size_type begin_index = 0;
     70   while (true) {
     71     const typename STR::size_type end_index = str.find(s, begin_index);
     72     if (end_index == STR::npos) {
     73       const STR term = str.substr(begin_index);
     74       STR tmp;
     75       TrimWhitespace(term, TRIM_ALL, &tmp);
     76       r->push_back(tmp);
     77       return;
     78     }
     79     const STR term = str.substr(begin_index, end_index - begin_index);
     80     STR tmp;
     81     TrimWhitespace(term, TRIM_ALL, &tmp);
     82     r->push_back(tmp);
     83     begin_index = end_index + s.size();
     84   }
     85 }
     86 
     87 template<typename STR>
     88 void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) {
     89   result->clear();
     90   const size_t length = str.length();
     91   if (!length)
     92     return;
     93 
     94   bool last_was_ws = false;
     95   size_t last_non_ws_start = 0;
     96   for (size_t i = 0; i < length; ++i) {
     97     switch (str[i]) {
     98       // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR.
     99       case L' ':
    100       case L'\t':
    101       case L'\xA':
    102       case L'\xB':
    103       case L'\xC':
    104       case L'\xD':
    105         if (!last_was_ws) {
    106           if (i > 0) {
    107             result->push_back(
    108                 str.substr(last_non_ws_start, i - last_non_ws_start));
    109           }
    110           last_was_ws = true;
    111         }
    112         break;
    113 
    114       default:  // Not a space character.
    115         if (last_was_ws) {
    116           last_was_ws = false;
    117           last_non_ws_start = i;
    118         }
    119         break;
    120     }
    121   }
    122   if (!last_was_ws) {
    123     result->push_back(
    124         str.substr(last_non_ws_start, length - last_non_ws_start));
    125   }
    126 }
    127 
    128 }  // namespace
    129 
    130 void SplitString(const string16& str,
    131                  char16 c,
    132                  std::vector<string16>* r) {
    133   DCHECK(CBU16_IS_SINGLE(c));
    134   SplitStringT(str, c, true, r);
    135 }
    136 
    137 void SplitString(const std::string& str,
    138                  char c,
    139                  std::vector<std::string>* r) {
    140 #if CHAR_MIN < 0
    141   DCHECK(c >= 0);
    142 #endif
    143   DCHECK(c < 0x7F);
    144   SplitStringT(str, c, true, r);
    145 }
    146 
    147 bool SplitStringIntoKeyValuePairs(const std::string& line,
    148                                   char key_value_delimiter,
    149                                   char key_value_pair_delimiter,
    150                                   StringPairs* key_value_pairs) {
    151   key_value_pairs->clear();
    152 
    153   std::vector<std::string> pairs;
    154   SplitString(line, key_value_pair_delimiter, &pairs);
    155 
    156   bool success = true;
    157   for (size_t i = 0; i < pairs.size(); ++i) {
    158     // Don't add empty pairs into the result.
    159     if (pairs[i].empty())
    160       continue;
    161 
    162     std::string key;
    163     std::string value;
    164     if (!SplitStringIntoKeyValue(pairs[i], key_value_delimiter, &key, &value)) {
    165       // Don't return here, to allow for pairs without associated
    166       // value or key; just record that the split failed.
    167       success = false;
    168     }
    169     key_value_pairs->push_back(make_pair(key, value));
    170   }
    171   return success;
    172 }
    173 
    174 void SplitStringUsingSubstr(const string16& str,
    175                             const string16& s,
    176                             std::vector<string16>* r) {
    177   SplitStringUsingSubstrT(str, s, r);
    178 }
    179 
    180 void SplitStringUsingSubstr(const std::string& str,
    181                             const std::string& s,
    182                             std::vector<std::string>* r) {
    183   SplitStringUsingSubstrT(str, s, r);
    184 }
    185 
    186 void SplitStringDontTrim(const string16& str,
    187                          char16 c,
    188                          std::vector<string16>* r) {
    189   DCHECK(CBU16_IS_SINGLE(c));
    190   SplitStringT(str, c, false, r);
    191 }
    192 
    193 void SplitStringDontTrim(const std::string& str,
    194                          char c,
    195                          std::vector<std::string>* r) {
    196   DCHECK(IsStringUTF8(str));
    197 #if CHAR_MIN < 0
    198   DCHECK(c >= 0);
    199 #endif
    200   DCHECK(c < 0x7F);
    201   SplitStringT(str, c, false, r);
    202 }
    203 
    204 void SplitStringAlongWhitespace(const string16& str,
    205                                 std::vector<string16>* result) {
    206   SplitStringAlongWhitespaceT(str, result);
    207 }
    208 
    209 void SplitStringAlongWhitespace(const std::string& str,
    210                                 std::vector<std::string>* result) {
    211   SplitStringAlongWhitespaceT(str, result);
    212 }
    213 
    214 }  // namespace base
    215