1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_split.h" 6 7 #include "base/logging.h" 8 #include "base/strings/string_util.h" 9 #include "base/strings/utf_string_conversions.h" 10 #include "base/third_party/icu/icu_utf.h" 11 12 namespace base { 13 14 namespace { 15 16 template <typename STR> 17 void SplitStringT(const STR& str, 18 const typename STR::value_type s, 19 bool trim_whitespace, 20 std::vector<STR>* r) { 21 r->clear(); 22 size_t last = 0; 23 size_t c = str.size(); 24 for (size_t i = 0; i <= c; ++i) { 25 if (i == c || str[i] == s) { 26 STR tmp(str, last, i - last); 27 if (trim_whitespace) 28 TrimWhitespace(tmp, TRIM_ALL, &tmp); 29 // Avoid converting an empty or all-whitespace source string into a vector 30 // of one empty string. 31 if (i != c || !r->empty() || !tmp.empty()) 32 r->push_back(tmp); 33 last = i + 1; 34 } 35 } 36 } 37 38 bool SplitStringIntoKeyValue(const std::string& line, 39 char key_value_delimiter, 40 std::string* key, 41 std::string* value) { 42 key->clear(); 43 value->clear(); 44 45 // Find the delimiter. 46 size_t end_key_pos = line.find_first_of(key_value_delimiter); 47 if (end_key_pos == std::string::npos) { 48 DVLOG(1) << "cannot find delimiter in: " << line; 49 return false; // no delimiter 50 } 51 key->assign(line, 0, end_key_pos); 52 53 // Find the value string. 54 std::string remains(line, end_key_pos, line.size() - end_key_pos); 55 size_t begin_value_pos = remains.find_first_not_of(key_value_delimiter); 56 if (begin_value_pos == std::string::npos) { 57 DVLOG(1) << "cannot parse value from line: " << line; 58 return false; // no value 59 } 60 value->assign(remains, begin_value_pos, remains.size() - begin_value_pos); 61 return true; 62 } 63 64 template <typename STR> 65 void SplitStringUsingSubstrT(const STR& str, 66 const STR& s, 67 std::vector<STR>* r) { 68 r->clear(); 69 typename STR::size_type begin_index = 0; 70 while (true) { 71 const typename STR::size_type end_index = str.find(s, begin_index); 72 if (end_index == STR::npos) { 73 const STR term = str.substr(begin_index); 74 STR tmp; 75 TrimWhitespace(term, TRIM_ALL, &tmp); 76 r->push_back(tmp); 77 return; 78 } 79 const STR term = str.substr(begin_index, end_index - begin_index); 80 STR tmp; 81 TrimWhitespace(term, TRIM_ALL, &tmp); 82 r->push_back(tmp); 83 begin_index = end_index + s.size(); 84 } 85 } 86 87 template<typename STR> 88 void SplitStringAlongWhitespaceT(const STR& str, std::vector<STR>* result) { 89 result->clear(); 90 const size_t length = str.length(); 91 if (!length) 92 return; 93 94 bool last_was_ws = false; 95 size_t last_non_ws_start = 0; 96 for (size_t i = 0; i < length; ++i) { 97 switch (str[i]) { 98 // HTML 5 defines whitespace as: space, tab, LF, line tab, FF, or CR. 99 case L' ': 100 case L'\t': 101 case L'\xA': 102 case L'\xB': 103 case L'\xC': 104 case L'\xD': 105 if (!last_was_ws) { 106 if (i > 0) { 107 result->push_back( 108 str.substr(last_non_ws_start, i - last_non_ws_start)); 109 } 110 last_was_ws = true; 111 } 112 break; 113 114 default: // Not a space character. 115 if (last_was_ws) { 116 last_was_ws = false; 117 last_non_ws_start = i; 118 } 119 break; 120 } 121 } 122 if (!last_was_ws) { 123 result->push_back( 124 str.substr(last_non_ws_start, length - last_non_ws_start)); 125 } 126 } 127 128 } // namespace 129 130 void SplitString(const string16& str, 131 char16 c, 132 std::vector<string16>* r) { 133 DCHECK(CBU16_IS_SINGLE(c)); 134 SplitStringT(str, c, true, r); 135 } 136 137 void SplitString(const std::string& str, 138 char c, 139 std::vector<std::string>* r) { 140 #if CHAR_MIN < 0 141 DCHECK(c >= 0); 142 #endif 143 DCHECK(c < 0x7F); 144 SplitStringT(str, c, true, r); 145 } 146 147 bool SplitStringIntoKeyValuePairs(const std::string& line, 148 char key_value_delimiter, 149 char key_value_pair_delimiter, 150 StringPairs* key_value_pairs) { 151 key_value_pairs->clear(); 152 153 std::vector<std::string> pairs; 154 SplitString(line, key_value_pair_delimiter, &pairs); 155 156 bool success = true; 157 for (size_t i = 0; i < pairs.size(); ++i) { 158 // Don't add empty pairs into the result. 159 if (pairs[i].empty()) 160 continue; 161 162 std::string key; 163 std::string value; 164 if (!SplitStringIntoKeyValue(pairs[i], key_value_delimiter, &key, &value)) { 165 // Don't return here, to allow for pairs without associated 166 // value or key; just record that the split failed. 167 success = false; 168 } 169 key_value_pairs->push_back(make_pair(key, value)); 170 } 171 return success; 172 } 173 174 void SplitStringUsingSubstr(const string16& str, 175 const string16& s, 176 std::vector<string16>* r) { 177 SplitStringUsingSubstrT(str, s, r); 178 } 179 180 void SplitStringUsingSubstr(const std::string& str, 181 const std::string& s, 182 std::vector<std::string>* r) { 183 SplitStringUsingSubstrT(str, s, r); 184 } 185 186 void SplitStringDontTrim(const string16& str, 187 char16 c, 188 std::vector<string16>* r) { 189 DCHECK(CBU16_IS_SINGLE(c)); 190 SplitStringT(str, c, false, r); 191 } 192 193 void SplitStringDontTrim(const std::string& str, 194 char c, 195 std::vector<std::string>* r) { 196 DCHECK(IsStringUTF8(str)); 197 #if CHAR_MIN < 0 198 DCHECK(c >= 0); 199 #endif 200 DCHECK(c < 0x7F); 201 SplitStringT(str, c, false, r); 202 } 203 204 void SplitStringAlongWhitespace(const string16& str, 205 std::vector<string16>* result) { 206 SplitStringAlongWhitespaceT(str, result); 207 } 208 209 void SplitStringAlongWhitespace(const std::string& str, 210 std::vector<std::string>* result) { 211 SplitStringAlongWhitespaceT(str, result); 212 } 213 214 } // namespace base 215