1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_split.h" 6 7 #include <stddef.h> 8 9 #include "base/logging.h" 10 #include "base/strings/string_util.h" 11 #include "base/third_party/icu/icu_utf.h" 12 13 namespace base { 14 15 namespace { 16 17 // PieceToOutputType converts a StringPiece as needed to a given output type, 18 // which is either the same type of StringPiece (a NOP) or the corresponding 19 // non-piece string type. 20 // 21 // The default converter is a NOP, it works when the OutputType is the 22 // correct StringPiece. 23 template<typename Str, typename OutputType> 24 OutputType PieceToOutputType(BasicStringPiece<Str> piece) { 25 return piece; 26 } 27 template<> // Convert StringPiece to std::string 28 std::string PieceToOutputType<std::string, std::string>(StringPiece piece) { 29 return piece.as_string(); 30 } 31 template<> // Convert StringPiece16 to string16. 32 string16 PieceToOutputType<string16, string16>(StringPiece16 piece) { 33 return piece.as_string(); 34 } 35 36 // Returns either the ASCII or UTF-16 whitespace. 37 template<typename Str> BasicStringPiece<Str> WhitespaceForType(); 38 template<> StringPiece16 WhitespaceForType<string16>() { 39 return kWhitespaceUTF16; 40 } 41 template<> StringPiece WhitespaceForType<std::string>() { 42 return kWhitespaceASCII; 43 } 44 45 // Optimize the single-character case to call find() on the string instead, 46 // since this is the common case and can be made faster. This could have been 47 // done with template specialization too, but would have been less clear. 48 // 49 // There is no corresponding FindFirstNotOf because StringPiece already 50 // implements these different versions that do the optimized searching. 51 size_t FindFirstOf(StringPiece piece, char c, size_t pos) { 52 return piece.find(c, pos); 53 } 54 size_t FindFirstOf(StringPiece16 piece, char16 c, size_t pos) { 55 return piece.find(c, pos); 56 } 57 size_t FindFirstOf(StringPiece piece, StringPiece one_of, size_t pos) { 58 return piece.find_first_of(one_of, pos); 59 } 60 size_t FindFirstOf(StringPiece16 piece, StringPiece16 one_of, size_t pos) { 61 return piece.find_first_of(one_of, pos); 62 } 63 64 // General string splitter template. Can take 8- or 16-bit input, can produce 65 // the corresponding string or StringPiece output, and can take single- or 66 // multiple-character delimiters. 67 // 68 // DelimiterType is either a character (Str::value_type) or a string piece of 69 // multiple characters (BasicStringPiece<Str>). StringPiece has a version of 70 // find for both of these cases, and the single-character version is the most 71 // common and can be implemented faster, which is why this is a template. 72 template<typename Str, typename OutputStringType, typename DelimiterType> 73 static std::vector<OutputStringType> SplitStringT( 74 BasicStringPiece<Str> str, 75 DelimiterType delimiter, 76 WhitespaceHandling whitespace, 77 SplitResult result_type) { 78 std::vector<OutputStringType> result; 79 if (str.empty()) 80 return result; 81 82 size_t start = 0; 83 while (start != Str::npos) { 84 size_t end = FindFirstOf(str, delimiter, start); 85 86 BasicStringPiece<Str> piece; 87 if (end == Str::npos) { 88 piece = str.substr(start); 89 start = Str::npos; 90 } else { 91 piece = str.substr(start, end - start); 92 start = end + 1; 93 } 94 95 if (whitespace == TRIM_WHITESPACE) 96 piece = TrimString(piece, WhitespaceForType<Str>(), TRIM_ALL); 97 98 if (result_type == SPLIT_WANT_ALL || !piece.empty()) 99 result.push_back(PieceToOutputType<Str, OutputStringType>(piece)); 100 } 101 return result; 102 } 103 104 bool AppendStringKeyValue(StringPiece input, 105 char delimiter, 106 StringPairs* result) { 107 // Always append a new item regardless of success (it might be empty). The 108 // below code will copy the strings directly into the result pair. 109 result->resize(result->size() + 1); 110 auto& result_pair = result->back(); 111 112 // Find the delimiter. 113 size_t end_key_pos = input.find_first_of(delimiter); 114 if (end_key_pos == std::string::npos) { 115 DVLOG(1) << "cannot find delimiter in: " << input; 116 return false; // No delimiter. 117 } 118 input.substr(0, end_key_pos).CopyToString(&result_pair.first); 119 120 // Find the value string. 121 StringPiece remains = input.substr(end_key_pos, input.size() - end_key_pos); 122 size_t begin_value_pos = remains.find_first_not_of(delimiter); 123 if (begin_value_pos == StringPiece::npos) { 124 DVLOG(1) << "cannot parse value from input: " << input; 125 return false; // No value. 126 } 127 remains.substr(begin_value_pos, remains.size() - begin_value_pos) 128 .CopyToString(&result_pair.second); 129 130 return true; 131 } 132 133 template <typename Str, typename OutputStringType> 134 void SplitStringUsingSubstrT(BasicStringPiece<Str> input, 135 BasicStringPiece<Str> delimiter, 136 WhitespaceHandling whitespace, 137 SplitResult result_type, 138 std::vector<OutputStringType>* result) { 139 using Piece = BasicStringPiece<Str>; 140 using size_type = typename Piece::size_type; 141 142 result->clear(); 143 for (size_type begin_index = 0, end_index = 0; end_index != Piece::npos; 144 begin_index = end_index + delimiter.size()) { 145 end_index = input.find(delimiter, begin_index); 146 Piece term = end_index == Piece::npos 147 ? input.substr(begin_index) 148 : input.substr(begin_index, end_index - begin_index); 149 150 if (whitespace == TRIM_WHITESPACE) 151 term = TrimString(term, WhitespaceForType<Str>(), TRIM_ALL); 152 153 if (result_type == SPLIT_WANT_ALL || !term.empty()) 154 result->push_back(PieceToOutputType<Str, OutputStringType>(term)); 155 } 156 } 157 158 } // namespace 159 160 std::vector<std::string> SplitString(StringPiece input, 161 StringPiece separators, 162 WhitespaceHandling whitespace, 163 SplitResult result_type) { 164 if (separators.size() == 1) { 165 return SplitStringT<std::string, std::string, char>( 166 input, separators[0], whitespace, result_type); 167 } 168 return SplitStringT<std::string, std::string, StringPiece>( 169 input, separators, whitespace, result_type); 170 } 171 172 std::vector<string16> SplitString(StringPiece16 input, 173 StringPiece16 separators, 174 WhitespaceHandling whitespace, 175 SplitResult result_type) { 176 if (separators.size() == 1) { 177 return SplitStringT<string16, string16, char16>( 178 input, separators[0], whitespace, result_type); 179 } 180 return SplitStringT<string16, string16, StringPiece16>( 181 input, separators, whitespace, result_type); 182 } 183 184 std::vector<StringPiece> SplitStringPiece(StringPiece input, 185 StringPiece separators, 186 WhitespaceHandling whitespace, 187 SplitResult result_type) { 188 if (separators.size() == 1) { 189 return SplitStringT<std::string, StringPiece, char>( 190 input, separators[0], whitespace, result_type); 191 } 192 return SplitStringT<std::string, StringPiece, StringPiece>( 193 input, separators, whitespace, result_type); 194 } 195 196 std::vector<StringPiece16> SplitStringPiece(StringPiece16 input, 197 StringPiece16 separators, 198 WhitespaceHandling whitespace, 199 SplitResult result_type) { 200 if (separators.size() == 1) { 201 return SplitStringT<string16, StringPiece16, char16>( 202 input, separators[0], whitespace, result_type); 203 } 204 return SplitStringT<string16, StringPiece16, StringPiece16>( 205 input, separators, whitespace, result_type); 206 } 207 208 bool SplitStringIntoKeyValuePairs(StringPiece input, 209 char key_value_delimiter, 210 char key_value_pair_delimiter, 211 StringPairs* key_value_pairs) { 212 key_value_pairs->clear(); 213 214 std::vector<StringPiece> pairs = SplitStringPiece( 215 input, std::string(1, key_value_pair_delimiter), 216 TRIM_WHITESPACE, SPLIT_WANT_NONEMPTY); 217 key_value_pairs->reserve(pairs.size()); 218 219 bool success = true; 220 for (const StringPiece& pair : pairs) { 221 if (!AppendStringKeyValue(pair, key_value_delimiter, key_value_pairs)) { 222 // Don't return here, to allow for pairs without associated 223 // value or key; just record that the split failed. 224 success = false; 225 } 226 } 227 return success; 228 } 229 230 void SplitStringUsingSubstr(StringPiece16 input, 231 StringPiece16 delimiter, 232 std::vector<string16>* result) { 233 SplitStringUsingSubstrT(input, delimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL, 234 result); 235 } 236 237 void SplitStringUsingSubstr(StringPiece input, 238 StringPiece delimiter, 239 std::vector<std::string>* result) { 240 SplitStringUsingSubstrT(input, delimiter, TRIM_WHITESPACE, SPLIT_WANT_ALL, 241 result); 242 } 243 244 std::vector<StringPiece16> SplitStringPieceUsingSubstr( 245 StringPiece16 input, 246 StringPiece16 delimiter, 247 WhitespaceHandling whitespace, 248 SplitResult result_type) { 249 std::vector<StringPiece16> result; 250 SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result); 251 return result; 252 } 253 254 std::vector<StringPiece> SplitStringPieceUsingSubstr( 255 StringPiece input, 256 StringPiece delimiter, 257 WhitespaceHandling whitespace, 258 SplitResult result_type) { 259 std::vector<StringPiece> result; 260 SplitStringUsingSubstrT(input, delimiter, whitespace, result_type, &result); 261 return result; 262 } 263 264 } // namespace base 265