1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_number_conversions.h" 6 7 #include <ctype.h> 8 #include <errno.h> 9 #include <stdlib.h> 10 #include <wctype.h> 11 12 #include <limits> 13 #include <type_traits> 14 15 #include "base/logging.h" 16 #include "base/numerics/safe_math.h" 17 #include "base/scoped_clear_errno.h" 18 #include "base/strings/utf_string_conversions.h" 19 20 namespace base { 21 22 namespace { 23 24 template <typename STR, typename INT> 25 struct IntToStringT { 26 static STR IntToString(INT value) { 27 // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4. 28 // So round up to allocate 3 output characters per byte, plus 1 for '-'. 29 const size_t kOutputBufSize = 30 3 * sizeof(INT) + std::numeric_limits<INT>::is_signed; 31 32 // Create the string in a temporary buffer, write it back to front, and 33 // then return the substr of what we ended up using. 34 using CHR = typename STR::value_type; 35 CHR outbuf[kOutputBufSize]; 36 37 // The ValueOrDie call below can never fail, because UnsignedAbs is valid 38 // for all valid inputs. 39 typename std::make_unsigned<INT>::type res = 40 CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie(); 41 42 CHR* end = outbuf + kOutputBufSize; 43 CHR* i = end; 44 do { 45 --i; 46 DCHECK(i != outbuf); 47 *i = static_cast<CHR>((res % 10) + '0'); 48 res /= 10; 49 } while (res != 0); 50 if (IsValueNegative(value)) { 51 --i; 52 DCHECK(i != outbuf); 53 *i = static_cast<CHR>('-'); 54 } 55 return STR(i, end); 56 } 57 }; 58 59 // Utility to convert a character to a digit in a given base 60 template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit { 61 }; 62 63 // Faster specialization for bases <= 10 64 template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> { 65 public: 66 static bool Convert(CHAR c, uint8_t* digit) { 67 if (c >= '0' && c < '0' + BASE) { 68 *digit = static_cast<uint8_t>(c - '0'); 69 return true; 70 } 71 return false; 72 } 73 }; 74 75 // Specialization for bases where 10 < base <= 36 76 template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> { 77 public: 78 static bool Convert(CHAR c, uint8_t* digit) { 79 if (c >= '0' && c <= '9') { 80 *digit = c - '0'; 81 } else if (c >= 'a' && c < 'a' + BASE - 10) { 82 *digit = c - 'a' + 10; 83 } else if (c >= 'A' && c < 'A' + BASE - 10) { 84 *digit = c - 'A' + 10; 85 } else { 86 return false; 87 } 88 return true; 89 } 90 }; 91 92 template <int BASE, typename CHAR> 93 bool CharToDigit(CHAR c, uint8_t* digit) { 94 return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit); 95 } 96 97 // There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it 98 // is locale independent, whereas the functions we are replacing were 99 // locale-dependent. TBD what is desired, but for the moment let's not 100 // introduce a change in behaviour. 101 template<typename CHAR> class WhitespaceHelper { 102 }; 103 104 template<> class WhitespaceHelper<char> { 105 public: 106 static bool Invoke(char c) { 107 return 0 != isspace(static_cast<unsigned char>(c)); 108 } 109 }; 110 111 template<> class WhitespaceHelper<char16> { 112 public: 113 static bool Invoke(char16 c) { 114 return 0 != iswspace(c); 115 } 116 }; 117 118 template<typename CHAR> bool LocalIsWhitespace(CHAR c) { 119 return WhitespaceHelper<CHAR>::Invoke(c); 120 } 121 122 // IteratorRangeToNumberTraits should provide: 123 // - a typedef for iterator_type, the iterator type used as input. 124 // - a typedef for value_type, the target numeric type. 125 // - static functions min, max (returning the minimum and maximum permitted 126 // values) 127 // - constant kBase, the base in which to interpret the input 128 template<typename IteratorRangeToNumberTraits> 129 class IteratorRangeToNumber { 130 public: 131 typedef IteratorRangeToNumberTraits traits; 132 typedef typename traits::iterator_type const_iterator; 133 typedef typename traits::value_type value_type; 134 135 // Generalized iterator-range-to-number conversion. 136 // 137 static bool Invoke(const_iterator begin, 138 const_iterator end, 139 value_type* output) { 140 bool valid = true; 141 142 while (begin != end && LocalIsWhitespace(*begin)) { 143 valid = false; 144 ++begin; 145 } 146 147 if (begin != end && *begin == '-') { 148 if (!std::numeric_limits<value_type>::is_signed) { 149 *output = 0; 150 valid = false; 151 } else if (!Negative::Invoke(begin + 1, end, output)) { 152 valid = false; 153 } 154 } else { 155 if (begin != end && *begin == '+') { 156 ++begin; 157 } 158 if (!Positive::Invoke(begin, end, output)) { 159 valid = false; 160 } 161 } 162 163 return valid; 164 } 165 166 private: 167 // Sign provides: 168 // - a static function, CheckBounds, that determines whether the next digit 169 // causes an overflow/underflow 170 // - a static function, Increment, that appends the next digit appropriately 171 // according to the sign of the number being parsed. 172 template<typename Sign> 173 class Base { 174 public: 175 static bool Invoke(const_iterator begin, const_iterator end, 176 typename traits::value_type* output) { 177 *output = 0; 178 179 if (begin == end) { 180 return false; 181 } 182 183 // Note: no performance difference was found when using template 184 // specialization to remove this check in bases other than 16 185 if (traits::kBase == 16 && end - begin > 2 && *begin == '0' && 186 (*(begin + 1) == 'x' || *(begin + 1) == 'X')) { 187 begin += 2; 188 } 189 190 for (const_iterator current = begin; current != end; ++current) { 191 uint8_t new_digit = 0; 192 193 if (!CharToDigit<traits::kBase>(*current, &new_digit)) { 194 return false; 195 } 196 197 if (current != begin) { 198 if (!Sign::CheckBounds(output, new_digit)) { 199 return false; 200 } 201 *output *= traits::kBase; 202 } 203 204 Sign::Increment(new_digit, output); 205 } 206 return true; 207 } 208 }; 209 210 class Positive : public Base<Positive> { 211 public: 212 static bool CheckBounds(value_type* output, uint8_t new_digit) { 213 if (*output > static_cast<value_type>(traits::max() / traits::kBase) || 214 (*output == static_cast<value_type>(traits::max() / traits::kBase) && 215 new_digit > traits::max() % traits::kBase)) { 216 *output = traits::max(); 217 return false; 218 } 219 return true; 220 } 221 static void Increment(uint8_t increment, value_type* output) { 222 *output += increment; 223 } 224 }; 225 226 class Negative : public Base<Negative> { 227 public: 228 static bool CheckBounds(value_type* output, uint8_t new_digit) { 229 if (*output < traits::min() / traits::kBase || 230 (*output == traits::min() / traits::kBase && 231 new_digit > 0 - traits::min() % traits::kBase)) { 232 *output = traits::min(); 233 return false; 234 } 235 return true; 236 } 237 static void Increment(uint8_t increment, value_type* output) { 238 *output -= increment; 239 } 240 }; 241 }; 242 243 template<typename ITERATOR, typename VALUE, int BASE> 244 class BaseIteratorRangeToNumberTraits { 245 public: 246 typedef ITERATOR iterator_type; 247 typedef VALUE value_type; 248 static value_type min() { 249 return std::numeric_limits<value_type>::min(); 250 } 251 static value_type max() { 252 return std::numeric_limits<value_type>::max(); 253 } 254 static const int kBase = BASE; 255 }; 256 257 template<typename ITERATOR> 258 class BaseHexIteratorRangeToIntTraits 259 : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> { 260 }; 261 262 template <typename ITERATOR> 263 class BaseHexIteratorRangeToUIntTraits 264 : public BaseIteratorRangeToNumberTraits<ITERATOR, uint32_t, 16> {}; 265 266 template <typename ITERATOR> 267 class BaseHexIteratorRangeToInt64Traits 268 : public BaseIteratorRangeToNumberTraits<ITERATOR, int64_t, 16> {}; 269 270 template <typename ITERATOR> 271 class BaseHexIteratorRangeToUInt64Traits 272 : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64_t, 16> {}; 273 274 typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator> 275 HexIteratorRangeToIntTraits; 276 277 typedef BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator> 278 HexIteratorRangeToUIntTraits; 279 280 typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator> 281 HexIteratorRangeToInt64Traits; 282 283 typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator> 284 HexIteratorRangeToUInt64Traits; 285 286 template <typename VALUE, int BASE> 287 class StringPieceToNumberTraits 288 : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator, 289 VALUE, 290 BASE> { 291 }; 292 293 template <typename VALUE> 294 bool StringToIntImpl(StringPiece input, VALUE* output) { 295 return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke( 296 input.begin(), input.end(), output); 297 } 298 299 template <typename VALUE, int BASE> 300 class StringPiece16ToNumberTraits 301 : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator, 302 VALUE, 303 BASE> { 304 }; 305 306 template <typename VALUE> 307 bool String16ToIntImpl(StringPiece16 input, VALUE* output) { 308 return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke( 309 input.begin(), input.end(), output); 310 } 311 312 } // namespace 313 314 std::string NumberToString(int value) { 315 return IntToStringT<std::string, int>::IntToString(value); 316 } 317 318 string16 NumberToString16(int value) { 319 return IntToStringT<string16, int>::IntToString(value); 320 } 321 322 std::string NumberToString(unsigned value) { 323 return IntToStringT<std::string, unsigned>::IntToString(value); 324 } 325 326 string16 NumberToString16(unsigned value) { 327 return IntToStringT<string16, unsigned>::IntToString(value); 328 } 329 330 std::string NumberToString(long value) { 331 return IntToStringT<std::string, long>::IntToString(value); 332 } 333 334 string16 NumberToString16(long value) { 335 return IntToStringT<string16, long>::IntToString(value); 336 } 337 338 std::string NumberToString(unsigned long value) { 339 return IntToStringT<std::string, unsigned long>::IntToString(value); 340 } 341 342 string16 NumberToString16(unsigned long value) { 343 return IntToStringT<string16, unsigned long>::IntToString(value); 344 } 345 346 std::string NumberToString(long long value) { 347 return IntToStringT<std::string, long long>::IntToString(value); 348 } 349 350 string16 NumberToString16(long long value) { 351 return IntToStringT<string16, long long>::IntToString(value); 352 } 353 354 std::string NumberToString(unsigned long long value) { 355 return IntToStringT<std::string, unsigned long long>::IntToString(value); 356 } 357 358 string16 NumberToString16(unsigned long long value) { 359 return IntToStringT<string16, unsigned long long>::IntToString(value); 360 } 361 362 std::string NumberToString(double value) { 363 auto ret = std::to_string(value); 364 // If this returned an integer, don't do anything. 365 if (ret.find('.') == std::string::npos) { 366 return ret; 367 } 368 // Otherwise, it has an annoying tendency to leave trailing zeros. 369 size_t len = ret.size(); 370 while (len >= 2 && ret[len - 1] == '0' && ret[len - 2] != '.') { 371 --len; 372 } 373 ret.erase(len); 374 return ret; 375 } 376 377 base::string16 NumberToString16(double value) { 378 auto tmp = std::to_string(value); 379 base::string16 ret(tmp.c_str(), tmp.c_str() + tmp.length()); 380 381 // If this returned an integer, don't do anything. 382 if (ret.find('.') == std::string::npos) { 383 return ret; 384 } 385 // Otherwise, it has an annoying tendency to leave trailing zeros. 386 size_t len = ret.size(); 387 while (len >= 2 && ret[len - 1] == '0' && ret[len - 2] != '.') { 388 --len; 389 } 390 ret.erase(len); 391 return ret; 392 } 393 394 bool StringToInt(StringPiece input, int* output) { 395 return StringToIntImpl(input, output); 396 } 397 398 bool StringToInt(StringPiece16 input, int* output) { 399 return String16ToIntImpl(input, output); 400 } 401 402 bool StringToUint(StringPiece input, unsigned* output) { 403 return StringToIntImpl(input, output); 404 } 405 406 bool StringToUint(StringPiece16 input, unsigned* output) { 407 return String16ToIntImpl(input, output); 408 } 409 410 bool StringToInt64(StringPiece input, int64_t* output) { 411 return StringToIntImpl(input, output); 412 } 413 414 bool StringToInt64(StringPiece16 input, int64_t* output) { 415 return String16ToIntImpl(input, output); 416 } 417 418 bool StringToUint64(StringPiece input, uint64_t* output) { 419 return StringToIntImpl(input, output); 420 } 421 422 bool StringToUint64(StringPiece16 input, uint64_t* output) { 423 return String16ToIntImpl(input, output); 424 } 425 426 bool StringToSizeT(StringPiece input, size_t* output) { 427 return StringToIntImpl(input, output); 428 } 429 430 bool StringToSizeT(StringPiece16 input, size_t* output) { 431 return String16ToIntImpl(input, output); 432 } 433 434 bool StringToDouble(const std::string& input, double* output) { 435 char* endptr = nullptr; 436 *output = strtod(input.c_str(), &endptr); 437 438 // Cases to return false: 439 // - If the input string is empty, there was nothing to parse. 440 // - If endptr does not point to the end of the string, there are either 441 // characters remaining in the string after a parsed number, or the string 442 // does not begin with a parseable number. endptr is compared to the 443 // expected end given the string's stated length to correctly catch cases 444 // where the string contains embedded NUL characters. 445 // - If the first character is a space, there was leading whitespace 446 return !input.empty() && 447 input.c_str() + input.length() == endptr && 448 !isspace(input[0]) && 449 *output != std::numeric_limits<double>::infinity() && 450 *output != -std::numeric_limits<double>::infinity(); 451 } 452 453 // Note: if you need to add String16ToDouble, first ask yourself if it's 454 // really necessary. If it is, probably the best implementation here is to 455 // convert to 8-bit and then use the 8-bit version. 456 457 // Note: if you need to add an iterator range version of StringToDouble, first 458 // ask yourself if it's really necessary. If it is, probably the best 459 // implementation here is to instantiate a string and use the string version. 460 461 std::string HexEncode(const void* bytes, size_t size) { 462 static const char kHexChars[] = "0123456789ABCDEF"; 463 464 // Each input byte creates two output hex characters. 465 std::string ret(size * 2, '\0'); 466 467 for (size_t i = 0; i < size; ++i) { 468 char b = reinterpret_cast<const char*>(bytes)[i]; 469 ret[(i * 2)] = kHexChars[(b >> 4) & 0xf]; 470 ret[(i * 2) + 1] = kHexChars[b & 0xf]; 471 } 472 return ret; 473 } 474 475 bool HexStringToInt(StringPiece input, int* output) { 476 return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke( 477 input.begin(), input.end(), output); 478 } 479 480 bool HexStringToUInt(StringPiece input, uint32_t* output) { 481 return IteratorRangeToNumber<HexIteratorRangeToUIntTraits>::Invoke( 482 input.begin(), input.end(), output); 483 } 484 485 bool HexStringToInt64(StringPiece input, int64_t* output) { 486 return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke( 487 input.begin(), input.end(), output); 488 } 489 490 bool HexStringToUInt64(StringPiece input, uint64_t* output) { 491 return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke( 492 input.begin(), input.end(), output); 493 } 494 495 bool HexStringToBytes(StringPiece input, std::vector<uint8_t>* output) { 496 DCHECK_EQ(output->size(), 0u); 497 size_t count = input.size(); 498 if (count == 0 || (count % 2) != 0) 499 return false; 500 for (uintptr_t i = 0; i < count / 2; ++i) { 501 uint8_t msb = 0; // most significant 4 bits 502 uint8_t lsb = 0; // least significant 4 bits 503 if (!CharToDigit<16>(input[i * 2], &msb) || 504 !CharToDigit<16>(input[i * 2 + 1], &lsb)) { 505 return false; 506 } 507 output->push_back((msb << 4) | lsb); 508 } 509 return true; 510 } 511 512 } // namespace base 513