Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/strings/string_number_conversions.h"
      6 
      7 #include <ctype.h>
      8 #include <errno.h>
      9 #include <stdlib.h>
     10 #include <wctype.h>
     11 
     12 #include <limits>
     13 
     14 #include "base/logging.h"
     15 #include "base/numerics/safe_math.h"
     16 #include "base/scoped_clear_errno.h"
     17 #include "base/scoped_clear_errno.h"
     18 
     19 namespace base {
     20 
     21 namespace {
     22 
     23 template <typename STR, typename INT>
     24 struct IntToStringT {
     25   static STR IntToString(INT value) {
     26     // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
     27     // So round up to allocate 3 output characters per byte, plus 1 for '-'.
     28     const size_t kOutputBufSize =
     29         3 * sizeof(INT) + std::numeric_limits<INT>::is_signed;
     30 
     31     // Create the string in a temporary buffer, write it back to front, and
     32     // then return the substr of what we ended up using.
     33     using CHR = typename STR::value_type;
     34     CHR outbuf[kOutputBufSize];
     35 
     36     // The ValueOrDie call below can never fail, because UnsignedAbs is valid
     37     // for all valid inputs.
     38     auto res = CheckedNumeric<INT>(value).UnsignedAbs().ValueOrDie();
     39 
     40     CHR* end = outbuf + kOutputBufSize;
     41     CHR* i = end;
     42     do {
     43       --i;
     44       DCHECK(i != outbuf);
     45       *i = static_cast<CHR>((res % 10) + '0');
     46       res /= 10;
     47     } while (res != 0);
     48     if (IsValueNegative(value)) {
     49       --i;
     50       DCHECK(i != outbuf);
     51       *i = static_cast<CHR>('-');
     52     }
     53     return STR(i, end);
     54   }
     55 };
     56 
     57 // Utility to convert a character to a digit in a given base
     58 template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit {
     59 };
     60 
     61 // Faster specialization for bases <= 10
     62 template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> {
     63  public:
     64   static bool Convert(CHAR c, uint8_t* digit) {
     65     if (c >= '0' && c < '0' + BASE) {
     66       *digit = static_cast<uint8_t>(c - '0');
     67       return true;
     68     }
     69     return false;
     70   }
     71 };
     72 
     73 // Specialization for bases where 10 < base <= 36
     74 template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> {
     75  public:
     76   static bool Convert(CHAR c, uint8_t* digit) {
     77     if (c >= '0' && c <= '9') {
     78       *digit = c - '0';
     79     } else if (c >= 'a' && c < 'a' + BASE - 10) {
     80       *digit = c - 'a' + 10;
     81     } else if (c >= 'A' && c < 'A' + BASE - 10) {
     82       *digit = c - 'A' + 10;
     83     } else {
     84       return false;
     85     }
     86     return true;
     87   }
     88 };
     89 
     90 template <int BASE, typename CHAR>
     91 bool CharToDigit(CHAR c, uint8_t* digit) {
     92   return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit);
     93 }
     94 
     95 // There is an IsUnicodeWhitespace for wchars defined in string_util.h, but it
     96 // is locale independent, whereas the functions we are replacing were
     97 // locale-dependent. TBD what is desired, but for the moment let's not
     98 // introduce a change in behaviour.
     99 template<typename CHAR> class WhitespaceHelper {
    100 };
    101 
    102 template<> class WhitespaceHelper<char> {
    103  public:
    104   static bool Invoke(char c) {
    105     return 0 != isspace(static_cast<unsigned char>(c));
    106   }
    107 };
    108 
    109 template<> class WhitespaceHelper<char16> {
    110  public:
    111   static bool Invoke(char16 c) {
    112     return 0 != iswspace(c);
    113   }
    114 };
    115 
    116 template<typename CHAR> bool LocalIsWhitespace(CHAR c) {
    117   return WhitespaceHelper<CHAR>::Invoke(c);
    118 }
    119 
    120 // IteratorRangeToNumberTraits should provide:
    121 //  - a typedef for iterator_type, the iterator type used as input.
    122 //  - a typedef for value_type, the target numeric type.
    123 //  - static functions min, max (returning the minimum and maximum permitted
    124 //    values)
    125 //  - constant kBase, the base in which to interpret the input
    126 template<typename IteratorRangeToNumberTraits>
    127 class IteratorRangeToNumber {
    128  public:
    129   typedef IteratorRangeToNumberTraits traits;
    130   typedef typename traits::iterator_type const_iterator;
    131   typedef typename traits::value_type value_type;
    132 
    133   // Generalized iterator-range-to-number conversion.
    134   //
    135   static bool Invoke(const_iterator begin,
    136                      const_iterator end,
    137                      value_type* output) {
    138     bool valid = true;
    139 
    140     while (begin != end && LocalIsWhitespace(*begin)) {
    141       valid = false;
    142       ++begin;
    143     }
    144 
    145     if (begin != end && *begin == '-') {
    146       if (!std::numeric_limits<value_type>::is_signed) {
    147         *output = 0;
    148         valid = false;
    149       } else if (!Negative::Invoke(begin + 1, end, output)) {
    150         valid = false;
    151       }
    152     } else {
    153       if (begin != end && *begin == '+') {
    154         ++begin;
    155       }
    156       if (!Positive::Invoke(begin, end, output)) {
    157         valid = false;
    158       }
    159     }
    160 
    161     return valid;
    162   }
    163 
    164  private:
    165   // Sign provides:
    166   //  - a static function, CheckBounds, that determines whether the next digit
    167   //    causes an overflow/underflow
    168   //  - a static function, Increment, that appends the next digit appropriately
    169   //    according to the sign of the number being parsed.
    170   template<typename Sign>
    171   class Base {
    172    public:
    173     static bool Invoke(const_iterator begin, const_iterator end,
    174                        typename traits::value_type* output) {
    175       *output = 0;
    176 
    177       if (begin == end) {
    178         return false;
    179       }
    180 
    181       // Note: no performance difference was found when using template
    182       // specialization to remove this check in bases other than 16
    183       if (traits::kBase == 16 && end - begin > 2 && *begin == '0' &&
    184           (*(begin + 1) == 'x' || *(begin + 1) == 'X')) {
    185         begin += 2;
    186       }
    187 
    188       for (const_iterator current = begin; current != end; ++current) {
    189         uint8_t new_digit = 0;
    190 
    191         if (!CharToDigit<traits::kBase>(*current, &new_digit)) {
    192           return false;
    193         }
    194 
    195         if (current != begin) {
    196           if (!Sign::CheckBounds(output, new_digit)) {
    197             return false;
    198           }
    199           *output *= traits::kBase;
    200         }
    201 
    202         Sign::Increment(new_digit, output);
    203       }
    204       return true;
    205     }
    206   };
    207 
    208   class Positive : public Base<Positive> {
    209    public:
    210     static bool CheckBounds(value_type* output, uint8_t new_digit) {
    211       if (*output > static_cast<value_type>(traits::max() / traits::kBase) ||
    212           (*output == static_cast<value_type>(traits::max() / traits::kBase) &&
    213            new_digit > traits::max() % traits::kBase)) {
    214         *output = traits::max();
    215         return false;
    216       }
    217       return true;
    218     }
    219     static void Increment(uint8_t increment, value_type* output) {
    220       *output += increment;
    221     }
    222   };
    223 
    224   class Negative : public Base<Negative> {
    225    public:
    226     static bool CheckBounds(value_type* output, uint8_t new_digit) {
    227       if (*output < traits::min() / traits::kBase ||
    228           (*output == traits::min() / traits::kBase &&
    229            new_digit > 0 - traits::min() % traits::kBase)) {
    230         *output = traits::min();
    231         return false;
    232       }
    233       return true;
    234     }
    235     static void Increment(uint8_t increment, value_type* output) {
    236       *output -= increment;
    237     }
    238   };
    239 };
    240 
    241 template<typename ITERATOR, typename VALUE, int BASE>
    242 class BaseIteratorRangeToNumberTraits {
    243  public:
    244   typedef ITERATOR iterator_type;
    245   typedef VALUE value_type;
    246   static value_type min() {
    247     return std::numeric_limits<value_type>::min();
    248   }
    249   static value_type max() {
    250     return std::numeric_limits<value_type>::max();
    251   }
    252   static const int kBase = BASE;
    253 };
    254 
    255 template<typename ITERATOR>
    256 class BaseHexIteratorRangeToIntTraits
    257     : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> {
    258 };
    259 
    260 template <typename ITERATOR>
    261 class BaseHexIteratorRangeToUIntTraits
    262     : public BaseIteratorRangeToNumberTraits<ITERATOR, uint32_t, 16> {};
    263 
    264 template <typename ITERATOR>
    265 class BaseHexIteratorRangeToInt64Traits
    266     : public BaseIteratorRangeToNumberTraits<ITERATOR, int64_t, 16> {};
    267 
    268 template <typename ITERATOR>
    269 class BaseHexIteratorRangeToUInt64Traits
    270     : public BaseIteratorRangeToNumberTraits<ITERATOR, uint64_t, 16> {};
    271 
    272 typedef BaseHexIteratorRangeToIntTraits<StringPiece::const_iterator>
    273     HexIteratorRangeToIntTraits;
    274 
    275 typedef BaseHexIteratorRangeToUIntTraits<StringPiece::const_iterator>
    276     HexIteratorRangeToUIntTraits;
    277 
    278 typedef BaseHexIteratorRangeToInt64Traits<StringPiece::const_iterator>
    279     HexIteratorRangeToInt64Traits;
    280 
    281 typedef BaseHexIteratorRangeToUInt64Traits<StringPiece::const_iterator>
    282     HexIteratorRangeToUInt64Traits;
    283 
    284 template <typename STR>
    285 bool HexStringToBytesT(const STR& input, std::vector<uint8_t>* output) {
    286   DCHECK_EQ(output->size(), 0u);
    287   size_t count = input.size();
    288   if (count == 0 || (count % 2) != 0)
    289     return false;
    290   for (uintptr_t i = 0; i < count / 2; ++i) {
    291     uint8_t msb = 0;  // most significant 4 bits
    292     uint8_t lsb = 0;  // least significant 4 bits
    293     if (!CharToDigit<16>(input[i * 2], &msb) ||
    294         !CharToDigit<16>(input[i * 2 + 1], &lsb))
    295       return false;
    296     output->push_back((msb << 4) | lsb);
    297   }
    298   return true;
    299 }
    300 
    301 template <typename VALUE, int BASE>
    302 class StringPieceToNumberTraits
    303     : public BaseIteratorRangeToNumberTraits<StringPiece::const_iterator,
    304                                              VALUE,
    305                                              BASE> {
    306 };
    307 
    308 template <typename VALUE>
    309 bool StringToIntImpl(const StringPiece& input, VALUE* output) {
    310   return IteratorRangeToNumber<StringPieceToNumberTraits<VALUE, 10> >::Invoke(
    311       input.begin(), input.end(), output);
    312 }
    313 
    314 template <typename VALUE, int BASE>
    315 class StringPiece16ToNumberTraits
    316     : public BaseIteratorRangeToNumberTraits<StringPiece16::const_iterator,
    317                                              VALUE,
    318                                              BASE> {
    319 };
    320 
    321 template <typename VALUE>
    322 bool String16ToIntImpl(const StringPiece16& input, VALUE* output) {
    323   return IteratorRangeToNumber<StringPiece16ToNumberTraits<VALUE, 10> >::Invoke(
    324       input.begin(), input.end(), output);
    325 }
    326 
    327 }  // namespace
    328 
    329 std::string IntToString(int value) {
    330   return IntToStringT<std::string, int>::IntToString(value);
    331 }
    332 
    333 string16 IntToString16(int value) {
    334   return IntToStringT<string16, int>::IntToString(value);
    335 }
    336 
    337 std::string UintToString(unsigned int value) {
    338   return IntToStringT<std::string, unsigned int>::IntToString(value);
    339 }
    340 
    341 string16 UintToString16(unsigned int value) {
    342   return IntToStringT<string16, unsigned int>::IntToString(value);
    343 }
    344 
    345 std::string Int64ToString(int64_t value) {
    346   return IntToStringT<std::string, int64_t>::IntToString(value);
    347 }
    348 
    349 string16 Int64ToString16(int64_t value) {
    350   return IntToStringT<string16, int64_t>::IntToString(value);
    351 }
    352 
    353 std::string Uint64ToString(uint64_t value) {
    354   return IntToStringT<std::string, uint64_t>::IntToString(value);
    355 }
    356 
    357 string16 Uint64ToString16(uint64_t value) {
    358   return IntToStringT<string16, uint64_t>::IntToString(value);
    359 }
    360 
    361 std::string SizeTToString(size_t value) {
    362   return IntToStringT<std::string, size_t>::IntToString(value);
    363 }
    364 
    365 string16 SizeTToString16(size_t value) {
    366   return IntToStringT<string16, size_t>::IntToString(value);
    367 }
    368 
    369 std::string DoubleToString(double value) {
    370   auto ret = std::to_string(value);
    371   // If this returned an integer, don't do anything.
    372   if (ret.find('.') == std::string::npos) {
    373     return ret;
    374   }
    375   // Otherwise, it has an annoying tendency to leave trailing zeros.
    376   size_t len = ret.size();
    377   while (len >= 2 && ret[len - 1] == '0' && ret[len - 2] != '.') {
    378     --len;
    379   }
    380   ret.erase(len);
    381   return ret;
    382 }
    383 
    384 bool StringToInt(const StringPiece& input, int* output) {
    385   return StringToIntImpl(input, output);
    386 }
    387 
    388 bool StringToInt(const StringPiece16& input, int* output) {
    389   return String16ToIntImpl(input, output);
    390 }
    391 
    392 bool StringToUint(const StringPiece& input, unsigned* output) {
    393   return StringToIntImpl(input, output);
    394 }
    395 
    396 bool StringToUint(const StringPiece16& input, unsigned* output) {
    397   return String16ToIntImpl(input, output);
    398 }
    399 
    400 bool StringToInt64(const StringPiece& input, int64_t* output) {
    401   return StringToIntImpl(input, output);
    402 }
    403 
    404 bool StringToInt64(const StringPiece16& input, int64_t* output) {
    405   return String16ToIntImpl(input, output);
    406 }
    407 
    408 bool StringToUint64(const StringPiece& input, uint64_t* output) {
    409   return StringToIntImpl(input, output);
    410 }
    411 
    412 bool StringToUint64(const StringPiece16& input, uint64_t* output) {
    413   return String16ToIntImpl(input, output);
    414 }
    415 
    416 bool StringToSizeT(const StringPiece& input, size_t* output) {
    417   return StringToIntImpl(input, output);
    418 }
    419 
    420 bool StringToSizeT(const StringPiece16& input, size_t* output) {
    421   return String16ToIntImpl(input, output);
    422 }
    423 
    424 bool StringToDouble(const std::string& input, double* output) {
    425   char* endptr = nullptr;
    426   *output = strtod(input.c_str(), &endptr);
    427 
    428   // Cases to return false:
    429   //  - If the input string is empty, there was nothing to parse.
    430   //  - If endptr does not point to the end of the string, there are either
    431   //    characters remaining in the string after a parsed number, or the string
    432   //    does not begin with a parseable number.  endptr is compared to the
    433   //    expected end given the string's stated length to correctly catch cases
    434   //    where the string contains embedded NUL characters.
    435   //  - If the first character is a space, there was leading whitespace
    436   return !input.empty() &&
    437          input.c_str() + input.length() == endptr &&
    438          !isspace(input[0]) &&
    439          *output != std::numeric_limits<double>::infinity() &&
    440          *output != -std::numeric_limits<double>::infinity();
    441 }
    442 
    443 // Note: if you need to add String16ToDouble, first ask yourself if it's
    444 // really necessary. If it is, probably the best implementation here is to
    445 // convert to 8-bit and then use the 8-bit version.
    446 
    447 // Note: if you need to add an iterator range version of StringToDouble, first
    448 // ask yourself if it's really necessary. If it is, probably the best
    449 // implementation here is to instantiate a string and use the string version.
    450 
    451 std::string HexEncode(const void* bytes, size_t size) {
    452   static const char kHexChars[] = "0123456789ABCDEF";
    453 
    454   // Each input byte creates two output hex characters.
    455   std::string ret(size * 2, '\0');
    456 
    457   for (size_t i = 0; i < size; ++i) {
    458     char b = reinterpret_cast<const char*>(bytes)[i];
    459     ret[(i * 2)] = kHexChars[(b >> 4) & 0xf];
    460     ret[(i * 2) + 1] = kHexChars[b & 0xf];
    461   }
    462   return ret;
    463 }
    464 
    465 bool HexStringToInt(const StringPiece& input, int* output) {
    466   return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke(
    467     input.begin(), input.end(), output);
    468 }
    469 
    470 bool HexStringToUInt(const StringPiece& input, uint32_t* output) {
    471   return IteratorRangeToNumber<HexIteratorRangeToUIntTraits>::Invoke(
    472       input.begin(), input.end(), output);
    473 }
    474 
    475 bool HexStringToInt64(const StringPiece& input, int64_t* output) {
    476   return IteratorRangeToNumber<HexIteratorRangeToInt64Traits>::Invoke(
    477     input.begin(), input.end(), output);
    478 }
    479 
    480 bool HexStringToUInt64(const StringPiece& input, uint64_t* output) {
    481   return IteratorRangeToNumber<HexIteratorRangeToUInt64Traits>::Invoke(
    482       input.begin(), input.end(), output);
    483 }
    484 
    485 bool HexStringToBytes(const std::string& input, std::vector<uint8_t>* output) {
    486   return HexStringToBytesT(input, output);
    487 }
    488 
    489 }  // namespace base
    490