Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "base/string_number_conversions.h"
      6 
      7 #include <errno.h>
      8 #include <stdlib.h>
      9 
     10 #include <limits>
     11 
     12 #include "base/logging.h"
     13 #include "base/third_party/dmg_fp/dmg_fp.h"
     14 #include "base/utf_string_conversions.h"
     15 
     16 namespace base {
     17 
     18 namespace {
     19 
     20 template <typename STR, typename INT, typename UINT, bool NEG>
     21 struct IntToStringT {
     22   // This is to avoid a compiler warning about unary minus on unsigned type.
     23   // For example, say you had the following code:
     24   //   template <typename INT>
     25   //   INT abs(INT value) { return value < 0 ? -value : value; }
     26   // Even though if INT is unsigned, it's impossible for value < 0, so the
     27   // unary minus will never be taken, the compiler will still generate a
     28   // warning.  We do a little specialization dance...
     29   template <typename INT2, typename UINT2, bool NEG2>
     30   struct ToUnsignedT {};
     31 
     32   template <typename INT2, typename UINT2>
     33   struct ToUnsignedT<INT2, UINT2, false> {
     34     static UINT2 ToUnsigned(INT2 value) {
     35       return static_cast<UINT2>(value);
     36     }
     37   };
     38 
     39   template <typename INT2, typename UINT2>
     40   struct ToUnsignedT<INT2, UINT2, true> {
     41     static UINT2 ToUnsigned(INT2 value) {
     42       return static_cast<UINT2>(value < 0 ? -value : value);
     43     }
     44   };
     45 
     46   // This set of templates is very similar to the above templates, but
     47   // for testing whether an integer is negative.
     48   template <typename INT2, bool NEG2>
     49   struct TestNegT {};
     50   template <typename INT2>
     51   struct TestNegT<INT2, false> {
     52     static bool TestNeg(INT2 value) {
     53       // value is unsigned, and can never be negative.
     54       return false;
     55     }
     56   };
     57   template <typename INT2>
     58   struct TestNegT<INT2, true> {
     59     static bool TestNeg(INT2 value) {
     60       return value < 0;
     61     }
     62   };
     63 
     64   static STR IntToString(INT value) {
     65     // log10(2) ~= 0.3 bytes needed per bit or per byte log10(2**8) ~= 2.4.
     66     // So round up to allocate 3 output characters per byte, plus 1 for '-'.
     67     const int kOutputBufSize = 3 * sizeof(INT) + 1;
     68 
     69     // Allocate the whole string right away, we will right back to front, and
     70     // then return the substr of what we ended up using.
     71     STR outbuf(kOutputBufSize, 0);
     72 
     73     bool is_neg = TestNegT<INT, NEG>::TestNeg(value);
     74     // Even though is_neg will never be true when INT is parameterized as
     75     // unsigned, even the presence of the unary operation causes a warning.
     76     UINT res = ToUnsignedT<INT, UINT, NEG>::ToUnsigned(value);
     77 
     78     for (typename STR::iterator it = outbuf.end();;) {
     79       --it;
     80       DCHECK(it != outbuf.begin());
     81       *it = static_cast<typename STR::value_type>((res % 10) + '0');
     82       res /= 10;
     83 
     84       // We're done..
     85       if (res == 0) {
     86         if (is_neg) {
     87           --it;
     88           DCHECK(it != outbuf.begin());
     89           *it = static_cast<typename STR::value_type>('-');
     90         }
     91         return STR(it, outbuf.end());
     92       }
     93     }
     94     NOTREACHED();
     95     return STR();
     96   }
     97 };
     98 
     99 // Utility to convert a character to a digit in a given base
    100 template<typename CHAR, int BASE, bool BASE_LTE_10> class BaseCharToDigit {
    101 };
    102 
    103 // Faster specialization for bases <= 10
    104 template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, true> {
    105  public:
    106   static bool Convert(CHAR c, uint8* digit) {
    107     if (c >= '0' && c < '0' + BASE) {
    108       *digit = c - '0';
    109       return true;
    110     }
    111     return false;
    112   }
    113 };
    114 
    115 // Specialization for bases where 10 < base <= 36
    116 template<typename CHAR, int BASE> class BaseCharToDigit<CHAR, BASE, false> {
    117  public:
    118   static bool Convert(CHAR c, uint8* digit) {
    119     if (c >= '0' && c <= '9') {
    120       *digit = c - '0';
    121     } else if (c >= 'a' && c < 'a' + BASE - 10) {
    122       *digit = c - 'a' + 10;
    123     } else if (c >= 'A' && c < 'A' + BASE - 10) {
    124       *digit = c - 'A' + 10;
    125     } else {
    126       return false;
    127     }
    128     return true;
    129   }
    130 };
    131 
    132 template<int BASE, typename CHAR> bool CharToDigit(CHAR c, uint8* digit) {
    133   return BaseCharToDigit<CHAR, BASE, BASE <= 10>::Convert(c, digit);
    134 }
    135 
    136 // There is an IsWhitespace for wchars defined in string_util.h, but it is
    137 // locale independent, whereas the functions we are replacing were
    138 // locale-dependent. TBD what is desired, but for the moment let's not introduce
    139 // a change in behaviour.
    140 template<typename CHAR> class WhitespaceHelper {
    141 };
    142 
    143 template<> class WhitespaceHelper<char> {
    144  public:
    145   static bool Invoke(char c) {
    146     return 0 != isspace(static_cast<unsigned char>(c));
    147   }
    148 };
    149 
    150 template<> class WhitespaceHelper<char16> {
    151  public:
    152   static bool Invoke(char16 c) {
    153     return 0 != iswspace(c);
    154   }
    155 };
    156 
    157 template<typename CHAR> bool LocalIsWhitespace(CHAR c) {
    158   return WhitespaceHelper<CHAR>::Invoke(c);
    159 }
    160 
    161 // IteratorRangeToNumberTraits should provide:
    162 //  - a typedef for iterator_type, the iterator type used as input.
    163 //  - a typedef for value_type, the target numeric type.
    164 //  - static functions min, max (returning the minimum and maximum permitted
    165 //    values)
    166 //  - constant kBase, the base in which to interpret the input
    167 template<typename IteratorRangeToNumberTraits>
    168 class IteratorRangeToNumber {
    169  public:
    170   typedef IteratorRangeToNumberTraits traits;
    171   typedef typename traits::iterator_type const_iterator;
    172   typedef typename traits::value_type value_type;
    173 
    174   // Generalized iterator-range-to-number conversion.
    175   //
    176   static bool Invoke(const_iterator begin,
    177                      const_iterator end,
    178                      value_type* output) {
    179     bool valid = true;
    180 
    181     while (begin != end && LocalIsWhitespace(*begin)) {
    182       valid = false;
    183       ++begin;
    184     }
    185 
    186     if (begin != end && *begin == '-') {
    187       if (!Negative::Invoke(begin + 1, end, output)) {
    188         valid = false;
    189       }
    190     } else {
    191       if (begin != end && *begin == '+') {
    192         ++begin;
    193       }
    194       if (!Positive::Invoke(begin, end, output)) {
    195         valid = false;
    196       }
    197     }
    198 
    199     return valid;
    200   }
    201 
    202  private:
    203   // Sign provides:
    204   //  - a static function, CheckBounds, that determines whether the next digit
    205   //    causes an overflow/underflow
    206   //  - a static function, Increment, that appends the next digit appropriately
    207   //    according to the sign of the number being parsed.
    208   template<typename Sign>
    209   class Base {
    210    public:
    211     static bool Invoke(const_iterator begin, const_iterator end,
    212                        typename traits::value_type* output) {
    213       *output = 0;
    214 
    215       if (begin == end) {
    216         return false;
    217       }
    218 
    219       // Note: no performance difference was found when using template
    220       // specialization to remove this check in bases other than 16
    221       if (traits::kBase == 16 && end - begin >= 2 && *begin == '0' &&
    222           (*(begin + 1) == 'x' || *(begin + 1) == 'X')) {
    223         begin += 2;
    224       }
    225 
    226       for (const_iterator current = begin; current != end; ++current) {
    227         uint8 new_digit = 0;
    228 
    229         if (!CharToDigit<traits::kBase>(*current, &new_digit)) {
    230           return false;
    231         }
    232 
    233         if (current != begin) {
    234           if (!Sign::CheckBounds(output, new_digit)) {
    235             return false;
    236           }
    237           *output *= traits::kBase;
    238         }
    239 
    240         Sign::Increment(new_digit, output);
    241       }
    242       return true;
    243     }
    244   };
    245 
    246   class Positive : public Base<Positive> {
    247    public:
    248     static bool CheckBounds(value_type* output, uint8 new_digit) {
    249       if (*output > static_cast<value_type>(traits::max() / traits::kBase) ||
    250           (*output == static_cast<value_type>(traits::max() / traits::kBase) &&
    251            new_digit > traits::max() % traits::kBase)) {
    252         *output = traits::max();
    253         return false;
    254       }
    255       return true;
    256     }
    257     static void Increment(uint8 increment, value_type* output) {
    258       *output += increment;
    259     }
    260   };
    261 
    262   class Negative : public Base<Negative> {
    263    public:
    264     static bool CheckBounds(value_type* output, uint8 new_digit) {
    265       if (*output < traits::min() / traits::kBase ||
    266           (*output == traits::min() / traits::kBase &&
    267            new_digit > 0 - traits::min() % traits::kBase)) {
    268         *output = traits::min();
    269         return false;
    270       }
    271       return true;
    272     }
    273     static void Increment(uint8 increment, value_type* output) {
    274       *output -= increment;
    275     }
    276   };
    277 };
    278 
    279 template<typename ITERATOR, typename VALUE, int BASE>
    280 class BaseIteratorRangeToNumberTraits {
    281  public:
    282   typedef ITERATOR iterator_type;
    283   typedef VALUE value_type;
    284   static value_type min() {
    285     return std::numeric_limits<value_type>::min();
    286   }
    287   static value_type max() {
    288     return std::numeric_limits<value_type>::max();
    289   }
    290   static const int kBase = BASE;
    291 };
    292 
    293 typedef BaseIteratorRangeToNumberTraits<std::string::const_iterator, int, 10>
    294     IteratorRangeToIntTraits;
    295 typedef BaseIteratorRangeToNumberTraits<string16::const_iterator, int, 10>
    296     WideIteratorRangeToIntTraits;
    297 typedef BaseIteratorRangeToNumberTraits<std::string::const_iterator, int64, 10>
    298     IteratorRangeToInt64Traits;
    299 typedef BaseIteratorRangeToNumberTraits<string16::const_iterator, int64, 10>
    300     WideIteratorRangeToInt64Traits;
    301 
    302 typedef BaseIteratorRangeToNumberTraits<const char*, int, 10>
    303     CharBufferToIntTraits;
    304 typedef BaseIteratorRangeToNumberTraits<const char16*, int, 10>
    305     WideCharBufferToIntTraits;
    306 typedef BaseIteratorRangeToNumberTraits<const char*, int64, 10>
    307     CharBufferToInt64Traits;
    308 typedef BaseIteratorRangeToNumberTraits<const char16*, int64, 10>
    309     WideCharBufferToInt64Traits;
    310 
    311 template<typename ITERATOR>
    312 class BaseHexIteratorRangeToIntTraits
    313     : public BaseIteratorRangeToNumberTraits<ITERATOR, int, 16> {
    314  public:
    315   // Allow parsing of 0xFFFFFFFF, which is technically an overflow
    316   static unsigned int max() {
    317     return std::numeric_limits<unsigned int>::max();
    318   }
    319 };
    320 
    321 typedef BaseHexIteratorRangeToIntTraits<std::string::const_iterator>
    322     HexIteratorRangeToIntTraits;
    323 typedef BaseHexIteratorRangeToIntTraits<const char*>
    324     HexCharBufferToIntTraits;
    325 
    326 template<typename STR>
    327 bool HexStringToBytesT(const STR& input, std::vector<uint8>* output) {
    328   DCHECK_EQ(output->size(), 0u);
    329   size_t count = input.size();
    330   if (count == 0 || (count % 2) != 0)
    331     return false;
    332   for (uintptr_t i = 0; i < count / 2; ++i) {
    333     uint8 msb = 0;  // most significant 4 bits
    334     uint8 lsb = 0;  // least significant 4 bits
    335     if (!CharToDigit<16>(input[i * 2], &msb) ||
    336         !CharToDigit<16>(input[i * 2 + 1], &lsb))
    337       return false;
    338     output->push_back((msb << 4) | lsb);
    339   }
    340   return true;
    341 }
    342 
    343 }  // namespace
    344 
    345 std::string IntToString(int value) {
    346   return IntToStringT<std::string, int, unsigned int, true>::
    347       IntToString(value);
    348 }
    349 
    350 string16 IntToString16(int value) {
    351   return IntToStringT<string16, int, unsigned int, true>::
    352       IntToString(value);
    353 }
    354 
    355 std::string UintToString(unsigned int value) {
    356   return IntToStringT<std::string, unsigned int, unsigned int, false>::
    357       IntToString(value);
    358 }
    359 
    360 string16 UintToString16(unsigned int value) {
    361   return IntToStringT<string16, unsigned int, unsigned int, false>::
    362       IntToString(value);
    363 }
    364 
    365 std::string Int64ToString(int64 value) {
    366   return IntToStringT<std::string, int64, uint64, true>::
    367       IntToString(value);
    368 }
    369 
    370 string16 Int64ToString16(int64 value) {
    371   return IntToStringT<string16, int64, uint64, true>::IntToString(value);
    372 }
    373 
    374 std::string Uint64ToString(uint64 value) {
    375   return IntToStringT<std::string, uint64, uint64, false>::
    376       IntToString(value);
    377 }
    378 
    379 string16 Uint64ToString16(uint64 value) {
    380   return IntToStringT<string16, uint64, uint64, false>::
    381       IntToString(value);
    382 }
    383 
    384 std::string DoubleToString(double value) {
    385   // According to g_fmt.cc, it is sufficient to declare a buffer of size 32.
    386   char buffer[32];
    387   dmg_fp::g_fmt(buffer, value);
    388   return std::string(buffer);
    389 }
    390 
    391 bool StringToInt(const std::string& input, int* output) {
    392   return IteratorRangeToNumber<IteratorRangeToIntTraits>::Invoke(input.begin(),
    393                                                                  input.end(),
    394                                                                  output);
    395 }
    396 
    397 #if !defined(ANDROID)
    398 bool StringToInt(std::string::const_iterator begin,
    399                  std::string::const_iterator end,
    400                  int* output) {
    401   return IteratorRangeToNumber<IteratorRangeToIntTraits>::Invoke(begin,
    402                                                                  end,
    403                                                                  output);
    404 }
    405 #endif
    406 
    407 bool StringToInt(const char* begin, const char* end, int* output) {
    408   return IteratorRangeToNumber<CharBufferToIntTraits>::Invoke(begin,
    409                                                               end,
    410                                                               output);
    411 }
    412 
    413 bool StringToInt(const string16& input, int* output) {
    414   return IteratorRangeToNumber<WideIteratorRangeToIntTraits>::Invoke(
    415     input.begin(), input.end(), output);
    416 }
    417 
    418 #if !defined(ANDROID)
    419 bool StringToInt(string16::const_iterator begin,
    420                  string16::const_iterator end,
    421                  int* output) {
    422   return IteratorRangeToNumber<WideIteratorRangeToIntTraits>::Invoke(begin,
    423                                                                      end,
    424                                                                      output);
    425 }
    426 #endif
    427 
    428 bool StringToInt(const char16* begin, const char16* end, int* output) {
    429   return IteratorRangeToNumber<WideCharBufferToIntTraits>::Invoke(begin,
    430                                                                   end,
    431                                                                   output);
    432 }
    433 
    434 bool StringToInt64(const std::string& input, int64* output) {
    435   return IteratorRangeToNumber<IteratorRangeToInt64Traits>::Invoke(
    436     input.begin(), input.end(), output);
    437 }
    438 
    439 #if !defined(ANDROID)
    440 bool StringToInt64(std::string::const_iterator begin,
    441                  std::string::const_iterator end,
    442                  int64* output) {
    443   return IteratorRangeToNumber<IteratorRangeToInt64Traits>::Invoke(begin,
    444                                                                  end,
    445                                                                  output);
    446 }
    447 #endif
    448 
    449 bool StringToInt64(const char* begin, const char* end, int64* output) {
    450   return IteratorRangeToNumber<CharBufferToInt64Traits>::Invoke(begin,
    451                                                               end,
    452                                                               output);
    453 }
    454 
    455 bool StringToInt64(const string16& input, int64* output) {
    456   return IteratorRangeToNumber<WideIteratorRangeToInt64Traits>::Invoke(
    457     input.begin(), input.end(), output);
    458 }
    459 
    460 #if !defined(ANDROID)
    461 bool StringToInt64(string16::const_iterator begin,
    462                  string16::const_iterator end,
    463                  int64* output) {
    464   return IteratorRangeToNumber<WideIteratorRangeToInt64Traits>::Invoke(begin,
    465                                                                      end,
    466                                                                      output);
    467 }
    468 #endif
    469 
    470 bool StringToInt64(const char16* begin, const char16* end, int64* output) {
    471   return IteratorRangeToNumber<WideCharBufferToInt64Traits>::Invoke(begin,
    472                                                                   end,
    473                                                                   output);
    474 }
    475 
    476 bool StringToDouble(const std::string& input, double* output) {
    477   errno = 0;  // Thread-safe?  It is on at least Mac, Linux, and Windows.
    478   char* endptr = NULL;
    479   *output = dmg_fp::strtod(input.c_str(), &endptr);
    480 
    481   // Cases to return false:
    482   //  - If errno is ERANGE, there was an overflow or underflow.
    483   //  - If the input string is empty, there was nothing to parse.
    484   //  - If endptr does not point to the end of the string, there are either
    485   //    characters remaining in the string after a parsed number, or the string
    486   //    does not begin with a parseable number.  endptr is compared to the
    487   //    expected end given the string's stated length to correctly catch cases
    488   //    where the string contains embedded NUL characters.
    489   //  - If the first character is a space, there was leading whitespace
    490   return errno == 0 &&
    491          !input.empty() &&
    492          input.c_str() + input.length() == endptr &&
    493          !isspace(input[0]);
    494 }
    495 
    496 // Note: if you need to add String16ToDouble, first ask yourself if it's
    497 // really necessary. If it is, probably the best implementation here is to
    498 // convert to 8-bit and then use the 8-bit version.
    499 
    500 // Note: if you need to add an iterator range version of StringToDouble, first
    501 // ask yourself if it's really necessary. If it is, probably the best
    502 // implementation here is to instantiate a string and use the string version.
    503 
    504 std::string HexEncode(const void* bytes, size_t size) {
    505   static const char kHexChars[] = "0123456789ABCDEF";
    506 
    507   // Each input byte creates two output hex characters.
    508   std::string ret(size * 2, '\0');
    509 
    510   for (size_t i = 0; i < size; ++i) {
    511     char b = reinterpret_cast<const char*>(bytes)[i];
    512     ret[(i * 2)] = kHexChars[(b >> 4) & 0xf];
    513     ret[(i * 2) + 1] = kHexChars[b & 0xf];
    514   }
    515   return ret;
    516 }
    517 
    518 bool HexStringToInt(const std::string& input, int* output) {
    519   return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke(
    520     input.begin(), input.end(), output);
    521 }
    522 
    523 #if !defined(ANDROID)
    524 bool HexStringToInt(std::string::const_iterator begin,
    525                     std::string::const_iterator end,
    526                     int* output) {
    527   return IteratorRangeToNumber<HexIteratorRangeToIntTraits>::Invoke(begin,
    528                                                                     end,
    529                                                                     output);
    530 }
    531 #endif
    532 
    533 bool HexStringToInt(const char* begin, const char* end, int* output) {
    534   return IteratorRangeToNumber<HexCharBufferToIntTraits>::Invoke(begin,
    535                                                                     end,
    536                                                                     output);
    537 }
    538 
    539 bool HexStringToBytes(const std::string& input, std::vector<uint8>* output) {
    540   return HexStringToBytesT(input, output);
    541 }
    542 
    543 }  // namespace base
    544 
    545