Home | History | Annotate | Download | only in strings
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 Licensed under the Apache License, Version 2.0 (the "License");
      3 you may not use this file except in compliance with the License.
      4 You may obtain a copy of the License at
      5 
      6     http://www.apache.org/licenses/LICENSE-2.0
      7 
      8 Unless required by applicable law or agreed to in writing, software
      9 distributed under the License is distributed on an "AS IS" BASIS,
     10 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     11 See the License for the specific language governing permissions and
     12 limitations under the License.
     13 ==============================================================================*/
     14 
     15 #include "tensorflow/core/lib/strings/numbers.h"
     16 
     17 #include <ctype.h>
     18 #include <float.h>
     19 #include <stdio.h>
     20 #include <stdlib.h>
     21 #include <algorithm>
     22 #include <cinttypes>
     23 #include <cmath>
     24 #include <locale>
     25 #include <unordered_map>
     26 
     27 #include "double-conversion/double-conversion.h"
     28 
     29 #include "tensorflow/core/lib/strings/str_util.h"
     30 #include "tensorflow/core/lib/strings/stringprintf.h"
     31 #include "tensorflow/core/platform/logging.h"
     32 #include "tensorflow/core/platform/macros.h"
     33 #include "tensorflow/core/platform/types.h"
     34 
     35 namespace tensorflow {
     36 
     37 namespace {
     38 
     39 template <typename T>
     40 const std::unordered_map<string, T>* GetSpecialNumsSingleton() {
     41   static const std::unordered_map<string, T>* special_nums =
     42       CHECK_NOTNULL((new const std::unordered_map<string, T>{
     43           {"inf", std::numeric_limits<T>::infinity()},
     44           {"+inf", std::numeric_limits<T>::infinity()},
     45           {"-inf", -std::numeric_limits<T>::infinity()},
     46           {"infinity", std::numeric_limits<T>::infinity()},
     47           {"+infinity", std::numeric_limits<T>::infinity()},
     48           {"-infinity", -std::numeric_limits<T>::infinity()},
     49           {"nan", std::numeric_limits<T>::quiet_NaN()},
     50           {"+nan", std::numeric_limits<T>::quiet_NaN()},
     51           {"-nan", -std::numeric_limits<T>::quiet_NaN()},
     52       }));
     53   return special_nums;
     54 }
     55 
     56 template <typename T>
     57 T locale_independent_strtonum(const char* str, const char** endptr) {
     58   auto special_nums = GetSpecialNumsSingleton<T>();
     59   std::stringstream s(str);
     60 
     61   // Check if str is one of the special numbers.
     62   string special_num_str;
     63   s >> special_num_str;
     64 
     65   for (int i = 0; i < special_num_str.length(); ++i) {
     66     special_num_str[i] =
     67         std::tolower(special_num_str[i], std::locale::classic());
     68   }
     69 
     70   auto entry = special_nums->find(special_num_str);
     71   if (entry != special_nums->end()) {
     72     *endptr = str + (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
     73                              : s.tellg());
     74     return entry->second;
     75   } else {
     76     // Perhaps it's a hex number
     77     if (special_num_str.compare(0, 2, "0x") == 0 ||
     78         special_num_str.compare(0, 3, "-0x") == 0) {
     79       return strtol(str, const_cast<char**>(endptr), 16);
     80     }
     81   }
     82   // Reset the stream
     83   s.str(str);
     84   s.clear();
     85   // Use the "C" locale
     86   s.imbue(std::locale::classic());
     87 
     88   T result;
     89   s >> result;
     90 
     91   // Set to result to what strto{f,d} functions would have returned. If the
     92   // number was outside the range, the stringstream sets the fail flag, but
     93   // returns the +/-max() value, whereas strto{f,d} functions return +/-INF.
     94   if (s.fail()) {
     95     if (result == std::numeric_limits<T>::max() ||
     96         result == std::numeric_limits<T>::infinity()) {
     97       result = std::numeric_limits<T>::infinity();
     98       s.clear(s.rdstate() & ~std::ios::failbit);
     99     } else if (result == -std::numeric_limits<T>::max() ||
    100                result == -std::numeric_limits<T>::infinity()) {
    101       result = -std::numeric_limits<T>::infinity();
    102       s.clear(s.rdstate() & ~std::ios::failbit);
    103     }
    104   }
    105 
    106   if (endptr) {
    107     *endptr =
    108         str +
    109         (s.fail() ? static_cast<std::iostream::pos_type>(0)
    110                   : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
    111                              : s.tellg()));
    112   }
    113   return result;
    114 }
    115 
    116 static inline const double_conversion::StringToDoubleConverter&
    117 StringToFloatConverter() {
    118   static const double_conversion::StringToDoubleConverter converter(
    119       double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES |
    120           double_conversion::StringToDoubleConverter::ALLOW_HEX |
    121           double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES |
    122           double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY,
    123       0., 0., "inf", "nan");
    124   return converter;
    125 }
    126 
    127 }  // namespace
    128 
    129 namespace strings {
    130 
    131 size_t FastInt32ToBufferLeft(int32 i, char* buffer) {
    132   uint32 u = i;
    133   size_t length = 0;
    134   if (i < 0) {
    135     *buffer++ = '-';
    136     ++length;
    137     // We need to do the negation in modular (i.e., "unsigned")
    138     // arithmetic; MSVC++ apparently warns for plain "-u", so
    139     // we write the equivalent expression "0 - u" instead.
    140     u = 0 - u;
    141   }
    142   length += FastUInt32ToBufferLeft(u, buffer);
    143   return length;
    144 }
    145 
    146 size_t FastUInt32ToBufferLeft(uint32 i, char* buffer) {
    147   char* start = buffer;
    148   do {
    149     *buffer++ = ((i % 10) + '0');
    150     i /= 10;
    151   } while (i > 0);
    152   *buffer = 0;
    153   std::reverse(start, buffer);
    154   return buffer - start;
    155 }
    156 
    157 size_t FastInt64ToBufferLeft(int64 i, char* buffer) {
    158   uint64 u = i;
    159   size_t length = 0;
    160   if (i < 0) {
    161     *buffer++ = '-';
    162     ++length;
    163     u = 0 - u;
    164   }
    165   length += FastUInt64ToBufferLeft(u, buffer);
    166   return length;
    167 }
    168 
    169 size_t FastUInt64ToBufferLeft(uint64 i, char* buffer) {
    170   char* start = buffer;
    171   do {
    172     *buffer++ = ((i % 10) + '0');
    173     i /= 10;
    174   } while (i > 0);
    175   *buffer = 0;
    176   std::reverse(start, buffer);
    177   return buffer - start;
    178 }
    179 
    180 static const double kDoublePrecisionCheckMax = DBL_MAX / 1.000000000000001;
    181 
    182 size_t DoubleToBuffer(double value, char* buffer) {
    183   // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
    184   // platforms these days.  Just in case some system exists where DBL_DIG
    185   // is significantly larger -- and risks overflowing our buffer -- we have
    186   // this assert.
    187   static_assert(DBL_DIG < 20, "DBL_DIG is too big");
    188 
    189   if (std::abs(value) <= kDoublePrecisionCheckMax) {
    190     int snprintf_result =
    191         snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG, value);
    192 
    193     // The snprintf should never overflow because the buffer is significantly
    194     // larger than the precision we asked for.
    195     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
    196 
    197     if (locale_independent_strtonum<double>(buffer, nullptr) == value) {
    198       // Round-tripping the string to double works; we're done.
    199       return snprintf_result;
    200     }
    201     // else: full precision formatting needed. Fall through.
    202   }
    203 
    204   int snprintf_result =
    205       snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG + 2, value);
    206 
    207   // Should never overflow; see above.
    208   DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
    209 
    210   return snprintf_result;
    211 }
    212 
    213 namespace {
    214 char SafeFirstChar(StringPiece str) {
    215   if (str.empty()) return '\0';
    216   return str[0];
    217 }
    218 void SkipSpaces(StringPiece* str) {
    219   while (isspace(SafeFirstChar(*str))) str->remove_prefix(1);
    220 }
    221 }  // namespace
    222 
    223 bool safe_strto64(StringPiece str, int64* value) {
    224   SkipSpaces(&str);
    225 
    226   int64 vlimit = kint64max;
    227   int sign = 1;
    228   if (str_util::ConsumePrefix(&str, "-")) {
    229     sign = -1;
    230     // Different limit for positive and negative integers.
    231     vlimit = kint64min;
    232   }
    233 
    234   if (!isdigit(SafeFirstChar(str))) return false;
    235 
    236   int64 result = 0;
    237   if (sign == 1) {
    238     do {
    239       int digit = SafeFirstChar(str) - '0';
    240       if ((vlimit - digit) / 10 < result) {
    241         return false;
    242       }
    243       result = result * 10 + digit;
    244       str.remove_prefix(1);
    245     } while (isdigit(SafeFirstChar(str)));
    246   } else {
    247     do {
    248       int digit = SafeFirstChar(str) - '0';
    249       if ((vlimit + digit) / 10 > result) {
    250         return false;
    251       }
    252       result = result * 10 - digit;
    253       str.remove_prefix(1);
    254     } while (isdigit(SafeFirstChar(str)));
    255   }
    256 
    257   SkipSpaces(&str);
    258   if (!str.empty()) return false;
    259 
    260   *value = result;
    261   return true;
    262 }
    263 
    264 bool safe_strtou64(StringPiece str, uint64* value) {
    265   SkipSpaces(&str);
    266   if (!isdigit(SafeFirstChar(str))) return false;
    267 
    268   uint64 result = 0;
    269   do {
    270     int digit = SafeFirstChar(str) - '0';
    271     if ((kuint64max - digit) / 10 < result) {
    272       return false;
    273     }
    274     result = result * 10 + digit;
    275     str.remove_prefix(1);
    276   } while (isdigit(SafeFirstChar(str)));
    277 
    278   SkipSpaces(&str);
    279   if (!str.empty()) return false;
    280 
    281   *value = result;
    282   return true;
    283 }
    284 
    285 bool safe_strto32(StringPiece str, int32* value) {
    286   SkipSpaces(&str);
    287 
    288   int64 vmax = kint32max;
    289   int sign = 1;
    290   if (str_util::ConsumePrefix(&str, "-")) {
    291     sign = -1;
    292     // Different max for positive and negative integers.
    293     ++vmax;
    294   }
    295 
    296   if (!isdigit(SafeFirstChar(str))) return false;
    297 
    298   int64 result = 0;
    299   do {
    300     result = result * 10 + SafeFirstChar(str) - '0';
    301     if (result > vmax) {
    302       return false;
    303     }
    304     str.remove_prefix(1);
    305   } while (isdigit(SafeFirstChar(str)));
    306 
    307   SkipSpaces(&str);
    308 
    309   if (!str.empty()) return false;
    310 
    311   *value = static_cast<int32>(result * sign);
    312   return true;
    313 }
    314 
    315 bool safe_strtou32(StringPiece str, uint32* value) {
    316   SkipSpaces(&str);
    317   if (!isdigit(SafeFirstChar(str))) return false;
    318 
    319   int64 result = 0;
    320   do {
    321     result = result * 10 + SafeFirstChar(str) - '0';
    322     if (result > kuint32max) {
    323       return false;
    324     }
    325     str.remove_prefix(1);
    326   } while (isdigit(SafeFirstChar(str)));
    327 
    328   SkipSpaces(&str);
    329   if (!str.empty()) return false;
    330 
    331   *value = static_cast<uint32>(result);
    332   return true;
    333 }
    334 
    335 bool safe_strtof(StringPiece str, float* value) {
    336   int processed_characters_count = -1;
    337   auto len = str.size();
    338 
    339   // If string length exceeds buffer size or int max, fail.
    340   if (len >= kFastToBufferSize) return false;
    341   if (len > std::numeric_limits<int>::max()) return false;
    342 
    343   *value = StringToFloatConverter().StringToFloat(
    344       str.data(), static_cast<int>(len), &processed_characters_count);
    345   return processed_characters_count > 0;
    346 }
    347 
    348 bool safe_strtod(StringPiece str, double* value) {
    349   int processed_characters_count = -1;
    350   auto len = str.size();
    351 
    352   // If string length exceeds buffer size or int max, fail.
    353   if (len >= kFastToBufferSize) return false;
    354   if (len > std::numeric_limits<int>::max()) return false;
    355 
    356   *value = StringToFloatConverter().StringToDouble(
    357       str.data(), static_cast<int>(len), &processed_characters_count);
    358   return processed_characters_count > 0;
    359 }
    360 
    361 size_t FloatToBuffer(float value, char* buffer) {
    362   // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
    363   // platforms these days.  Just in case some system exists where FLT_DIG
    364   // is significantly larger -- and risks overflowing our buffer -- we have
    365   // this assert.
    366   static_assert(FLT_DIG < 10, "FLT_DIG is too big");
    367 
    368   int snprintf_result =
    369       snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG, value);
    370 
    371   // The snprintf should never overflow because the buffer is significantly
    372   // larger than the precision we asked for.
    373   DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
    374 
    375   float parsed_value;
    376   if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
    377     snprintf_result =
    378         snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG + 3, value);
    379 
    380     // Should never overflow; see above.
    381     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
    382   }
    383   return snprintf_result;
    384 }
    385 
    386 string FpToString(Fprint fp) {
    387   char buf[17];
    388   snprintf(buf, sizeof(buf), "%016llx", static_cast<uint64>(fp));
    389   return string(buf);
    390 }
    391 
    392 bool StringToFp(const string& s, Fprint* fp) {
    393   char junk;
    394   uint64_t result;
    395   if (sscanf(s.c_str(), "%" SCNx64 "%c", &result, &junk) == 1) {
    396     *fp = result;
    397     return true;
    398   } else {
    399     return false;
    400   }
    401 }
    402 
    403 StringPiece Uint64ToHexString(uint64 v, char* buf) {
    404   static const char* hexdigits = "0123456789abcdef";
    405   const int num_byte = 16;
    406   buf[num_byte] = '\0';
    407   for (int i = num_byte - 1; i >= 0; i--) {
    408     buf[i] = hexdigits[v & 0xf];
    409     v >>= 4;
    410   }
    411   return StringPiece(buf, num_byte);
    412 }
    413 
    414 bool HexStringToUint64(const StringPiece& s, uint64* result) {
    415   uint64 v = 0;
    416   if (s.empty()) {
    417     return false;
    418   }
    419   for (size_t i = 0; i < s.size(); i++) {
    420     char c = s[i];
    421     if (c >= '0' && c <= '9') {
    422       v = (v << 4) + (c - '0');
    423     } else if (c >= 'a' && c <= 'f') {
    424       v = (v << 4) + 10 + (c - 'a');
    425     } else if (c >= 'A' && c <= 'F') {
    426       v = (v << 4) + 10 + (c - 'A');
    427     } else {
    428       return false;
    429     }
    430   }
    431   *result = v;
    432   return true;
    433 }
    434 
    435 string HumanReadableNum(int64 value) {
    436   string s;
    437   if (value < 0) {
    438     s += "-";
    439     value = -value;
    440   }
    441   if (value < 1000) {
    442     Appendf(&s, "%lld", value);
    443   } else if (value >= static_cast<int64>(1e15)) {
    444     // Number bigger than 1E15; use that notation.
    445     Appendf(&s, "%0.3G", static_cast<double>(value));
    446   } else {
    447     static const char units[] = "kMBT";
    448     const char* unit = units;
    449     while (value >= static_cast<int64>(1000000)) {
    450       value /= static_cast<int64>(1000);
    451       ++unit;
    452       CHECK(unit < units + TF_ARRAYSIZE(units));
    453     }
    454     Appendf(&s, "%.2f%c", value / 1000.0, *unit);
    455   }
    456   return s;
    457 }
    458 
    459 string HumanReadableNumBytes(int64 num_bytes) {
    460   if (num_bytes == kint64min) {
    461     // Special case for number with not representable negation.
    462     return "-8E";
    463   }
    464 
    465   const char* neg_str = (num_bytes < 0) ? "-" : "";
    466   if (num_bytes < 0) {
    467     num_bytes = -num_bytes;
    468   }
    469 
    470   // Special case for bytes.
    471   if (num_bytes < 1024) {
    472     // No fractions for bytes.
    473     char buf[8];  // Longest possible string is '-XXXXB'
    474     snprintf(buf, sizeof(buf), "%s%lldB", neg_str,
    475              static_cast<int64>(num_bytes));
    476     return string(buf);
    477   }
    478 
    479   static const char units[] = "KMGTPE";  // int64 only goes up to E.
    480   const char* unit = units;
    481   while (num_bytes >= static_cast<int64>(1024) * 1024) {
    482     num_bytes /= 1024;
    483     ++unit;
    484     CHECK(unit < units + TF_ARRAYSIZE(units));
    485   }
    486 
    487   // We use SI prefixes.
    488   char buf[16];
    489   snprintf(buf, sizeof(buf), ((*unit == 'K') ? "%s%.1f%ciB" : "%s%.2f%ciB"),
    490            neg_str, num_bytes / 1024.0, *unit);
    491   return string(buf);
    492 }
    493 
    494 string HumanReadableElapsedTime(double seconds) {
    495   string human_readable;
    496 
    497   if (seconds < 0) {
    498     human_readable = "-";
    499     seconds = -seconds;
    500   }
    501 
    502   // Start with us and keep going up to years.
    503   // The comparisons must account for rounding to prevent the format breaking
    504   // the tested condition and returning, e.g., "1e+03 us" instead of "1 ms".
    505   const double microseconds = seconds * 1.0e6;
    506   if (microseconds < 999.5) {
    507     strings::Appendf(&human_readable, "%0.3g us", microseconds);
    508     return human_readable;
    509   }
    510   double milliseconds = seconds * 1e3;
    511   if (milliseconds >= .995 && milliseconds < 1) {
    512     // Round half to even in Appendf would convert this to 0.999 ms.
    513     milliseconds = 1.0;
    514   }
    515   if (milliseconds < 999.5) {
    516     strings::Appendf(&human_readable, "%0.3g ms", milliseconds);
    517     return human_readable;
    518   }
    519   if (seconds < 60.0) {
    520     strings::Appendf(&human_readable, "%0.3g s", seconds);
    521     return human_readable;
    522   }
    523   seconds /= 60.0;
    524   if (seconds < 60.0) {
    525     strings::Appendf(&human_readable, "%0.3g min", seconds);
    526     return human_readable;
    527   }
    528   seconds /= 60.0;
    529   if (seconds < 24.0) {
    530     strings::Appendf(&human_readable, "%0.3g h", seconds);
    531     return human_readable;
    532   }
    533   seconds /= 24.0;
    534   if (seconds < 30.0) {
    535     strings::Appendf(&human_readable, "%0.3g days", seconds);
    536     return human_readable;
    537   }
    538   if (seconds < 365.2425) {
    539     strings::Appendf(&human_readable, "%0.3g months", seconds / 30.436875);
    540     return human_readable;
    541   }
    542   seconds /= 365.2425;
    543   strings::Appendf(&human_readable, "%0.3g years", seconds);
    544   return human_readable;
    545 }
    546 
    547 }  // namespace strings
    548 }  // namespace tensorflow
    549