Home | History | Annotate | Download | only in strings
      1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 Licensed under the Apache License, Version 2.0 (the "License");
      3 you may not use this file except in compliance with the License.
      4 You may obtain a copy of the License at
      5 
      6     http://www.apache.org/licenses/LICENSE-2.0
      7 
      8 Unless required by applicable law or agreed to in writing, software
      9 distributed under the License is distributed on an "AS IS" BASIS,
     10 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     11 See the License for the specific language governing permissions and
     12 limitations under the License.
     13 ==============================================================================*/
     14 
     15 #include "tensorflow/core/lib/strings/numbers.h"
     16 
#include <ctype.h>
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
#include <cmath>
#include <limits>
#include <locale>
#include <sstream>
#include <string>
#include <unordered_map>
     25 
     26 #include "tensorflow/core/lib/strings/stringprintf.h"
     27 #include "tensorflow/core/platform/logging.h"
     28 #include "tensorflow/core/platform/macros.h"
     29 #include "tensorflow/core/platform/types.h"
     30 
     31 namespace tensorflow {
     32 
     33 namespace {
     34 
     35 template <typename T>
     36 T locale_independent_strtonum(const char* str, const char** endptr) {
     37   static const std::unordered_map<string, T> special_nums = {
     38       {"inf", std::numeric_limits<T>::infinity()},
     39       {"+inf", std::numeric_limits<T>::infinity()},
     40       {"-inf", -std::numeric_limits<T>::infinity()},
     41       {"infinity", std::numeric_limits<T>::infinity()},
     42       {"+infinity", std::numeric_limits<T>::infinity()},
     43       {"-infinity", -std::numeric_limits<T>::infinity()},
     44       {"nan", std::numeric_limits<T>::quiet_NaN()},
     45       {"+nan", std::numeric_limits<T>::quiet_NaN()},
     46       {"-nan", -std::numeric_limits<T>::quiet_NaN()},
     47   };
     48   std::stringstream s(str);
     49 
     50   // Check if str is one of the special numbers.
     51   string special_num_str;
     52   s >> special_num_str;
     53 
     54   for (int i = 0; i < special_num_str.length(); ++i) {
     55     special_num_str[i] =
     56         std::tolower(special_num_str[i], std::locale::classic());
     57   }
     58 
     59   auto entry = special_nums.find(special_num_str);
     60   if (entry != special_nums.end()) {
     61     *endptr = str + (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
     62                              : s.tellg());
     63     return entry->second;
     64   } else {
     65     // Perhaps it's a hex number
     66     if (special_num_str.compare(0, 2, "0x") == 0 ||
     67         special_num_str.compare(0, 3, "-0x") == 0) {
     68       return strtol(str, const_cast<char**>(endptr), 16);
     69     }
     70   }
     71   // Reset the stream
     72   s.str(str);
     73   s.clear();
     74   // Use the "C" locale
     75   s.imbue(std::locale::classic());
     76 
     77   T result;
     78   s >> result;
     79 
     80   // Set to result to what strto{f,d} functions would have returned. If the
     81   // number was outside the range, the stringstream sets the fail flag, but
     82   // returns the +/-max() value, whereas strto{f,d} functions return +/-INF.
     83   if (s.fail()) {
     84     if (result == std::numeric_limits<T>::max() ||
     85         result == std::numeric_limits<T>::infinity()) {
     86       result = std::numeric_limits<T>::infinity();
     87       s.clear(s.rdstate() & ~std::ios::failbit);
     88     } else if (result == -std::numeric_limits<T>::max() ||
     89                result == -std::numeric_limits<T>::infinity()) {
     90       result = -std::numeric_limits<T>::infinity();
     91       s.clear(s.rdstate() & ~std::ios::failbit);
     92     }
     93   }
     94 
     95   if (endptr) {
     96     *endptr =
     97         str +
     98         (s.fail() ? static_cast<std::iostream::pos_type>(0)
     99                   : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str))
    100                              : s.tellg()));
    101   }
    102   return result;
    103 }
    104 
    105 }  // namespace
    106 
    107 namespace strings {
    108 
    109 char* FastInt32ToBufferLeft(int32 i, char* buffer) {
    110   uint32 u = i;
    111   if (i < 0) {
    112     *buffer++ = '-';
    113     // We need to do the negation in modular (i.e., "unsigned")
    114     // arithmetic; MSVC++ apparently warns for plain "-u", so
    115     // we write the equivalent expression "0 - u" instead.
    116     u = 0 - u;
    117   }
    118   return FastUInt32ToBufferLeft(u, buffer);
    119 }
    120 
    121 char* FastUInt32ToBufferLeft(uint32 i, char* buffer) {
    122   char* start = buffer;
    123   do {
    124     *buffer++ = ((i % 10) + '0');
    125     i /= 10;
    126   } while (i > 0);
    127   *buffer = 0;
    128   std::reverse(start, buffer);
    129   return buffer;
    130 }
    131 
    132 char* FastInt64ToBufferLeft(int64 i, char* buffer) {
    133   uint64 u = i;
    134   if (i < 0) {
    135     *buffer++ = '-';
    136     u = 0 - u;
    137   }
    138   return FastUInt64ToBufferLeft(u, buffer);
    139 }
    140 
    141 char* FastUInt64ToBufferLeft(uint64 i, char* buffer) {
    142   char* start = buffer;
    143   do {
    144     *buffer++ = ((i % 10) + '0');
    145     i /= 10;
    146   } while (i > 0);
    147   *buffer = 0;
    148   std::reverse(start, buffer);
    149   return buffer;
    150 }
    151 
    152 static const double kDoublePrecisionCheckMax = DBL_MAX / 1.000000000000001;
    153 
    154 char* DoubleToBuffer(double value, char* buffer) {
    155   // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all
    156   // platforms these days.  Just in case some system exists where DBL_DIG
    157   // is significantly larger -- and risks overflowing our buffer -- we have
    158   // this assert.
    159   static_assert(DBL_DIG < 20, "DBL_DIG is too big");
    160 
    161   bool full_precision_needed = true;
    162   if (std::abs(value) <= kDoublePrecisionCheckMax) {
    163     int snprintf_result =
    164         snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG, value);
    165 
    166     // The snprintf should never overflow because the buffer is significantly
    167     // larger than the precision we asked for.
    168     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
    169 
    170     full_precision_needed =
    171         locale_independent_strtonum<double>(buffer, nullptr) != value;
    172   }
    173 
    174   if (full_precision_needed) {
    175     int snprintf_result =
    176         snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG + 2, value);
    177 
    178     // Should never overflow; see above.
    179     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
    180   }
    181   return buffer;
    182 }
    183 
    184 namespace {
    185 char SafeFirstChar(StringPiece str) {
    186   if (str.empty()) return '\0';
    187   return str[0];
    188 }
    189 void SkipSpaces(StringPiece* str) {
    190   while (isspace(SafeFirstChar(*str))) str->remove_prefix(1);
    191 }
    192 }  // namespace
    193 
    194 bool safe_strto64(StringPiece str, int64* value) {
    195   SkipSpaces(&str);
    196 
    197   int64 vlimit = kint64max;
    198   int sign = 1;
    199   if (str.Consume("-")) {
    200     sign = -1;
    201     // Different limit for positive and negative integers.
    202     vlimit = kint64min;
    203   }
    204 
    205   if (!isdigit(SafeFirstChar(str))) return false;
    206 
    207   int64 result = 0;
    208   if (sign == 1) {
    209     do {
    210       int digit = SafeFirstChar(str) - '0';
    211       if ((vlimit - digit) / 10 < result) {
    212         return false;
    213       }
    214       result = result * 10 + digit;
    215       str.remove_prefix(1);
    216     } while (isdigit(SafeFirstChar(str)));
    217   } else {
    218     do {
    219       int digit = SafeFirstChar(str) - '0';
    220       if ((vlimit + digit) / 10 > result) {
    221         return false;
    222       }
    223       result = result * 10 - digit;
    224       str.remove_prefix(1);
    225     } while (isdigit(SafeFirstChar(str)));
    226   }
    227 
    228   SkipSpaces(&str);
    229   if (!str.empty()) return false;
    230 
    231   *value = result;
    232   return true;
    233 }
    234 
    235 bool safe_strtou64(StringPiece str, uint64* value) {
    236   SkipSpaces(&str);
    237   if (!isdigit(SafeFirstChar(str))) return false;
    238 
    239   uint64 result = 0;
    240   do {
    241     int digit = SafeFirstChar(str) - '0';
    242     if ((kuint64max - digit) / 10 < result) {
    243       return false;
    244     }
    245     result = result * 10 + digit;
    246     str.remove_prefix(1);
    247   } while (isdigit(SafeFirstChar(str)));
    248 
    249   SkipSpaces(&str);
    250   if (!str.empty()) return false;
    251 
    252   *value = result;
    253   return true;
    254 }
    255 
    256 bool safe_strto32(StringPiece str, int32* value) {
    257   SkipSpaces(&str);
    258 
    259   int64 vmax = kint32max;
    260   int sign = 1;
    261   if (str.Consume("-")) {
    262     sign = -1;
    263     // Different max for positive and negative integers.
    264     ++vmax;
    265   }
    266 
    267   if (!isdigit(SafeFirstChar(str))) return false;
    268 
    269   int64 result = 0;
    270   do {
    271     result = result * 10 + SafeFirstChar(str) - '0';
    272     if (result > vmax) {
    273       return false;
    274     }
    275     str.remove_prefix(1);
    276   } while (isdigit(SafeFirstChar(str)));
    277 
    278   SkipSpaces(&str);
    279 
    280   if (!str.empty()) return false;
    281 
    282   *value = static_cast<int32>(result * sign);
    283   return true;
    284 }
    285 
    286 bool safe_strtou32(StringPiece str, uint32* value) {
    287   SkipSpaces(&str);
    288   if (!isdigit(SafeFirstChar(str))) return false;
    289 
    290   int64 result = 0;
    291   do {
    292     result = result * 10 + SafeFirstChar(str) - '0';
    293     if (result > kuint32max) {
    294       return false;
    295     }
    296     str.remove_prefix(1);
    297   } while (isdigit(SafeFirstChar(str)));
    298 
    299   SkipSpaces(&str);
    300   if (!str.empty()) return false;
    301 
    302   *value = static_cast<uint32>(result);
    303   return true;
    304 }
    305 
    306 bool safe_strtof(const char* str, float* value) {
    307   const char* endptr;
    308   *value = locale_independent_strtonum<float>(str, &endptr);
    309   while (isspace(*endptr)) ++endptr;
    310   // Ignore range errors from strtod/strtof.
    311   // The values it returns on underflow and
    312   // overflow are the right fallback in a
    313   // robust setting.
    314   return *str != '\0' && *endptr == '\0';
    315 }
    316 
    317 bool safe_strtod(const char* str, double* value) {
    318   const char* endptr;
    319   *value = locale_independent_strtonum<double>(str, &endptr);
    320   while (isspace(*endptr)) ++endptr;
    321   // Ignore range errors from strtod/strtof.
    322   // The values it returns on underflow and
    323   // overflow are the right fallback in a
    324   // robust setting.
    325   return *str != '\0' && *endptr == '\0';
    326 }
    327 
    328 char* FloatToBuffer(float value, char* buffer) {
    329   // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all
    330   // platforms these days.  Just in case some system exists where FLT_DIG
    331   // is significantly larger -- and risks overflowing our buffer -- we have
    332   // this assert.
    333   static_assert(FLT_DIG < 10, "FLT_DIG is too big");
    334 
    335   int snprintf_result =
    336       snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG, value);
    337 
    338   // The snprintf should never overflow because the buffer is significantly
    339   // larger than the precision we asked for.
    340   DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
    341 
    342   float parsed_value;
    343   if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) {
    344     snprintf_result =
    345         snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG + 3, value);
    346 
    347     // Should never overflow; see above.
    348     DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize);
    349   }
    350   return buffer;
    351 }
    352 
    353 string FpToString(Fprint fp) {
    354   char buf[17];
    355   snprintf(buf, sizeof(buf), "%016llx", static_cast<uint64>(fp));
    356   return string(buf);
    357 }
    358 
    359 bool StringToFp(const string& s, Fprint* fp) {
    360   char junk;
    361   uint64 result;
    362   if (sscanf(s.c_str(), "%llx%c", &result, &junk) == 1) {
    363     *fp = result;
    364     return true;
    365   } else {
    366     return false;
    367   }
    368 }
    369 
    370 StringPiece Uint64ToHexString(uint64 v, char* buf) {
    371   static const char* hexdigits = "0123456789abcdef";
    372   const int num_byte = 16;
    373   buf[num_byte] = '\0';
    374   for (int i = num_byte - 1; i >= 0; i--) {
    375     buf[i] = hexdigits[v & 0xf];
    376     v >>= 4;
    377   }
    378   return StringPiece(buf, num_byte);
    379 }
    380 
    381 bool HexStringToUint64(const StringPiece& s, uint64* result) {
    382   uint64 v = 0;
    383   if (s.empty()) {
    384     return false;
    385   }
    386   for (size_t i = 0; i < s.size(); i++) {
    387     char c = s[i];
    388     if (c >= '0' && c <= '9') {
    389       v = (v << 4) + (c - '0');
    390     } else if (c >= 'a' && c <= 'f') {
    391       v = (v << 4) + 10 + (c - 'a');
    392     } else if (c >= 'A' && c <= 'F') {
    393       v = (v << 4) + 10 + (c - 'A');
    394     } else {
    395       return false;
    396     }
    397   }
    398   *result = v;
    399   return true;
    400 }
    401 
    402 string HumanReadableNum(int64 value) {
    403   string s;
    404   if (value < 0) {
    405     s += "-";
    406     value = -value;
    407   }
    408   if (value < 1000) {
    409     Appendf(&s, "%lld", value);
    410   } else if (value >= static_cast<int64>(1e15)) {
    411     // Number bigger than 1E15; use that notation.
    412     Appendf(&s, "%0.3G", static_cast<double>(value));
    413   } else {
    414     static const char units[] = "kMBT";
    415     const char* unit = units;
    416     while (value >= static_cast<int64>(1000000)) {
    417       value /= static_cast<int64>(1000);
    418       ++unit;
    419       CHECK(unit < units + TF_ARRAYSIZE(units));
    420     }
    421     Appendf(&s, "%.2f%c", value / 1000.0, *unit);
    422   }
    423   return s;
    424 }
    425 
    426 string HumanReadableNumBytes(int64 num_bytes) {
    427   if (num_bytes == kint64min) {
    428     // Special case for number with not representable negation.
    429     return "-8E";
    430   }
    431 
    432   const char* neg_str = (num_bytes < 0) ? "-" : "";
    433   if (num_bytes < 0) {
    434     num_bytes = -num_bytes;
    435   }
    436 
    437   // Special case for bytes.
    438   if (num_bytes < 1024) {
    439     // No fractions for bytes.
    440     char buf[8];  // Longest possible string is '-XXXXB'
    441     snprintf(buf, sizeof(buf), "%s%lldB", neg_str,
    442              static_cast<int64>(num_bytes));
    443     return string(buf);
    444   }
    445 
    446   static const char units[] = "KMGTPE";  // int64 only goes up to E.
    447   const char* unit = units;
    448   while (num_bytes >= static_cast<int64>(1024) * 1024) {
    449     num_bytes /= 1024;
    450     ++unit;
    451     CHECK(unit < units + TF_ARRAYSIZE(units));
    452   }
    453 
    454   // We use SI prefixes.
    455   char buf[16];
    456   snprintf(buf, sizeof(buf), ((*unit == 'K') ? "%s%.1f%ciB" : "%s%.2f%ciB"),
    457            neg_str, num_bytes / 1024.0, *unit);
    458   return string(buf);
    459 }
    460 
    461 string HumanReadableElapsedTime(double seconds) {
    462   string human_readable;
    463 
    464   if (seconds < 0) {
    465     human_readable = "-";
    466     seconds = -seconds;
    467   }
    468 
    469   // Start with us and keep going up to years.
    470   // The comparisons must account for rounding to prevent the format breaking
    471   // the tested condition and returning, e.g., "1e+03 us" instead of "1 ms".
    472   const double microseconds = seconds * 1.0e6;
    473   if (microseconds < 999.5) {
    474     strings::Appendf(&human_readable, "%0.3g us", microseconds);
    475     return human_readable;
    476   }
    477   double milliseconds = seconds * 1e3;
    478   if (milliseconds >= .995 && milliseconds < 1) {
    479     // Round half to even in Appendf would convert this to 0.999 ms.
    480     milliseconds = 1.0;
    481   }
    482   if (milliseconds < 999.5) {
    483     strings::Appendf(&human_readable, "%0.3g ms", milliseconds);
    484     return human_readable;
    485   }
    486   if (seconds < 60.0) {
    487     strings::Appendf(&human_readable, "%0.3g s", seconds);
    488     return human_readable;
    489   }
    490   seconds /= 60.0;
    491   if (seconds < 60.0) {
    492     strings::Appendf(&human_readable, "%0.3g min", seconds);
    493     return human_readable;
    494   }
    495   seconds /= 60.0;
    496   if (seconds < 24.0) {
    497     strings::Appendf(&human_readable, "%0.3g h", seconds);
    498     return human_readable;
    499   }
    500   seconds /= 24.0;
    501   if (seconds < 30.0) {
    502     strings::Appendf(&human_readable, "%0.3g days", seconds);
    503     return human_readable;
    504   }
    505   if (seconds < 365.2425) {
    506     strings::Appendf(&human_readable, "%0.3g months", seconds / 30.436875);
    507     return human_readable;
    508   }
    509   seconds /= 365.2425;
    510   strings::Appendf(&human_readable, "%0.3g years", seconds);
    511   return human_readable;
    512 }
    513 
    514 }  // namespace strings
    515 }  // namespace tensorflow
    516