1 /* Copyright 2015 The TensorFlow Authors. All Rights Reserved. 2 Licensed under the Apache License, Version 2.0 (the "License"); 3 you may not use this file except in compliance with the License. 4 You may obtain a copy of the License at 5 6 http://www.apache.org/licenses/LICENSE-2.0 7 8 Unless required by applicable law or agreed to in writing, software 9 distributed under the License is distributed on an "AS IS" BASIS, 10 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 See the License for the specific language governing permissions and 12 limitations under the License. 13 ==============================================================================*/ 14 15 #include "tensorflow/core/lib/strings/numbers.h" 16 17 #include <ctype.h> 18 #include <float.h> 19 #include <stdio.h> 20 #include <stdlib.h> 21 #include <algorithm> 22 #include <cinttypes> 23 #include <cmath> 24 #include <locale> 25 #include <unordered_map> 26 27 #include "double-conversion/double-conversion.h" 28 29 #include "tensorflow/core/lib/strings/str_util.h" 30 #include "tensorflow/core/lib/strings/stringprintf.h" 31 #include "tensorflow/core/platform/logging.h" 32 #include "tensorflow/core/platform/macros.h" 33 #include "tensorflow/core/platform/types.h" 34 35 namespace tensorflow { 36 37 namespace { 38 39 template <typename T> 40 const std::unordered_map<string, T>* GetSpecialNumsSingleton() { 41 static const std::unordered_map<string, T>* special_nums = 42 CHECK_NOTNULL((new const std::unordered_map<string, T>{ 43 {"inf", std::numeric_limits<T>::infinity()}, 44 {"+inf", std::numeric_limits<T>::infinity()}, 45 {"-inf", -std::numeric_limits<T>::infinity()}, 46 {"infinity", std::numeric_limits<T>::infinity()}, 47 {"+infinity", std::numeric_limits<T>::infinity()}, 48 {"-infinity", -std::numeric_limits<T>::infinity()}, 49 {"nan", std::numeric_limits<T>::quiet_NaN()}, 50 {"+nan", std::numeric_limits<T>::quiet_NaN()}, 51 {"-nan", -std::numeric_limits<T>::quiet_NaN()}, 52 })); 53 return special_nums; 54 } 55 56 template <typename T> 57 T locale_independent_strtonum(const char* str, const char** endptr) { 58 auto special_nums = GetSpecialNumsSingleton<T>(); 59 std::stringstream s(str); 60 61 // Check if str is one of the special numbers. 62 string special_num_str; 63 s >> special_num_str; 64 65 for (int i = 0; i < special_num_str.length(); ++i) { 66 special_num_str[i] = 67 std::tolower(special_num_str[i], std::locale::classic()); 68 } 69 70 auto entry = special_nums->find(special_num_str); 71 if (entry != special_nums->end()) { 72 *endptr = str + (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str)) 73 : s.tellg()); 74 return entry->second; 75 } else { 76 // Perhaps it's a hex number 77 if (special_num_str.compare(0, 2, "0x") == 0 || 78 special_num_str.compare(0, 3, "-0x") == 0) { 79 return strtol(str, const_cast<char**>(endptr), 16); 80 } 81 } 82 // Reset the stream 83 s.str(str); 84 s.clear(); 85 // Use the "C" locale 86 s.imbue(std::locale::classic()); 87 88 T result; 89 s >> result; 90 91 // Set to result to what strto{f,d} functions would have returned. If the 92 // number was outside the range, the stringstream sets the fail flag, but 93 // returns the +/-max() value, whereas strto{f,d} functions return +/-INF. 94 if (s.fail()) { 95 if (result == std::numeric_limits<T>::max() || 96 result == std::numeric_limits<T>::infinity()) { 97 result = std::numeric_limits<T>::infinity(); 98 s.clear(s.rdstate() & ~std::ios::failbit); 99 } else if (result == -std::numeric_limits<T>::max() || 100 result == -std::numeric_limits<T>::infinity()) { 101 result = -std::numeric_limits<T>::infinity(); 102 s.clear(s.rdstate() & ~std::ios::failbit); 103 } 104 } 105 106 if (endptr) { 107 *endptr = 108 str + 109 (s.fail() ? static_cast<std::iostream::pos_type>(0) 110 : (s.eof() ? static_cast<std::iostream::pos_type>(strlen(str)) 111 : s.tellg())); 112 } 113 return result; 114 } 115 116 static inline const double_conversion::StringToDoubleConverter& 117 StringToFloatConverter() { 118 static const double_conversion::StringToDoubleConverter converter( 119 double_conversion::StringToDoubleConverter::ALLOW_LEADING_SPACES | 120 double_conversion::StringToDoubleConverter::ALLOW_HEX | 121 double_conversion::StringToDoubleConverter::ALLOW_TRAILING_SPACES | 122 double_conversion::StringToDoubleConverter::ALLOW_CASE_INSENSIBILITY, 123 0., 0., "inf", "nan"); 124 return converter; 125 } 126 127 } // namespace 128 129 namespace strings { 130 131 size_t FastInt32ToBufferLeft(int32 i, char* buffer) { 132 uint32 u = i; 133 size_t length = 0; 134 if (i < 0) { 135 *buffer++ = '-'; 136 ++length; 137 // We need to do the negation in modular (i.e., "unsigned") 138 // arithmetic; MSVC++ apparently warns for plain "-u", so 139 // we write the equivalent expression "0 - u" instead. 140 u = 0 - u; 141 } 142 length += FastUInt32ToBufferLeft(u, buffer); 143 return length; 144 } 145 146 size_t FastUInt32ToBufferLeft(uint32 i, char* buffer) { 147 char* start = buffer; 148 do { 149 *buffer++ = ((i % 10) + '0'); 150 i /= 10; 151 } while (i > 0); 152 *buffer = 0; 153 std::reverse(start, buffer); 154 return buffer - start; 155 } 156 157 size_t FastInt64ToBufferLeft(int64 i, char* buffer) { 158 uint64 u = i; 159 size_t length = 0; 160 if (i < 0) { 161 *buffer++ = '-'; 162 ++length; 163 u = 0 - u; 164 } 165 length += FastUInt64ToBufferLeft(u, buffer); 166 return length; 167 } 168 169 size_t FastUInt64ToBufferLeft(uint64 i, char* buffer) { 170 char* start = buffer; 171 do { 172 *buffer++ = ((i % 10) + '0'); 173 i /= 10; 174 } while (i > 0); 175 *buffer = 0; 176 std::reverse(start, buffer); 177 return buffer - start; 178 } 179 180 static const double kDoublePrecisionCheckMax = DBL_MAX / 1.000000000000001; 181 182 size_t DoubleToBuffer(double value, char* buffer) { 183 // DBL_DIG is 15 for IEEE-754 doubles, which are used on almost all 184 // platforms these days. Just in case some system exists where DBL_DIG 185 // is significantly larger -- and risks overflowing our buffer -- we have 186 // this assert. 187 static_assert(DBL_DIG < 20, "DBL_DIG is too big"); 188 189 if (std::abs(value) <= kDoublePrecisionCheckMax) { 190 int snprintf_result = 191 snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG, value); 192 193 // The snprintf should never overflow because the buffer is significantly 194 // larger than the precision we asked for. 195 DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); 196 197 if (locale_independent_strtonum<double>(buffer, nullptr) == value) { 198 // Round-tripping the string to double works; we're done. 199 return snprintf_result; 200 } 201 // else: full precision formatting needed. Fall through. 202 } 203 204 int snprintf_result = 205 snprintf(buffer, kFastToBufferSize, "%.*g", DBL_DIG + 2, value); 206 207 // Should never overflow; see above. 208 DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); 209 210 return snprintf_result; 211 } 212 213 namespace { 214 char SafeFirstChar(StringPiece str) { 215 if (str.empty()) return '\0'; 216 return str[0]; 217 } 218 void SkipSpaces(StringPiece* str) { 219 while (isspace(SafeFirstChar(*str))) str->remove_prefix(1); 220 } 221 } // namespace 222 223 bool safe_strto64(StringPiece str, int64* value) { 224 SkipSpaces(&str); 225 226 int64 vlimit = kint64max; 227 int sign = 1; 228 if (str_util::ConsumePrefix(&str, "-")) { 229 sign = -1; 230 // Different limit for positive and negative integers. 231 vlimit = kint64min; 232 } 233 234 if (!isdigit(SafeFirstChar(str))) return false; 235 236 int64 result = 0; 237 if (sign == 1) { 238 do { 239 int digit = SafeFirstChar(str) - '0'; 240 if ((vlimit - digit) / 10 < result) { 241 return false; 242 } 243 result = result * 10 + digit; 244 str.remove_prefix(1); 245 } while (isdigit(SafeFirstChar(str))); 246 } else { 247 do { 248 int digit = SafeFirstChar(str) - '0'; 249 if ((vlimit + digit) / 10 > result) { 250 return false; 251 } 252 result = result * 10 - digit; 253 str.remove_prefix(1); 254 } while (isdigit(SafeFirstChar(str))); 255 } 256 257 SkipSpaces(&str); 258 if (!str.empty()) return false; 259 260 *value = result; 261 return true; 262 } 263 264 bool safe_strtou64(StringPiece str, uint64* value) { 265 SkipSpaces(&str); 266 if (!isdigit(SafeFirstChar(str))) return false; 267 268 uint64 result = 0; 269 do { 270 int digit = SafeFirstChar(str) - '0'; 271 if ((kuint64max - digit) / 10 < result) { 272 return false; 273 } 274 result = result * 10 + digit; 275 str.remove_prefix(1); 276 } while (isdigit(SafeFirstChar(str))); 277 278 SkipSpaces(&str); 279 if (!str.empty()) return false; 280 281 *value = result; 282 return true; 283 } 284 285 bool safe_strto32(StringPiece str, int32* value) { 286 SkipSpaces(&str); 287 288 int64 vmax = kint32max; 289 int sign = 1; 290 if (str_util::ConsumePrefix(&str, "-")) { 291 sign = -1; 292 // Different max for positive and negative integers. 293 ++vmax; 294 } 295 296 if (!isdigit(SafeFirstChar(str))) return false; 297 298 int64 result = 0; 299 do { 300 result = result * 10 + SafeFirstChar(str) - '0'; 301 if (result > vmax) { 302 return false; 303 } 304 str.remove_prefix(1); 305 } while (isdigit(SafeFirstChar(str))); 306 307 SkipSpaces(&str); 308 309 if (!str.empty()) return false; 310 311 *value = static_cast<int32>(result * sign); 312 return true; 313 } 314 315 bool safe_strtou32(StringPiece str, uint32* value) { 316 SkipSpaces(&str); 317 if (!isdigit(SafeFirstChar(str))) return false; 318 319 int64 result = 0; 320 do { 321 result = result * 10 + SafeFirstChar(str) - '0'; 322 if (result > kuint32max) { 323 return false; 324 } 325 str.remove_prefix(1); 326 } while (isdigit(SafeFirstChar(str))); 327 328 SkipSpaces(&str); 329 if (!str.empty()) return false; 330 331 *value = static_cast<uint32>(result); 332 return true; 333 } 334 335 bool safe_strtof(StringPiece str, float* value) { 336 int processed_characters_count = -1; 337 auto len = str.size(); 338 339 // If string length exceeds buffer size or int max, fail. 340 if (len >= kFastToBufferSize) return false; 341 if (len > std::numeric_limits<int>::max()) return false; 342 343 *value = StringToFloatConverter().StringToFloat( 344 str.data(), static_cast<int>(len), &processed_characters_count); 345 return processed_characters_count > 0; 346 } 347 348 bool safe_strtod(StringPiece str, double* value) { 349 int processed_characters_count = -1; 350 auto len = str.size(); 351 352 // If string length exceeds buffer size or int max, fail. 353 if (len >= kFastToBufferSize) return false; 354 if (len > std::numeric_limits<int>::max()) return false; 355 356 *value = StringToFloatConverter().StringToDouble( 357 str.data(), static_cast<int>(len), &processed_characters_count); 358 return processed_characters_count > 0; 359 } 360 361 size_t FloatToBuffer(float value, char* buffer) { 362 // FLT_DIG is 6 for IEEE-754 floats, which are used on almost all 363 // platforms these days. Just in case some system exists where FLT_DIG 364 // is significantly larger -- and risks overflowing our buffer -- we have 365 // this assert. 366 static_assert(FLT_DIG < 10, "FLT_DIG is too big"); 367 368 int snprintf_result = 369 snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG, value); 370 371 // The snprintf should never overflow because the buffer is significantly 372 // larger than the precision we asked for. 373 DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); 374 375 float parsed_value; 376 if (!safe_strtof(buffer, &parsed_value) || parsed_value != value) { 377 snprintf_result = 378 snprintf(buffer, kFastToBufferSize, "%.*g", FLT_DIG + 3, value); 379 380 // Should never overflow; see above. 381 DCHECK(snprintf_result > 0 && snprintf_result < kFastToBufferSize); 382 } 383 return snprintf_result; 384 } 385 386 string FpToString(Fprint fp) { 387 char buf[17]; 388 snprintf(buf, sizeof(buf), "%016llx", static_cast<uint64>(fp)); 389 return string(buf); 390 } 391 392 bool StringToFp(const string& s, Fprint* fp) { 393 char junk; 394 uint64_t result; 395 if (sscanf(s.c_str(), "%" SCNx64 "%c", &result, &junk) == 1) { 396 *fp = result; 397 return true; 398 } else { 399 return false; 400 } 401 } 402 403 StringPiece Uint64ToHexString(uint64 v, char* buf) { 404 static const char* hexdigits = "0123456789abcdef"; 405 const int num_byte = 16; 406 buf[num_byte] = '\0'; 407 for (int i = num_byte - 1; i >= 0; i--) { 408 buf[i] = hexdigits[v & 0xf]; 409 v >>= 4; 410 } 411 return StringPiece(buf, num_byte); 412 } 413 414 bool HexStringToUint64(const StringPiece& s, uint64* result) { 415 uint64 v = 0; 416 if (s.empty()) { 417 return false; 418 } 419 for (size_t i = 0; i < s.size(); i++) { 420 char c = s[i]; 421 if (c >= '0' && c <= '9') { 422 v = (v << 4) + (c - '0'); 423 } else if (c >= 'a' && c <= 'f') { 424 v = (v << 4) + 10 + (c - 'a'); 425 } else if (c >= 'A' && c <= 'F') { 426 v = (v << 4) + 10 + (c - 'A'); 427 } else { 428 return false; 429 } 430 } 431 *result = v; 432 return true; 433 } 434 435 string HumanReadableNum(int64 value) { 436 string s; 437 if (value < 0) { 438 s += "-"; 439 value = -value; 440 } 441 if (value < 1000) { 442 Appendf(&s, "%lld", value); 443 } else if (value >= static_cast<int64>(1e15)) { 444 // Number bigger than 1E15; use that notation. 445 Appendf(&s, "%0.3G", static_cast<double>(value)); 446 } else { 447 static const char units[] = "kMBT"; 448 const char* unit = units; 449 while (value >= static_cast<int64>(1000000)) { 450 value /= static_cast<int64>(1000); 451 ++unit; 452 CHECK(unit < units + TF_ARRAYSIZE(units)); 453 } 454 Appendf(&s, "%.2f%c", value / 1000.0, *unit); 455 } 456 return s; 457 } 458 459 string HumanReadableNumBytes(int64 num_bytes) { 460 if (num_bytes == kint64min) { 461 // Special case for number with not representable negation. 462 return "-8E"; 463 } 464 465 const char* neg_str = (num_bytes < 0) ? "-" : ""; 466 if (num_bytes < 0) { 467 num_bytes = -num_bytes; 468 } 469 470 // Special case for bytes. 471 if (num_bytes < 1024) { 472 // No fractions for bytes. 473 char buf[8]; // Longest possible string is '-XXXXB' 474 snprintf(buf, sizeof(buf), "%s%lldB", neg_str, 475 static_cast<int64>(num_bytes)); 476 return string(buf); 477 } 478 479 static const char units[] = "KMGTPE"; // int64 only goes up to E. 480 const char* unit = units; 481 while (num_bytes >= static_cast<int64>(1024) * 1024) { 482 num_bytes /= 1024; 483 ++unit; 484 CHECK(unit < units + TF_ARRAYSIZE(units)); 485 } 486 487 // We use SI prefixes. 488 char buf[16]; 489 snprintf(buf, sizeof(buf), ((*unit == 'K') ? "%s%.1f%ciB" : "%s%.2f%ciB"), 490 neg_str, num_bytes / 1024.0, *unit); 491 return string(buf); 492 } 493 494 string HumanReadableElapsedTime(double seconds) { 495 string human_readable; 496 497 if (seconds < 0) { 498 human_readable = "-"; 499 seconds = -seconds; 500 } 501 502 // Start with us and keep going up to years. 503 // The comparisons must account for rounding to prevent the format breaking 504 // the tested condition and returning, e.g., "1e+03 us" instead of "1 ms". 505 const double microseconds = seconds * 1.0e6; 506 if (microseconds < 999.5) { 507 strings::Appendf(&human_readable, "%0.3g us", microseconds); 508 return human_readable; 509 } 510 double milliseconds = seconds * 1e3; 511 if (milliseconds >= .995 && milliseconds < 1) { 512 // Round half to even in Appendf would convert this to 0.999 ms. 513 milliseconds = 1.0; 514 } 515 if (milliseconds < 999.5) { 516 strings::Appendf(&human_readable, "%0.3g ms", milliseconds); 517 return human_readable; 518 } 519 if (seconds < 60.0) { 520 strings::Appendf(&human_readable, "%0.3g s", seconds); 521 return human_readable; 522 } 523 seconds /= 60.0; 524 if (seconds < 60.0) { 525 strings::Appendf(&human_readable, "%0.3g min", seconds); 526 return human_readable; 527 } 528 seconds /= 60.0; 529 if (seconds < 24.0) { 530 strings::Appendf(&human_readable, "%0.3g h", seconds); 531 return human_readable; 532 } 533 seconds /= 24.0; 534 if (seconds < 30.0) { 535 strings::Appendf(&human_readable, "%0.3g days", seconds); 536 return human_readable; 537 } 538 if (seconds < 365.2425) { 539 strings::Appendf(&human_readable, "%0.3g months", seconds / 30.436875); 540 return human_readable; 541 } 542 seconds /= 365.2425; 543 strings::Appendf(&human_readable, "%0.3g years", seconds); 544 return human_readable; 545 } 546 547 } // namespace strings 548 } // namespace tensorflow 549