1 /* Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // Generally useful utility functions that are common to (not specific to any 17 // given part of) the XLA code base. 18 19 #ifndef TENSORFLOW_COMPILER_XLA_UTIL_H_ 20 #define TENSORFLOW_COMPILER_XLA_UTIL_H_ 21 22 #include <algorithm> 23 #include <string> 24 #include <vector> 25 26 #include "tensorflow/compiler/xla/status.h" 27 #include "tensorflow/compiler/xla/status_macros.h" 28 #include "tensorflow/compiler/xla/types.h" 29 #include "tensorflow/compiler/xla/xla_data.pb.h" 30 #include "tensorflow/core/lib/core/status.h" 31 #include "tensorflow/core/lib/core/stringpiece.h" 32 #include "tensorflow/core/lib/gtl/array_slice.h" 33 #include "tensorflow/core/lib/math/math_util.h" 34 #include "tensorflow/core/lib/strings/numbers.h" 35 #include "tensorflow/core/lib/strings/strcat.h" 36 #include "tensorflow/core/platform/logging.h" 37 #include "tensorflow/core/platform/macros.h" 38 #include "tensorflow/core/platform/protobuf.h" 39 #include "tensorflow/core/platform/types.h" 40 41 namespace xla { 42 43 // Logs the provided status message with a backtrace. 44 // 45 // For use by Status-factories, logs a backtrace at the point where the status 46 // is created, such that we can use --vmodule=util=1 to see all status 47 // creation backtraces. 
Status WithLogBacktrace(const Status& status);

// Ranks greater than 8 are very rare, so use InlinedVector<int64, 8> to store
// the bounds and indices. And for the rare cases of ranks greater than 8,
// the InlinedVector will just behave like an std::vector<> and allocate the
// memory to store its values.
static constexpr int kInlineRank = 8;
using DimensionVector = tensorflow::gtl::InlinedVector<int64, kInlineRank>;

// RAII timer that logs with a given label the wall clock time duration in human
// readable form. This differs from base's ElapsedTimer primarily in that it
// spits out the human-readable duration form.
//
// By default, the timing traces are only printed at VLOG(1) and above:
//
//   XLA_SCOPED_LOGGING_TIMER("fooing bar");  // nop if !VLOG_IS_ON(1).
//
// but you can control this via:
//
//   XLA_SCOPED_LOGGING_TIMER_LEVEL("fooing bar", 2);  // nop if !VLOG_IS_ON(2)
//
#define XLA_SCOPED_LOGGING_TIMER(label) \
  XLA_SCOPED_LOGGING_TIMER_HELPER(label, 1, __COUNTER__)
#define XLA_SCOPED_LOGGING_TIMER_LEVEL(label, level) \
  XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, __COUNTER__)

// Helper for implementing macros above.  Do not use directly.
//
// The extra level of macro indirection forces the evaluation of "counter"
// (which we expect is equal to __COUNTER__) before it is token-pasted into
// the variable name by HELPER2 below.
#define XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, counter) \
  XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter)

// Helper for macros above.  Don't use directly.  Pastes "counter" into the
// timer's variable name so that several timers may coexist in one scope.
#define XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter)      \
  ::xla::ScopedLoggingTimer XLA_ScopedLoggingTimerInstance##counter( \
      label, VLOG_IS_ON(level))

// RAII timer for XLA_SCOPED_LOGGING_TIMER and XLA_SCOPED_LOGGING_TIMER_LEVEL
// macros above.  Recommended usage is via the macros so you don't have to give
// the timer a name or worry about calling VLOG_IS_ON yourself.
struct ScopedLoggingTimer {
  // The timer does nothing if enabled is false.  This lets you pass in your
  // file's VLOG_IS_ON value.
  ScopedLoggingTimer(const string& label, bool enabled);
  ~ScopedLoggingTimer();

  bool enabled;         // Whether the destructor will log anything at all.
  string label;         // Label printed alongside the elapsed duration.
  uint64 start_micros;  // Start timestamp, in microseconds.
};

// Given a vector<T>, returns a MutableArraySlice<uint8> that points at its
// internals.
//
// Warning: if the vector is updated its storage pointer may change, so use this
// with caution (ideally in limited scopes with temporary lifetimes).
template <typename T>
tensorflow::gtl::MutableArraySlice<uint8> MutableByteSlice(std::vector<T>* v) {
  return tensorflow::gtl::MutableArraySlice<uint8>(
      reinterpret_cast<uint8*>(v->data()), v->size() * sizeof(T));
}

// Turns an immutable slice of type T into an immutable slice of bytes with the
// same byte size.
template <typename T>
tensorflow::gtl::ArraySlice<uint8> CastToByteSlice(
    tensorflow::gtl::ArraySlice<T> slice) {
  return tensorflow::gtl::ArraySlice<uint8>(
      reinterpret_cast<const uint8*>(slice.data()), slice.size() * sizeof(T));
}

// Casts a byte slice to a non-byte type T, checking that the original slice
// length is a multiple of sizeof(T).  CHECK-fails (crashes) on violation.
template <typename T>
tensorflow::gtl::ArraySlice<T> CastByteSlice(
    tensorflow::gtl::ArraySlice<uint8> slice) {
  CHECK_EQ(0, slice.size() % sizeof(T));
  return tensorflow::gtl::ArraySlice<T>(
      reinterpret_cast<const T*>(slice.data()), slice.size() / sizeof(T));
}

// Convenience function to force a vector to convert to an immutable slice.
// The slice is a non-owning view; it is only valid while "v" is alive and
// its storage unchanged.
template <typename T>
tensorflow::gtl::ArraySlice<T> AsSlice(const std::vector<T>& v) {
  return tensorflow::gtl::ArraySlice<T>(v);
}

// Converts a mutable vector pointer into a MutableArraySlice of the same
// type.
template <typename T>
tensorflow::gtl::MutableArraySlice<T> AsMutableSlice(std::vector<T>* v) {
  return tensorflow::gtl::MutableArraySlice<T>(v->data(), v->size());
}

// xla::int64 is not the same type as tensorflow::protobuf_int64 in open-source.
// Wrapper function that gives an int64 array slice view of a repeated int64
// protobuf field.  The reinterpret_cast presumes the two 64-bit types share a
// representation.
static inline tensorflow::gtl::ArraySlice<int64> AsInt64Slice(
    const tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>& v) {
  tensorflow::gtl::ArraySlice<tensorflow::protobuf_int64> slice(v);
  return tensorflow::gtl::ArraySlice<int64>(
      reinterpret_cast<const int64*>(slice.data()), slice.size());
}

// As above, but for uint64 types.
static inline tensorflow::gtl::ArraySlice<uint64> AsUInt64Slice(
    const tensorflow::protobuf::RepeatedField<tensorflow::protobuf_uint64>& v) {
  tensorflow::gtl::ArraySlice<tensorflow::protobuf_uint64> slice(v);
  return tensorflow::gtl::ArraySlice<uint64>(
      reinterpret_cast<const uint64*>(slice.data()), slice.size());
}

// Compares two containers for equality. Returns true iff the two containers
// have the same size and all their elements compare equal using their
// operator==. Like std::equal, but forces size equality.
template <typename Container1T, typename Container2T>
bool ContainersEqual(const Container1T& c1, const Container2T& c2) {
  return ((c1.size() == c2.size()) &&
          std::equal(std::begin(c1), std::end(c1), std::begin(c2)));
}

// Overload that allows the right-hand side to be a braced initializer list.
// The default ElementType makes an empty initializer list work as well.
template <typename Container1T,
          typename ElementType = typename Container1T::value_type>
bool ContainersEqual(const Container1T& c1,
                     std::initializer_list<ElementType> il) {
  tensorflow::gtl::ArraySlice<ElementType> c2{il};
  return ContainersEqual(c1, c2);
}

// Compares two containers for equality. Returns true iff the two containers
// have the same size and all their elements compare equal using the predicate
// p. Like std::equal, but forces size equality.
template <typename Container1T, typename Container2T, class PredicateT>
bool ContainersEqual(const Container1T& c1, const Container2T& c2,
                     PredicateT p) {
  return ((c1.size() == c2.size()) &&
          std::equal(std::begin(c1), std::end(c1), std::begin(c2), p));
}

// Performs a copy of count values from src to dest, using different strides for
// source and destination. The source starting index is src_base, while the
// destination one is dest_base.  Elements are converted with static_cast, so S
// must be convertible to D.  No bounds checking beyond the slices' operator[].
template <typename D, typename S>
void StridedCopy(tensorflow::gtl::MutableArraySlice<D> dest, int64 dest_base,
                 int64 dest_stride, tensorflow::gtl::ArraySlice<S> src,
                 int64 src_base, int64 src_stride, int64 count) {
  for (; count > 0; --count, dest_base += dest_stride, src_base += src_stride) {
    dest[dest_base] = static_cast<D>(src[src_base]);
  }
}

// Adds some context information to the error message in a
// Status. This is useful as Statuses are
// propagated upwards.  AddStatus prepends "context", AppendStatus appends it.
Status AddStatus(Status prior, tensorflow::StringPiece context);
Status AppendStatus(Status prior, tensorflow::StringPiece context);

// Status error shorthands -- printfs the arguments to be
// used as an error message and returns a status in the canonical
// error space.
Status InvalidArgument(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status Unimplemented(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status InternalError(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status FailedPrecondition(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status Cancelled(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status ResourceExhausted(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status NotFound(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status Unavailable(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);

// Passed-varargs variant of the InvalidArgument factory above.
Status InvalidArgumentV(const char* format, va_list args);

// StrCat-style variants of the factories above: concatenate the arguments
// into the error message instead of printf-formatting them.
template <typename... Args>
Status UnimplementedStrCat(Args&&... concat) {
  return Unimplemented(
      "%s", tensorflow::strings::StrCat(std::forward<Args>(concat)...).c_str());
}

template <typename... Args>
Status InternalErrorStrCat(Args&&... concat) {
  return InternalError(
      "%s", tensorflow::strings::StrCat(std::forward<Args>(concat)...).c_str());
}

template <typename... Args>
Status ResourceExhaustedStrCat(Args&&... concat) {
  return ResourceExhausted(
      "%s", tensorflow::strings::StrCat(std::forward<Args>(concat)...).c_str());
}

// Splits the lines of the original, replaces leading whitespace with the prefix
// given by "indentation", and returns the string joined by newlines again. As a
// side effect, any additional trailing whitespace is removed.
//
// Note: even different amounts of leading whitespace on different lines will be
// uniformly replaced with "indentation".
string Reindent(tensorflow::StringPiece original,
                tensorflow::StringPiece indentation);

// Checks whether permutation is a permutation of the [0, rank) integer range.
bool IsPermutation(tensorflow::gtl::ArraySlice<int64> permutation, int64 rank);

// Applies `permutation` on `input` and returns the permuted array.
// For each i, output[permutation[i]] = input[i].
//
// Precondition:
// 1. `permutation` is a permutation of 0..permutation.size()-1.
// 2. permutation.size() == input.size().
template <template <typename...> class C, typename T>
std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation,
                       C<T> input) {
  tensorflow::gtl::ArraySlice<T> data(input);
  // CHECK-fails (crashes) if the precondition on `permutation` is violated.
  CHECK(IsPermutation(permutation, data.size()));
  std::vector<T> output(data.size());
  for (size_t i = 0; i < permutation.size(); ++i) {
    output[permutation[i]] = data[i];
  }
  return output;
}

// Override of the above that works around compile failures with gcc 7.1.1.
// For details see https://github.com/tensorflow/tensorflow/issues/10843
// Hide this workaround from MSVC as it causes ambiguous error.
#ifndef _MSC_VER
template <typename T>
std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation,
                       const std::vector<T>& input) {
  return Permute<std::vector, T>(permutation, input);
}
#endif

// Inverts a permutation, i.e., output_permutation[input_permutation[i]] = i.
std::vector<int64> InversePermutation(
    tensorflow::gtl::ArraySlice<int64> input_permutation);

// Composes two permutations: output[i] = p1[p2[i]].
std::vector<int64> ComposePermutations(tensorflow::gtl::ArraySlice<int64> p1,
                                       tensorflow::gtl::ArraySlice<int64> p2);

// Returns true iff permutation == {0, 1, 2, ...}.
bool IsIdentityPermutation(tensorflow::gtl::ArraySlice<int64> permutation);

// Returns the index of the first occurrence of `value` in `container`, or
// container.size() (i.e. the distance begin->end) if `value` is not present.
template <typename Container>
int64 PositionInContainer(const Container& container, int64 value) {
  return std::distance(container.begin(),
                       std::find(container.begin(), container.end(), value));
}

// Formats the container as a comma-separated string. StrAppend must support
// appending the elements of the container. Prefix is prepended and suffix is
// appended to the returned string.
299 template <typename Container> 300 string CommaSeparatedString(const Container& c, const char* prefix = "", 301 const char* suffix = "") { 302 // Not using Join() since the implementation here is simple anyway and this 303 // avoids copying the string to append prefix. 304 string comma_separated = prefix; 305 const char* separator = ""; 306 for (const auto& entry : c) { 307 tensorflow::strings::StrAppend(&comma_separated, separator, entry); 308 separator = ", "; 309 } 310 comma_separated += suffix; 311 return comma_separated; 312 } 313 314 // Overload needed to allow the container to be an initializer list. The default 315 // type for T makes an empty initializer list work as well. 316 template <typename T = int> 317 string CommaSeparatedString(const std::initializer_list<T>& c, 318 const char* prefix = "", const char* suffix = "") { 319 return CommaSeparatedString<std::initializer_list<T>>(c, prefix, suffix); 320 } 321 322 // Formats the container in the mathematical notation for a vector, e.g. (1, 3, 323 // 7). StrAppend must support appending the elements of c. 324 template <typename Container> 325 string VectorString(const Container& c) { 326 return CommaSeparatedString(c, "(", ")"); 327 } 328 329 // Overload needed to allow the container to be an initializer list. The default 330 // type for T makes an empty initializer list work as well. 331 template <typename T = int> 332 string VectorString(const std::initializer_list<T>& c) { 333 return VectorString<std::initializer_list<T>>(c); 334 } 335 336 // Returns a PaddingConfig object that represents no padding for the given rank. 337 PaddingConfig MakeNoPaddingConfig(int64 rank); 338 339 // Returns a PaddingConfig object where 'padding' contains 340 // (low edge padding, high edge padding) pairs for each dimension. 
PaddingConfig MakeEdgePaddingConfig(
    tensorflow::gtl::ArraySlice<std::pair<int64, int64>> padding);

// Returns true if the padding configuration has at least one dimension with
// non-zero interior padding.
bool HasInteriorPadding(const PaddingConfig& config);

// Imports the templated FloorOfRatio math function from the TensorFlow
// namespace, as it is very commonly used.
template <typename T>
T FloorOfRatio(T dividend, T divisor) {
  return tensorflow::MathUtil::FloorOfRatio<T>(dividend, divisor);
}

// Imports the templated CeilOfRatio math function from the TensorFlow
// namespace, as it is very commonly used.
template <typename T>
T CeilOfRatio(T dividend, T divisor) {
  return tensorflow::MathUtil::CeilOfRatio<T>(dividend, divisor);
}

// Rounds the value up to a multiple of the divisor by first calling CeilOfRatio
// then multiplying by the divisor. For example: RoundUpToNearest(13, 8) => 16
template <typename T>
T RoundUpToNearest(T value, T divisor) {
  return CeilOfRatio(value, divisor) * divisor;
}

// Rounds the value down to a multiple of the divisor by first calling
// FloorOfRatio then multiplying by the divisor. For example:
// RoundDownToNearest(13, 8) => 8
template <typename T>
T RoundDownToNearest(T value, T divisor) {
  return FloorOfRatio(value, divisor) * divisor;
}

// Given a number of flops executed in an amount of time, produces a string that
// represents the throughput;
// e.g. HumanReadableNumFlops(1e9, 1e9) => 1.00GFLOP/s.
string HumanReadableNumFlops(double flops, double nanoseconds);

// Given a number of transcendental ops executed in an amount of time, produces
// a string that represents the throughput;
// e.g. HumanReadableNumTranscendentalOps(1e9, 1e9) => 1.00GTROP/s.
385 string HumanReadableNumTranscendentalOps(double trops, double nanoseconds); 386 387 // Split the text into multiple lines and log each line with the given 388 // severity, filename, and line number. 389 void LogLines(int sev, tensorflow::StringPiece text, const char* fname, 390 int lineno); 391 392 template <typename T> 393 inline bool IsPowerOfTwo(T x) { 394 static_assert(!std::numeric_limits<T>::is_signed, "unsigned types only"); 395 return x != 0 && (x & (x - 1)) == 0; 396 } 397 398 // Returns a mask with "bits" number of least significant bits set. 399 inline uint32 LsbMaskU32(int bits) { 400 CHECK_GE(bits, 0); 401 return (1U << bits) - 1; 402 } 403 404 // Utility for performing a static_cast<> on a std::unique_ptr<>. 405 template <typename Derived, typename Base> 406 std::unique_ptr<Derived> unique_ptr_static_cast(std::unique_ptr<Base> ptr) { 407 return std::unique_ptr<Derived>(static_cast<Derived*>(ptr.release())); 408 } 409 410 int64 Product(tensorflow::gtl::ArraySlice<int64> xs); 411 412 // Returns the start indices of consecutive non-overlapping subsequences of `a` 413 // and `b` with the same product, i.e. `(i, j)` so 414 // a = {a[0 = i_0], ..., a[i_1 - 1], a[i_1], ... , a[i_2 - 1], ...} 415 // b = {b[0 = j_0], ..., b[j_1 - 1], b[j_1], ... , b[j_2 - 1], ...} 416 // k . 0 <= k < CommonFactors(a, b).size - 1 => 417 // a[i_k] a[i_k + 1] ... a[i_(k+1) - 1] = 418 // b[j_k] b[j_k + 1] ... b[j_(k+1) - 1] 419 // where `CommonFactors(a, b)[CommonFactors(a, b).size - 1] = (a.size, b.size)` 420 // 421 // If the given shapes have non-zero size, returns the bounds of the shortest 422 // possible such subsequences; else, returns `{(0, 0), (a.size, b.size)}`. 423 std::vector<std::pair<int64, int64>> CommonFactors( 424 tensorflow::gtl::ArraySlice<int64> a, tensorflow::gtl::ArraySlice<int64> b); 425 426 // Removes illegal characters from filenames. 
427 string SanitizeFileName(string file_name); 428 429 template <typename Container, typename Predicate> 430 bool c_all_of(Container container, Predicate predicate) { 431 return std::all_of(std::begin(container), std::end(container), predicate); 432 } 433 434 template <typename InputContainer, typename OutputIterator, 435 typename UnaryOperation> 436 OutputIterator c_transform(InputContainer input_container, 437 OutputIterator output_iterator, 438 UnaryOperation unary_op) { 439 return std::transform(std::begin(input_container), std::end(input_container), 440 output_iterator, unary_op); 441 } 442 443 template <class InputContainer, class OutputIterator, class UnaryPredicate> 444 OutputIterator c_copy_if(InputContainer input_container, 445 OutputIterator output_iterator, 446 UnaryPredicate predicate) { 447 return std::copy_if(std::begin(input_container), std::end(input_container), 448 output_iterator, predicate); 449 } 450 451 template <class InputContainer, class OutputIterator> 452 OutputIterator c_copy(InputContainer input_container, 453 OutputIterator output_iterator) { 454 return std::copy(std::begin(input_container), std::end(input_container), 455 output_iterator); 456 } 457 458 template <class InputContainer> 459 void c_sort(InputContainer& input_container) { 460 std::sort(std::begin(input_container), std::end(input_container)); 461 } 462 463 template <class InputContainer, class Comparator> 464 void c_sort(InputContainer& input_container, Comparator comparator) { 465 std::sort(std::begin(input_container), std::end(input_container), comparator); 466 } 467 468 template <typename Sequence, typename T> 469 bool c_binary_search(Sequence& sequence, T&& value) { 470 return std::binary_search(std::begin(sequence), std::end(sequence), 471 std::forward<T>(value)); 472 } 473 474 template <typename C> 475 bool c_is_sorted(const C& c) { 476 return std::is_sorted(std::begin(c), std::end(c)); 477 } 478 479 template <typename C> 480 auto c_adjacent_find(const C& c) -> 
decltype(std::begin(c)) { 481 return std::adjacent_find(std::begin(c), std::end(c)); 482 } 483 } // namespace xla 484 485 #define XLA_LOG_LINES(SEV, STRING) \ 486 ::xla::LogLines(SEV, STRING, __FILE__, __LINE__) 487 488 #define XLA_VLOG_LINES(LEVEL, STRING) \ 489 do { \ 490 if (VLOG_IS_ON(LEVEL)) XLA_LOG_LINES(::tensorflow::INFO, STRING); \ 491 } while (false); 492 493 // Utility macro that performs the equivalent of what one would expect 494 // LOG_LINES(FATAL, X) to do but can be used at the end of a function that 495 // returns a value without getting a compiler warning that no value is returned. 496 #define XLA_FATAL_LOG(X) \ 497 XLA_LOG_LINES(::tensorflow::ERROR, X); \ 498 LOG(FATAL) << "Aborting in " << __FUNCTION__ << " due to previous errors."; 499 500 #endif // TENSORFLOW_COMPILER_XLA_UTIL_H_ 501