/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// Generally useful utility functions that are common to (not specific to any
// given part of) the XLA code base.

#ifndef TENSORFLOW_COMPILER_XLA_UTIL_H_
#define TENSORFLOW_COMPILER_XLA_UTIL_H_

#include <algorithm>
#include <limits>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/compiler/xla/status_macros.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/compiler/xla/xla_data.pb.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/lib/core/stringpiece.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/lib/gtl/inlined_vector.h"
#include "tensorflow/core/lib/math/math_util.h"
#include "tensorflow/core/lib/strings/numbers.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/protobuf.h"
#include "tensorflow/core/platform/types.h"

namespace xla {

// Logs the provided status message with a backtrace.
//
// For use by Status factories, logs a backtrace at the point where the status
// is created, such that we can use --vmodule=util=1 to see all status
// creation backtraces.
Status WithLogBacktrace(const Status& status);

// Ranks greater than 8 are very rare, so use InlinedVector<int64, 8> to store
// the bounds and indices. For the rare cases of ranks greater than 8, the
// InlinedVector will just behave like a std::vector<> and allocate the memory
// to store its values.
static constexpr int kInlineRank = 8;
using DimensionVector = tensorflow::gtl::InlinedVector<int64, kInlineRank>;

// RAII timer that logs the elapsed wall-clock time under a given label in
// human-readable form. This differs from base's ElapsedTimer primarily in that
// it prints the duration in human-readable form.
//
// By default, the timing traces are only printed at VLOG(1) and above:
//
//   XLA_SCOPED_LOGGING_TIMER("fooing bar");  // nop if !VLOG_IS_ON(1).
//
// but you can control this via:
//
//   XLA_SCOPED_LOGGING_TIMER_LEVEL("fooing bar", 2);  // nop if !VLOG_IS_ON(2)
//
#define XLA_SCOPED_LOGGING_TIMER(label) \
  XLA_SCOPED_LOGGING_TIMER_HELPER(label, 1, __COUNTER__)
#define XLA_SCOPED_LOGGING_TIMER_LEVEL(label, level) \
  XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, __COUNTER__)

// Helper for implementing macros above.  Do not use directly.
//
// Forces the evaluation of "counter", which we expect is equal to __COUNTER__.
#define XLA_SCOPED_LOGGING_TIMER_HELPER(label, level, counter) \
  XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter)

// Helper for macros above.  Don't use directly.
#define XLA_SCOPED_LOGGING_TIMER_HELPER2(label, level, counter)      \
  ::xla::ScopedLoggingTimer XLA_ScopedLoggingTimerInstance##counter( \
      label, VLOG_IS_ON(level))

// RAII timer for XLA_SCOPED_LOGGING_TIMER and XLA_SCOPED_LOGGING_TIMER_LEVEL
// macros above.  Recommended usage is via the macros so you don't have to give
// the timer a name or worry about calling VLOG_IS_ON yourself.
struct ScopedLoggingTimer {
  // The timer does nothing if enabled is false.  This lets you pass in your
  // file's VLOG_IS_ON value.
  ScopedLoggingTimer(const string& label, bool enabled);
  ~ScopedLoggingTimer();

  bool enabled;
  string label;
  uint64 start_micros;
};
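// Illustrative direct use (the macros above are the recommended way):
//   ScopedLoggingTimer timer("my pass", VLOG_IS_ON(1));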

// Given a vector<T>, returns a MutableArraySlice<uint8> that points at its
// internals.
//
// Warning: if the vector is updated its storage pointer may change, so use this
// with caution (ideally in limited scopes with temporary lifetimes).
template <typename T>
tensorflow::gtl::MutableArraySlice<uint8> MutableByteSlice(std::vector<T>* v) {
  return tensorflow::gtl::MutableArraySlice<uint8>(
      reinterpret_cast<uint8*>(v->data()), v->size() * sizeof(T));
}
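// Illustrative use, assuming a local vector `v`:
//   std::vector<float> v(4);
//   auto bytes = MutableByteSlice(&v);  // 16-byte view of v's storage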

// Turns an immutable slice of type T into an immutable slice of bytes with the
// same byte size.
template <typename T>
tensorflow::gtl::ArraySlice<uint8> CastToByteSlice(
    tensorflow::gtl::ArraySlice<T> slice) {
  return tensorflow::gtl::ArraySlice<uint8>(
      reinterpret_cast<const uint8*>(slice.data()), slice.size() * sizeof(T));
}

// Casts a byte slice to a non-byte type T, checking that the original slice
// length is a multiple of sizeof(T).
template <typename T>
tensorflow::gtl::ArraySlice<T> CastByteSlice(
    tensorflow::gtl::ArraySlice<uint8> slice) {
  CHECK_EQ(0, slice.size() % sizeof(T));
  return tensorflow::gtl::ArraySlice<T>(
      reinterpret_cast<const T*>(slice.data()), slice.size() / sizeof(T));
}
// Convenience function that converts a vector into an immutable slice.
template <typename T>
tensorflow::gtl::ArraySlice<T> AsSlice(const std::vector<T>& v) {
  return tensorflow::gtl::ArraySlice<T>(v);
}

// Converts a mutable vector pointer into a MutableArraySlice of the same
// type.
template <typename T>
tensorflow::gtl::MutableArraySlice<T> AsMutableSlice(std::vector<T>* v) {
  return tensorflow::gtl::MutableArraySlice<T>(v->data(), v->size());
}

// xla::int64 is not the same type as tensorflow::protobuf_int64 in open-source.
// Wrapper function that gives an int64 array slice view of a repeated int64
// protobuf field.
static inline tensorflow::gtl::ArraySlice<int64> AsInt64Slice(
    const tensorflow::protobuf::RepeatedField<tensorflow::protobuf_int64>& v) {
  tensorflow::gtl::ArraySlice<tensorflow::protobuf_int64> slice(v);
  return tensorflow::gtl::ArraySlice<int64>(
      reinterpret_cast<const int64*>(slice.data()), slice.size());
}

// As above, but for uint64 types.
static inline tensorflow::gtl::ArraySlice<uint64> AsUInt64Slice(
    const tensorflow::protobuf::RepeatedField<tensorflow::protobuf_uint64>& v) {
  tensorflow::gtl::ArraySlice<tensorflow::protobuf_uint64> slice(v);
  return tensorflow::gtl::ArraySlice<uint64>(
      reinterpret_cast<const uint64*>(slice.data()), slice.size());
}

// Compares two containers for equality. Returns true iff the two containers
// have the same size and all their elements compare equal using their
// operator==. Like std::equal, but forces size equality.
template <typename Container1T, typename Container2T>
bool ContainersEqual(const Container1T& c1, const Container2T& c2) {
  return ((c1.size() == c2.size()) &&
          std::equal(std::begin(c1), std::end(c1), std::begin(c2)));
}

template <typename Container1T,
          typename ElementType = typename Container1T::value_type>
bool ContainersEqual(const Container1T& c1,
                     std::initializer_list<ElementType> il) {
  tensorflow::gtl::ArraySlice<ElementType> c2{il};
  return ContainersEqual(c1, c2);
}

// Compares two containers for equality. Returns true iff the two containers
// have the same size and all their elements compare equal using the predicate
// p. Like std::equal, but forces size equality.
template <typename Container1T, typename Container2T, class PredicateT>
bool ContainersEqual(const Container1T& c1, const Container2T& c2,
                     PredicateT p) {
  return ((c1.size() == c2.size()) &&
          std::equal(std::begin(c1), std::end(c1), std::begin(c2), p));
}
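// Illustrative use of the overloads above:
//   std::vector<int64> a = {1, 2, 3};
//   ContainersEqual(a, {1, 2, 3});  // true
//   ContainersEqual(a, {1, 2});     // false: sizes differ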

// Performs a copy of count values from src to dest, using different strides for
// source and destination. The source starting index is src_base, while the
// destination one is dest_base.
template <typename D, typename S>
void StridedCopy(tensorflow::gtl::MutableArraySlice<D> dest, int64 dest_base,
                 int64 dest_stride, tensorflow::gtl::ArraySlice<S> src,
                 int64 src_base, int64 src_stride, int64 count) {
  for (; count > 0; --count, dest_base += dest_stride, src_base += src_stride) {
    dest[dest_base] = static_cast<D>(src[src_base]);
  }
}
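// Illustrative use: copy every other element of `src` into the front of `dest`.
//   StridedCopy(dest, /*dest_base=*/0, /*dest_stride=*/1,
//               src, /*src_base=*/0, /*src_stride=*/2, /*count=*/3);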

// Adds some context information to the error message in a Status. This is
// useful as Statuses are propagated upwards.
Status AddStatus(Status prior, tensorflow::StringPiece context);
Status AppendStatus(Status prior, tensorflow::StringPiece context);

// Status error shorthands -- printfs the arguments to be used as an error
// message and returns a status in the canonical error space.
Status InvalidArgument(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status Unimplemented(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status InternalError(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status FailedPrecondition(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status Cancelled(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status ResourceExhausted(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status NotFound(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);
Status Unavailable(const char* format, ...) TF_PRINTF_ATTRIBUTE(1, 2);

// Passed-varargs variant of the InvalidArgument factory above.
Status InvalidArgumentV(const char* format, va_list args);

template <typename... Args>
Status UnimplementedStrCat(Args&&... concat) {
  return Unimplemented(
      "%s", tensorflow::strings::StrCat(std::forward<Args>(concat)...).c_str());
}

template <typename... Args>
Status InternalErrorStrCat(Args&&... concat) {
  return InternalError(
      "%s", tensorflow::strings::StrCat(std::forward<Args>(concat)...).c_str());
}

template <typename... Args>
Status ResourceExhaustedStrCat(Args&&... concat) {
  return ResourceExhausted(
      "%s", tensorflow::strings::StrCat(std::forward<Args>(concat)...).c_str());
}
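// Illustrative use of the StrCat-style factories above, assuming a local
// variable `rank`:
//   return UnimplementedStrCat("unhandled rank: ", rank);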

// Splits the lines of the original, replaces leading whitespace with the prefix
// given by "indentation", and returns the string joined by newlines again. As a
// side effect, any additional trailing whitespace is removed.
//
// Note: even different amounts of leading whitespace on different lines will be
// uniformly replaced with "indentation".
string Reindent(tensorflow::StringPiece original,
                tensorflow::StringPiece indentation);
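// For example (illustrative), Reindent("  foo\n    bar", "~") returns
// "~foo\n~bar".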

// Checks whether permutation is a permutation of the [0, rank) integer range.
bool IsPermutation(tensorflow::gtl::ArraySlice<int64> permutation, int64 rank);
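// For example (illustrative), IsPermutation({2, 0, 1}, 3) is true, while
// IsPermutation({1, 1, 0}, 3) and IsPermutation({0, 1}, 3) are false.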

// Applies `permutation` to `input` and returns the permuted array.
// For each i, output[permutation[i]] = input[i].
//
// Precondition:
// 1. `permutation` is a permutation of 0..permutation.size()-1.
// 2. permutation.size() == input.size().
template <template <typename...> class C, typename T>
std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation,
                       C<T> input) {
  tensorflow::gtl::ArraySlice<T> data(input);
  CHECK(IsPermutation(permutation, data.size()));
  std::vector<T> output(data.size());
  for (size_t i = 0; i < permutation.size(); ++i) {
    output[permutation[i]] = data[i];
  }
  return output;
}

// Overload of the above that works around compile failures with gcc 7.1.1.
// For details see https://github.com/tensorflow/tensorflow/issues/10843
// Hide this workaround from MSVC as it causes an ambiguous-overload error.
#ifndef _MSC_VER
template <typename T>
std::vector<T> Permute(tensorflow::gtl::ArraySlice<int64> permutation,
                       const std::vector<T>& input) {
  return Permute<std::vector, T>(permutation, input);
}
#endif
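// For example (illustrative), Permute({2, 0, 1}, std::vector<int64>{7, 8, 9})
// returns {8, 9, 7}, since output[permutation[i]] = input[i].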

// Inverts a permutation, i.e., output_permutation[input_permutation[i]] = i.
std::vector<int64> InversePermutation(
    tensorflow::gtl::ArraySlice<int64> input_permutation);
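// For example (illustrative), InversePermutation({2, 0, 1}) returns {1, 2, 0}.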

// Composes two permutations: output[i] = p1[p2[i]].
std::vector<int64> ComposePermutations(tensorflow::gtl::ArraySlice<int64> p1,
                                       tensorflow::gtl::ArraySlice<int64> p2);
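// For example (illustrative), ComposePermutations({1, 2, 0}, {2, 0, 1}) returns
// {0, 1, 2}: composing a permutation with its inverse yields the identity.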
    286 
    287 // Returns true iff permutation == {0, 1, 2, ...}.
    288 bool IsIdentityPermutation(tensorflow::gtl::ArraySlice<int64> permutation);
    289 
    290 template <typename Container>
    291 int64 PositionInContainer(const Container& container, int64 value) {
    292   return std::distance(container.begin(),
    293                        std::find(container.begin(), container.end(), value));
    294 }
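// For example (illustrative), PositionInContainer(std::vector<int64>{3, 5, 7},
// 5) returns 1; if the value is absent, the container's size is returned.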

// Formats the container as a comma-separated string. StrAppend must support
// appending the elements of the container. Prefix is prepended and suffix is
// appended to the returned string.
template <typename Container>
string CommaSeparatedString(const Container& c, const char* prefix = "",
                            const char* suffix = "") {
  // Not using Join() since the implementation here is simple anyway and this
  // avoids copying the string to append the prefix.
  string comma_separated = prefix;
  const char* separator = "";
  for (const auto& entry : c) {
    tensorflow::strings::StrAppend(&comma_separated, separator, entry);
    separator = ", ";
  }
  comma_separated += suffix;
  return comma_separated;
}
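// For example (illustrative), CommaSeparatedString(std::vector<int>{1, 3, 7},
// "[", "]") returns "[1, 3, 7]".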

// Overload needed to allow the container to be an initializer list. The default
// type for T makes an empty initializer list work as well.
template <typename T = int>
string CommaSeparatedString(const std::initializer_list<T>& c,
                            const char* prefix = "", const char* suffix = "") {
  return CommaSeparatedString<std::initializer_list<T>>(c, prefix, suffix);
}

// Formats the container in the mathematical notation for a vector, e.g. (1, 3,
// 7). StrAppend must support appending the elements of c.
template <typename Container>
string VectorString(const Container& c) {
  return CommaSeparatedString(c, "(", ")");
}

// Overload needed to allow the container to be an initializer list. The default
// type for T makes an empty initializer list work as well.
template <typename T = int>
string VectorString(const std::initializer_list<T>& c) {
  return VectorString<std::initializer_list<T>>(c);
}

// Returns a PaddingConfig object that represents no padding for the given rank.
PaddingConfig MakeNoPaddingConfig(int64 rank);

// Returns a PaddingConfig object where 'padding' contains
// (low edge padding, high edge padding) pairs for each dimension.
PaddingConfig MakeEdgePaddingConfig(
    tensorflow::gtl::ArraySlice<std::pair<int64, int64>> padding);
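// For example (illustrative), MakeEdgePaddingConfig({{0, 1}, {2, 0}}) pads
// dimension 0 with one element at the high edge and dimension 1 with two
// elements at the low edge.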

// Returns true if the padding configuration has at least one dimension with
// non-zero interior padding.
bool HasInteriorPadding(const PaddingConfig& config);

// Imports the templated FloorOfRatio math function from the TensorFlow
// namespace, as it is very commonly used.
template <typename T>
T FloorOfRatio(T dividend, T divisor) {
  return tensorflow::MathUtil::FloorOfRatio<T>(dividend, divisor);
}

// Imports the templated CeilOfRatio math function from the TensorFlow
// namespace, as it is very commonly used.
template <typename T>
T CeilOfRatio(T dividend, T divisor) {
  return tensorflow::MathUtil::CeilOfRatio<T>(dividend, divisor);
}
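// For example (illustrative), FloorOfRatio(13, 8) returns 1 and
// CeilOfRatio(13, 8) returns 2.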

// Rounds the value up to a multiple of the divisor by first calling CeilOfRatio
// then multiplying by the divisor. For example: RoundUpToNearest(13, 8) => 16
template <typename T>
T RoundUpToNearest(T value, T divisor) {
  return CeilOfRatio(value, divisor) * divisor;
}

// Rounds the value down to a multiple of the divisor by first calling
// FloorOfRatio then multiplying by the divisor. For example:
// RoundDownToNearest(13, 8) => 8
template <typename T>
T RoundDownToNearest(T value, T divisor) {
  return FloorOfRatio(value, divisor) * divisor;
}

// Given a number of flops executed in an amount of time, produces a string that
// represents the throughput;
// e.g. HumanReadableNumFlops(1e9, 1e9) => 1.00GFLOP/s.
string HumanReadableNumFlops(double flops, double nanoseconds);

// Given a number of transcendental ops executed in an amount of time, produces
// a string that represents the throughput;
// e.g. HumanReadableNumTranscendentalOps(1e9, 1e9) => 1.00GTROP/s.
string HumanReadableNumTranscendentalOps(double trops, double nanoseconds);

// Splits the text into multiple lines and logs each line with the given
// severity, filename, and line number.
void LogLines(int sev, tensorflow::StringPiece text, const char* fname,
              int lineno);

template <typename T>
inline bool IsPowerOfTwo(T x) {
  static_assert(!std::numeric_limits<T>::is_signed, "unsigned types only");
  return x != 0 && (x & (x - 1)) == 0;
}

// Returns a mask with the given number of least significant bits set; "bits"
// must be in the range [0, 32].
inline uint32 LsbMaskU32(int bits) {
  CHECK_GE(bits, 0);
  CHECK_LE(bits, 32);
  return bits == 32 ? ~uint32{0} : (1U << bits) - 1;
}
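// For example (illustrative), LsbMaskU32(3) returns 0x7.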

// Utility for performing a static_cast<> on a std::unique_ptr<>.
template <typename Derived, typename Base>
std::unique_ptr<Derived> unique_ptr_static_cast(std::unique_ptr<Base> ptr) {
  return std::unique_ptr<Derived>(static_cast<Derived*>(ptr.release()));
}

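// Returns the product of the elements of `xs`.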
int64 Product(tensorflow::gtl::ArraySlice<int64> xs);

// Returns the start indices of consecutive non-overlapping subsequences of `a`
// and `b` with the same product, i.e. pairs `(i, j)` such that
//   a = {a[0 = i_0], ..., a[i_1 - 1], a[i_1], ..., a[i_2 - 1], ...}
//   b = {b[0 = j_0], ..., b[j_1 - 1], b[j_1], ..., b[j_2 - 1], ...}
// and, for all k with 0 <= k < CommonFactors(a, b).size - 1,
//   a[i_k] * a[i_k + 1] * ... * a[i_(k+1) - 1] =
//   b[j_k] * b[j_k + 1] * ... * b[j_(k+1) - 1],
// where `CommonFactors(a, b)[CommonFactors(a, b).size - 1] = (a.size, b.size)`.
//
// If the given shapes have non-zero size, returns the bounds of the shortest
// possible such subsequences; else, returns `{(0, 0), (a.size, b.size)}`.
std::vector<std::pair<int64, int64>> CommonFactors(
    tensorflow::gtl::ArraySlice<int64> a, tensorflow::gtl::ArraySlice<int64> b);
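// For example (illustrative), CommonFactors({4, 3}, {2, 2, 3}) returns
// {(0, 0), (1, 2), (2, 3)}: 4 = 2 * 2 and 3 = 3.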

// Removes illegal characters from filenames.
string SanitizeFileName(string file_name);

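// Container-based analogues of the <algorithm> entry points below: each
// forwards std::begin()/std::end() of the container to the corresponding
// std:: algorithm.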
template <typename Container, typename Predicate>
bool c_all_of(Container container, Predicate predicate) {
  return std::all_of(std::begin(container), std::end(container), predicate);
}

template <typename InputContainer, typename OutputIterator,
          typename UnaryOperation>
OutputIterator c_transform(InputContainer input_container,
                           OutputIterator output_iterator,
                           UnaryOperation unary_op) {
  return std::transform(std::begin(input_container), std::end(input_container),
                        output_iterator, unary_op);
}

template <class InputContainer, class OutputIterator, class UnaryPredicate>
OutputIterator c_copy_if(InputContainer input_container,
                         OutputIterator output_iterator,
                         UnaryPredicate predicate) {
  return std::copy_if(std::begin(input_container), std::end(input_container),
                      output_iterator, predicate);
}

template <class InputContainer, class OutputIterator>
OutputIterator c_copy(InputContainer input_container,
                      OutputIterator output_iterator) {
  return std::copy(std::begin(input_container), std::end(input_container),
                   output_iterator);
}

template <class InputContainer>
void c_sort(InputContainer& input_container) {
  std::sort(std::begin(input_container), std::end(input_container));
}

template <class InputContainer, class Comparator>
void c_sort(InputContainer& input_container, Comparator comparator) {
  std::sort(std::begin(input_container), std::end(input_container), comparator);
}
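// Illustrative use of the wrappers above:
//   std::vector<int64> dims = {3, 1, 2};
//   c_sort(dims);                         // dims is now {1, 2, 3}
//   c_sort(dims, std::greater<int64>());  // dims is now {3, 2, 1}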

template <typename Sequence, typename T>
bool c_binary_search(Sequence& sequence, T&& value) {
  return std::binary_search(std::begin(sequence), std::end(sequence),
                            std::forward<T>(value));
}

template <typename C>
bool c_is_sorted(const C& c) {
  return std::is_sorted(std::begin(c), std::end(c));
}

template <typename C>
auto c_adjacent_find(const C& c) -> decltype(std::begin(c)) {
  return std::adjacent_find(std::begin(c), std::end(c));
}
}  // namespace xla

#define XLA_LOG_LINES(SEV, STRING) \
  ::xla::LogLines(SEV, STRING, __FILE__, __LINE__)

#define XLA_VLOG_LINES(LEVEL, STRING)                                 \
  do {                                                                \
    if (VLOG_IS_ON(LEVEL)) XLA_LOG_LINES(::tensorflow::INFO, STRING); \
  } while (false);
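// Illustrative use of XLA_VLOG_LINES above, assuming a local multi-line string:
//   XLA_VLOG_LINES(2, some_multiline_dump);  // logs each line at VLOG level 2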

// Utility macro that performs the equivalent of what one would expect
// LOG_LINES(FATAL, X) to do but can be used at the end of a function that
// returns a value without getting a compiler warning that no value is returned.
#define XLA_FATAL_LOG(X)                 \
  XLA_LOG_LINES(::tensorflow::ERROR, X); \
  LOG(FATAL) << "Aborting in " << __FUNCTION__ << " due to previous errors.";

#endif  // TENSORFLOW_COMPILER_XLA_UTIL_H_