/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#ifndef TENSORFLOW_COMPILER_XLA_ARRAY_H_
#define TENSORFLOW_COMPILER_XLA_ARRAY_H_

#include <algorithm>
#include <array>
#include <functional>
#include <initializer_list>
#include <iterator>
#include <memory>
#include <numeric>
#include <random>
#include <type_traits>
#include <vector>

#include "tensorflow/compiler/xla/status.h"
#include "tensorflow/compiler/xla/types.h"
#include "tensorflow/core/lib/core/bits.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"

namespace xla {

namespace array_impl {

// conjunction
//
// Performs a compile-time logical AND operation on the passed types (which
// must have `::value` members convertible to `bool`. Short-circuits if it
// encounters any `false` members (and does not compare the `::value` members
// of any remaining arguments).
//
// This metafunction is designed to be a drop-in replacement for the C++17
// `std::conjunction` metafunction.
template <typename... Ts>
struct conjunction;

// Recursive case: if T::value is true, continue with the rest of the pack;
// otherwise stop and expose T itself (short-circuit).
template <typename T, typename... Ts>
struct conjunction<T, Ts...>
    : std::conditional<T::value, conjunction<Ts...>, T>::type {};

// Base case: the empty pack is vacuously true.
template <>
struct conjunction<> : std::true_type {};

// A type trait that is valid when all elements in a parameter pack are of
// integral type.
template <typename... T>
using pack_is_integral = conjunction<std::is_integral<T>...>;

// Compares three same-sized vectors elementwise. For each item in `values`,
// returns false if any of values[i] is outside the half-open range [starts[i],
// ends[i]).
//
// NOTE: all three containers must have the same size(); only `values.size()`
// elements are examined.
template <typename C1, typename C2, typename C3>
bool all_inside_range(const C1& values, const C2& range_starts,
                      const C3& range_ends) {
  for (size_t i = 0, e = values.size(); i < e; ++i) {
    if (values[i] < range_starts[i] || values[i] >= range_ends[i]) {
      return false;
    }
  }
  return true;
}

}  // namespace array_impl

// General N dimensional array class with arbitrary value type.
template <typename T>
class Array {
 public:
  // Type inference can have a hard time parsing very deep initializer list
  // nests, especially if one or more dimensions is one as the compiler just
  // sees a single-element integer initializer. These typedefs allow casting
  // explicitly with less typing.
  using InitializerList1D = std::initializer_list<T>;
  using InitializerList2D = std::initializer_list<InitializerList1D>;
  using InitializerList3D = std::initializer_list<InitializerList2D>;
  using InitializerList4D = std::initializer_list<InitializerList3D>;

  using value_type = T;

  // Creates a new array with the specified dimensions. Elements are
  // value-initialized (T()).
  explicit Array(tensorflow::gtl::ArraySlice<int64> sizes)
      : Array(sizes, T()) {}

  // Creates a new array with the specified dimensions and specified value for
  // every cell.
104 Array(tensorflow::gtl::ArraySlice<int64> sizes, T value) 105 : sizes_(sizes.begin(), sizes.end()), values_(new T[num_elements()]) { 106 Fill(value); 107 } 108 109 // Creates a 2D array from the given nested initializer list. The outer 110 // initializer list is the first dimension, the inner is the second dimension. 111 // For example, {{1, 2, 3}, {4, 5, 6}} results in an array with n1=2 and n2=3. 112 Array(InitializerList2D values) 113 : Array(ToInt64Vector({values.size(), values.begin()->size()})) { 114 int64 idx = 0; 115 for (const auto& it1 : values) { 116 for (const auto& it2 : it1) { 117 values_[idx] = it2; 118 ++idx; 119 } 120 } 121 CHECK(idx == num_elements()); 122 } 123 124 // Creates a 2D array of Eigen::half from the given nested initializer list of 125 // float values. 126 template <typename T2, typename = typename std::enable_if< 127 std::is_same<T, Eigen::half>::value && 128 std::is_same<T2, float>::value>::type> 129 Array(std::initializer_list<std::initializer_list<T2>> values) 130 : Array(ToInt64Vector({values.size(), values.begin()->size()})) { 131 int64 idx = 0; 132 for (const auto& it1 : values) { 133 for (const auto& it2 : it1) { 134 values_[idx] = static_cast<T>(it2); 135 ++idx; 136 } 137 } 138 CHECK(idx == num_elements()); 139 } 140 141 // Creates a 3D array from the given nested initializer list. The outer 142 // initializer list is the first dimension, and so on. 143 Array(InitializerList3D values) 144 : Array(ToInt64Vector({values.size(), values.begin()->size(), 145 values.begin()->begin()->size()})) { 146 int64 idx = 0; 147 for (const auto& it1 : values) { 148 for (const auto& it2 : it1) { 149 for (const auto& it3 : it2) { 150 values_[idx] = it3; 151 ++idx; 152 } 153 } 154 } 155 CHECK(idx == num_elements()); 156 } 157 158 // Creates a 3D array of Eigen::half from the given nested initializer list of 159 // float values. 
160 template <typename T2, typename = typename std::enable_if< 161 std::is_same<T, Eigen::half>::value && 162 std::is_same<T2, float>::value>::type> 163 Array(std::initializer_list<std::initializer_list<std::initializer_list<T2>>> 164 values) 165 : Array(ToInt64Vector({values.size(), values.begin()->size(), 166 values.begin()->begin()->size()})) { 167 int64 idx = 0; 168 for (const auto& it1 : values) { 169 for (const auto& it2 : it1) { 170 for (const auto& it3 : it2) { 171 values_[idx] = static_cast<T>(it3); 172 ++idx; 173 } 174 } 175 } 176 CHECK(idx == num_elements()); 177 } 178 179 // Creates a 4D array from the given nested initializer list. The outer 180 // initializer list is the first dimension, and so on. 181 Array(InitializerList4D values) 182 : Array(ToInt64Vector({values.size(), values.begin()->size(), 183 values.begin()->begin()->size(), 184 values.begin()->begin()->begin()->size()})) { 185 int64 idx = 0; 186 for (const auto& it1 : values) { 187 for (const auto& it2 : it1) { 188 for (const auto& it3 : it2) { 189 for (const auto& it4 : it3) { 190 values_[idx] = it4; 191 ++idx; 192 } 193 } 194 } 195 } 196 CHECK(idx == num_elements()); 197 } 198 199 // Creates a 4D array of Eigen::half from the given nested initializer list of 200 // float values. 
201 template <typename T2, typename = typename std::enable_if< 202 std::is_same<T, Eigen::half>::value && 203 std::is_same<T2, float>::value>::type> 204 Array(std::initializer_list< 205 std::initializer_list<std::initializer_list<std::initializer_list<T2>>>> 206 values) 207 : Array(ToInt64Vector({values.size(), values.begin()->size(), 208 values.begin()->begin()->size(), 209 values.begin()->begin()->begin()->size()})) { 210 int64 idx = 0; 211 for (const auto& it1 : values) { 212 for (const auto& it2 : it1) { 213 for (const auto& it3 : it2) { 214 for (const auto& it4 : it3) { 215 values_[idx] = static_cast<T>(it4); 216 ++idx; 217 } 218 } 219 } 220 } 221 CHECK(idx == num_elements()); 222 } 223 224 Array(const Array<T>& other) 225 : sizes_(other.sizes_), values_(new T[num_elements()]) { 226 std::copy(&other.values_[0], &other.values_[0] + num_elements(), 227 &values_[0]); 228 } 229 230 Array<T>& operator=(const Array<T>& other) { 231 sizes_ = other.sizes_; 232 values_.reset(new T[num_elements()]); 233 std::copy(&other.values_[0], &other.values_[0] + num_elements(), 234 &values_[0]); 235 return *this; 236 } 237 238 // Fills the array with the specified value. 239 void Fill(const T& value) { 240 std::fill(&values_[0], &values_[0] + num_elements(), value); 241 } 242 243 // Fills the array with sequentially increasing values. 244 void FillIota(const T& value) { 245 std::iota(&values_[0], &values_[0] + num_elements(), value); 246 } 247 248 // Fills the array with the sequence i*multiplier for i=0,1,... 249 void FillWithMultiples(const T& multiplier) { 250 for (int64 i = 0; i < num_elements(); ++i) { 251 values_[i] = static_cast<T>(i) * multiplier; 252 } 253 } 254 255 // Fills the array with random normal variables with the specified mean. 
256 void FillRandom(const T& value, const double mean = 0.0, 257 const int seed = 12345) { 258 std::mt19937 g(seed); 259 std::normal_distribution<double> distribution(mean, 260 static_cast<double>(value)); 261 for (int64 i = 0; i < num_elements(); ++i) { 262 values_[i] = static_cast<T>(distribution(g)); 263 } 264 } 265 266 // Sets all the values in the array to values specified in the container. 267 template <typename Container = std::initializer_list<T>> 268 void SetValues(const Container& container) { 269 CHECK_EQ(std::distance(std::begin(container), std::end(container)), 270 num_elements()); 271 std::copy(std::begin(container), std::end(container), &values_[0]); 272 } 273 274 // Invokes a callback with the (indices, value_ptr) for each cell in the 275 // array. 276 void Each(std::function<void(tensorflow::gtl::ArraySlice<int64>, T*)> f) { 277 std::vector<int64> index(sizes_.size()); 278 for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { 279 f(index, &values_[i]); 280 } 281 } 282 283 // Invokes a callback with the (indices, value) for each cell in the array. 284 void Each( 285 std::function<void(tensorflow::gtl::ArraySlice<int64>, T)> f) const { 286 std::vector<int64> index(sizes_.size()); 287 for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { 288 f(index, values_[i]); 289 } 290 } 291 292 // Invokes a callback with the (indices, value_ptr) for each cell in the 293 // array. If a callback returns a non-OK status, returns that else returns 294 // Status::OK(). 295 Status EachStatus( 296 std::function<Status(tensorflow::gtl::ArraySlice<int64>, T*)> f) { 297 std::vector<int64> index(sizes_.size()); 298 for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { 299 Status s = f(index, &values_[i]); 300 if (!s.ok()) { 301 return s; 302 } 303 } 304 return Status::OK(); 305 } 306 307 // Invokes a callback with the (indices, value) for each cell in the array. 
308 // If a callback returns a non-OK status, returns that else returns 309 // Status::OK(). 310 Status EachStatus( 311 std::function<Status(tensorflow::gtl::ArraySlice<int64>, T)> f) const { 312 std::vector<int64> index(sizes_.size()); 313 for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { 314 Status s = f(index, values_[i]); 315 if (!s.ok()) { 316 return s; 317 } 318 } 319 return Status::OK(); 320 } 321 322 // Returns the value at the cell specified by the indexes. The number of 323 // arguments have to match with the number of dimensions for the array. 324 // 325 // The type trait is required to avoid this overload participating too 326 // eagerly; a parameter pack can take zero or more elements, so we must 327 // restrict this to only parameter packs that are all of integral type. 328 template <typename... Dims> 329 typename std::enable_if<array_impl::pack_is_integral<Dims...>::value, 330 const T&>::type 331 operator()(Dims... dims) const { 332 // We are using a std::array to avoid having to allocate memory in this 333 // function for performance reasons. 334 std::array<int64, sizeof...(dims)> indexes{{static_cast<int64>(dims)...}}; 335 return values_[calculate_index(indexes)]; 336 } 337 338 // Returns the value at the cell specified by the indexes. The number of 339 // arguments have to match with the number of dimensions for the array. 340 template <typename... Dims> 341 typename std::enable_if<array_impl::pack_is_integral<Dims...>::value, 342 T&>::type 343 operator()(Dims... dims) { 344 // We are using a std::array to avoid having to allocate memory in this 345 // function for performance reasons. 346 std::array<int64, sizeof...(dims)> indexes{{static_cast<int64>(dims)...}}; 347 return values_[calculate_index(indexes)]; 348 } 349 350 // Returns the value at the cell specified by the indexes. The number of 351 // arguments have to match with the number of dimensions for the array. 
352 const T& operator()(tensorflow::gtl::ArraySlice<int64> indexes) const { 353 return values_[calculate_index(indexes)]; 354 } 355 356 // Returns the value at the cell specified by the indexes. The number of 357 // arguments have to match with the number of dimensions for the array. 358 T& operator()(tensorflow::gtl::ArraySlice<int64> indexes) { 359 return values_[calculate_index(indexes)]; 360 } 361 362 // Low-level accessor for stuff like memcmp, handle with care. Returns pointer 363 // to the underlying storage of the array (similarly to std::vector::data()). 364 T* data() const { 365 // TODO(tberghammer): Get rid of the const_cast. Currently it is needed 366 // because the Eigen backend needs a non-const pointers even for reading 367 // from the array. 368 return const_cast<Array*>(this)->values_.get(); 369 } 370 371 // Returns the size of the dimension at the given index. 372 int64 dim(int64 n) const { 373 CHECK(n < sizes_.size()); 374 return sizes_[n]; 375 } 376 377 // Returns a vector containing the dimensions of the array. 378 const std::vector<int64>& dimensions() const { return sizes_; } 379 380 int64 num_dimensions() const { return sizes_.size(); } 381 382 // Returns the total number of elements in the array. 
383 int64 num_elements() const { 384 return std::accumulate(sizes_.begin(), sizes_.end(), 1, 385 std::multiplies<int64>()); 386 } 387 388 const T* begin() const { return &values_[0]; } 389 T* begin() { return &values_[0]; } 390 const T* end() const { return &values_[num_elements()]; } 391 T* end() { return &values_[num_elements()]; } 392 393 bool operator==(const Array<T>& other) const { 394 if (sizes_.size() != other.sizes_.size()) { 395 return false; 396 } 397 for (int64 i = 0; i < sizes_.size(); ++i) { 398 if (sizes_[i] != other.sizes_[i]) { 399 return false; 400 } 401 } 402 for (int64 i = 0; i < num_elements(); ++i) { 403 if (values_[i] != other.values_[i]) { 404 return false; 405 } 406 } 407 return true; 408 } 409 410 bool operator!=(const Array<T>& other) const { return !(*this == other); } 411 412 // Performs the equivalent of a slice operation on this array. 413 Array<T> Slice(tensorflow::gtl::ArraySlice<int64> starts, 414 tensorflow::gtl::ArraySlice<int64> limits) const { 415 CHECK_EQ(starts.size(), num_dimensions()); 416 CHECK_EQ(limits.size(), num_dimensions()); 417 418 std::vector<int64> sizes; 419 std::transform(starts.begin(), starts.end(), limits.begin(), 420 std::back_inserter(sizes), 421 [](int64 start, int64 limit) { return limit - start; }); 422 Array<T> result(sizes); 423 424 std::vector<int64> index(sizes_.size()); 425 int64 slice_i = 0; 426 for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) { 427 if (array_impl::all_inside_range(index, starts, limits)) { 428 // Even though the bounds of result are different to our bounds, we're 429 // iterating in the same order. So we can simply write successive linear 430 // indices instead of recalculating a multi-dimensional index. 431 result.values_[slice_i++] = values_[i]; 432 } 433 } 434 return result; 435 } 436 437 // Performs the equivalent of a DynamicUpdateSlice in-place on this array. 
  void UpdateSlice(const Array<T>& from,
                   tensorflow::gtl::ArraySlice<int64> start_indices) {
    CHECK_EQ(from.num_dimensions(), num_dimensions());
    // limit_indices[i] = start_indices[i] + from.dim(i): the update region is
    // the half-open hyper-rectangle [start_indices, limit_indices).
    std::vector<int64> limit_indices;
    std::transform(start_indices.begin(), start_indices.end(),
                   from.dimensions().begin(), std::back_inserter(limit_indices),
                   std::plus<int64>{});
    std::vector<int64> index(sizes_.size());
    int64 from_i = 0;
    for (int64 i = 0; i < num_elements(); ++i, next_index(&index)) {
      if (array_impl::all_inside_range(index, start_indices, limit_indices)) {
        // Even though the bounds of from are different to our bounds, we're
        // iterating in the same order. So we can simply write successive linear
        // indices instead of recalculating a multi-dimensional index.
        values_[i] = from.values_[from_i++];
      }
    }
  }

  // Performs an in-place reshape, modifying the dimensions but not the
  // underlying data. The new shape must describe the same total element
  // count (CHECK-failure otherwise).
  void Reshape(tensorflow::gtl::ArraySlice<int64> new_dimensions) {
    int64 old_num_elements = num_elements();
    sizes_ = std::vector<int64>(new_dimensions.begin(), new_dimensions.end());
    CHECK_EQ(num_elements(), old_num_elements);
  }

  // Returns a string representation of the array suitable for debugging.
  // NOTE(review): the do/while below reads index.back() unconditionally, so
  // this appears to assume a rank >= 1 array — verify callers never pass a
  // rank-0 Array here.
  string ToString() const {
    std::vector<string> pieces;
    std::vector<int64> index(sizes_.size());
    do {
      // Emit leading spaces and opening square brackets
      if (index.back() == 0) {
        for (int64 i = sizes_.size() - 1; i >= 0; --i) {
          if (i == 0 || index[i - 1] != 0) {
            for (int64 j = 0; j < sizes_.size(); ++j) {
              pieces.push_back(j < i ? " " : "[");
            }
            break;
          }
        }
      }

      pieces.push_back(
          tensorflow::strings::AlphaNum(values_[calculate_index(index)])
              .data());

      // Emit comma if it isn't the last element
      if (index.back() != sizes_.back() - 1) {
        pieces.push_back(", ");
      }

      // Emit closing square brackets
      for (int64 i = sizes_.size() - 1; i >= 0; --i) {
        if (index[i] != sizes_[i] - 1) {
          break;
        }
        pieces.push_back("]");
        if (i != 0 && index[i - 1] != sizes_[i - 1] - 1) {
          pieces.push_back(",\n");
        }
      }
    } while (next_index(&index));
    return tensorflow::str_util::Join(pieces, "");
  }

 private:
  // Converts an initializer_list of type U to a vector of type int64. Used by
  // the initializer list based constructors to convert the size type into int64
  // to be passed to the size based constructor.
  template <typename U>
  static std::vector<int64> ToInt64Vector(
      const std::initializer_list<U>& data) {
    return std::vector<int64>(data.begin(), data.end());
  }

  // Returns the linear index from the list of per-dimension indexes. Function
  // is templated so can be used with an std::array from operator() to avoid
  // memory allocation.
  // The layout is row-major: the last dimension varies fastest.
  template <typename U>
  int64 calculate_index(const U& indexes) const {
    CHECK_EQ(sizes_.size(), indexes.size());
    int64 index = 0;
    for (int64 i = 0; i < sizes_.size(); ++i) {
      index *= sizes_[i];
      index += indexes[i];
    }
    return index;
  }

  // Advances the specified set of indexes and returns true if we haven't
  // wrapped around (i.e. result isn't {0, 0, ...}).
531 bool next_index(std::vector<int64>* index) const { 532 CHECK_EQ(index->size(), sizes_.size()); 533 for (int64 i = sizes_.size() - 1; i >= 0; --i) { 534 (*index)[i]++; 535 if ((*index)[i] < sizes_[i]) { 536 return true; 537 } 538 (*index)[i] = 0; 539 } 540 return false; 541 } 542 543 std::vector<int64> sizes_; 544 std::unique_ptr<T[]> values_; 545 }; 546 547 } // namespace xla 548 549 #endif // TENSORFLOW_COMPILER_XLA_ARRAY_H_ 550