/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#ifdef GOOGLE_CUDA
#define EIGEN_USE_GPU
#endif  // GOOGLE_CUDA

#include <vector>

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"

#include "tensorflow/core/framework/numeric_op.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/type_index.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/gtl/array_slice.h"
#include "tensorflow/core/platform/macros.h"
#include "tensorflow/core/platform/types.h"

namespace tensorflow {

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
#ifdef TENSORFLOW_USE_SYCL
typedef Eigen::SyclDevice SYCLDevice;
#endif  // TENSORFLOW_USE_SYCL

// Forward declarations of functors that will be defined in tile_ops_impl.h.
namespace functor {
template <typename Device, typename T, typename Tmultiple>
struct Tile {
  void operator()(const Device& d, Tensor* out, const Tensor& in,
                  const gtl::ArraySlice<Tmultiple> broadcast_array) const;
};

template <typename Device, typename T, int NDIM>
struct TileGrad {
  void operator()(const Device& d, typename TTypes<T, NDIM>::Tensor out,
                  typename TTypes<T, NDIM>::ConstTensor in,
                  const Eigen::DSizes<Eigen::DenseIndex, NDIM>& indices,
                  const Eigen::DSizes<Eigen::DenseIndex, NDIM>& sizes,
                  bool first) const;
};

template <typename Device, typename T>
struct TileGrad<Device, T, 0> {
  void operator()(const Device& d, typename TTypes<T, 0>::Tensor out,
                  typename TTypes<T, 0>::ConstTensor in,
                  const Eigen::DSizes<Eigen::DenseIndex, 0>&,
                  const Eigen::DSizes<Eigen::DenseIndex, 0>&, bool first) const;
};

template <typename Device, typename T, int NDIM, int REDUCEDNDIM>
struct ReduceAndReshape {
  void operator()(
      const Device& d, typename TTypes<T, NDIM>::Tensor out,
      typename TTypes<T, NDIM>::ConstTensor in,
      const Eigen::DSizes<Eigen::DenseIndex, REDUCEDNDIM>& reduce_dim,
      const Eigen::DSizes<Eigen::DenseIndex, NDIM>& reshape_dim) const;
};
}  // namespace functor
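// Tile replicates the input `multiples[i]` times along dimension i, so
// output dimension i has size input.dim_size(i) * multiples[i]. For example,
// tiling [[1, 2], [3, 4]] with multiples = [1, 2] yields
// [[1, 2, 1, 2], [3, 4, 3, 4]].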
// --------------------------------------------------------------------------
template <typename Device, typename Tmultiples>
class TileOp : public OpKernel {
 public:
  explicit TileOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& input = context->input(0);
    const Tensor& multiples = context->input(1);

    OP_REQUIRES(
        context, IsLegacyVector(multiples.shape()),
        errors::InvalidArgument("Expected multiples to be 1-D, but got shape ",
                                multiples.shape().DebugString()));
    OP_REQUIRES(context, input.dims() == multiples.NumElements(),
                errors::InvalidArgument(
                    "Expected multiples argument to be a vector of length ",
                    input.dims(), " but got length ", multiples.dim_size(0)));
    const int input_dims = input.dims();

    // Eigen doesn't support scalars on the GPU, so handle 0-D specially.
    if (input_dims == 0) {
      context->set_output(0, input);
      return;
    }

    const gtl::ArraySlice<Tmultiples> multiples_array(
        multiples.flat<Tmultiples>().data(), input_dims);
    TensorShape output_shape;
    for (int i = 0; i < input_dims; ++i) {
      OP_REQUIRES(
          context, multiples_array[i] >= 0,
          errors::InvalidArgument("Expected multiples[", i, "] >= 0, but got ",
                                  multiples_array[i]));
      output_shape.AddDim(input.dim_size(i) * multiples_array[i]);
    }
    if (output_shape == input.shape()) {
      context->set_output(0, input);
      return;
    }
    Tensor* result = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(0, output_shape, &result));

    // If there's no output, there's nothing to do.
    if (output_shape.num_elements() == 0) return;

#define HANDLE_TYPE(DT)                               \
  if (context->input(0).dtype() == DT) {              \
    HandleCase<DT>(context, multiples_array, result); \
    return;                                           \
  }

#define HANDLE_TYPE_NAME(T) HANDLE_TYPE(DataTypeToEnum<T>::value)

    // Invoke macro using TF_CALL_* so type-filtering for platform applies.
    TF_CALL_bool(HANDLE_TYPE_NAME);
    TF_CALL_float(HANDLE_TYPE_NAME);
    TF_CALL_double(HANDLE_TYPE_NAME);
    TF_CALL_uint8(HANDLE_TYPE_NAME);
    TF_CALL_int32(HANDLE_TYPE_NAME);
    TF_CALL_int16(HANDLE_TYPE_NAME);
    TF_CALL_int64(HANDLE_TYPE_NAME);
    TF_CALL_half(HANDLE_TYPE_NAME);
    TF_CALL_string(HANDLE_TYPE_NAME);  // when DEVICE=CPUDevice.
    TF_CALL_complex64(HANDLE_TYPE_NAME);
    TF_CALL_complex128(HANDLE_TYPE_NAME);

#undef HANDLE_TYPE_NAME
#undef HANDLE_TYPE

    OP_REQUIRES(context, false,
                errors::Unimplemented(
                    "TileOp : Unhandled input dimensions, DT : ",
                    context->input(0).dtype(), ", dims : ", input_dims));
  }

 private:
  template <DataType DT>
  void HandleCaseImpl(OpKernelContext* context,
                      const gtl::ArraySlice<Tmultiples>& multiples_array,
                      Tensor* result) {
    typedef typename EnumToDataType<DT>::Type T;
    functor::Tile<Device, T, Tmultiples>()(context->eigen_device<Device>(),
                                           result, context->input(0),
                                           multiples_array);
  }

  template <DataType DT>
  void HandleCase(OpKernelContext* context,
                  const gtl::ArraySlice<Tmultiples>& multiples_array,
                  Tensor* result);

  TF_DISALLOW_COPY_AND_ASSIGN(TileOp);
};
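// HandleCase has no generic implementation: the definition below aborts with
// LOG(FATAL), and the HANDLE_CASE macro stamps out an explicit specialization
// that forwards to HandleCaseImpl for each (Device, dtype, Tmultiples)
// combination instantiated further down. An unsupported combination therefore
// fails loudly at runtime instead of silently doing nothing.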
template <typename Device, typename Tmultiples>
template <DataType DT>
inline void TileOp<Device, Tmultiples>::HandleCase(
    OpKernelContext* context,
    const gtl::ArraySlice<Tmultiples>& multiples_array, Tensor* result) {
  // TODO(vrv): print out the device name if useful. Currently disabled to
  // avoid having to use RTTI.
  LOG(FATAL) << "TileOp: Invalid combination of Device, DT: "
             // << typeid(Device).name() << ", "
             << DataTypeString(DT);
}

#define HANDLE_CASE(device, dtype, Tmultiples)                              \
  template <>                                                               \
  template <>                                                               \
  void TileOp<device, Tmultiples>::HandleCase<dtype>(                      \
      OpKernelContext * context,                                            \
      const gtl::ArraySlice<Tmultiples>& multiples_array, Tensor* result) { \
    HandleCaseImpl<dtype>(context, multiples_array, result);                \
  }

#define HANDLE_TYPE_NAME_CPU(T)                            \
  HANDLE_CASE(CPUDevice, DataTypeToEnum<T>::value, int32); \
  HANDLE_CASE(CPUDevice, DataTypeToEnum<T>::value, int64);

#define HANDLE_TYPE_NAME_GPU(T)                            \
  HANDLE_CASE(GPUDevice, DataTypeToEnum<T>::value, int32); \
  HANDLE_CASE(GPUDevice, DataTypeToEnum<T>::value, int64);

#ifdef TENSORFLOW_USE_SYCL
#define HANDLE_TYPE_NAME_SYCL(T)                            \
  HANDLE_CASE(SYCLDevice, DataTypeToEnum<T>::value, int32); \
  HANDLE_CASE(SYCLDevice, DataTypeToEnum<T>::value, int64);
#endif  // TENSORFLOW_USE_SYCL

TF_CALL_bool(HANDLE_TYPE_NAME_CPU);
TF_CALL_float(HANDLE_TYPE_NAME_CPU);
TF_CALL_double(HANDLE_TYPE_NAME_CPU);
TF_CALL_uint8(HANDLE_TYPE_NAME_CPU);
TF_CALL_int32(HANDLE_TYPE_NAME_CPU);
TF_CALL_int16(HANDLE_TYPE_NAME_CPU);
TF_CALL_int64(HANDLE_TYPE_NAME_CPU);
TF_CALL_half(HANDLE_TYPE_NAME_CPU);
TF_CALL_complex64(HANDLE_TYPE_NAME_CPU);
TF_CALL_complex128(HANDLE_TYPE_NAME_CPU);
TF_CALL_string(HANDLE_TYPE_NAME_CPU);

#if GOOGLE_CUDA
TF_CALL_bool(HANDLE_TYPE_NAME_GPU);
TF_CALL_float(HANDLE_TYPE_NAME_GPU);
TF_CALL_double(HANDLE_TYPE_NAME_GPU);
TF_CALL_int16(HANDLE_TYPE_NAME_GPU);
TF_CALL_int32(HANDLE_TYPE_NAME_GPU);
TF_CALL_int64(HANDLE_TYPE_NAME_GPU);
TF_CALL_half(HANDLE_TYPE_NAME_GPU);
TF_CALL_complex64(HANDLE_TYPE_NAME_GPU);
TF_CALL_complex128(HANDLE_TYPE_NAME_GPU);
#endif  // GOOGLE_CUDA

#ifdef TENSORFLOW_USE_SYCL
TF_CALL_float(HANDLE_TYPE_NAME_SYCL);
TF_CALL_double(HANDLE_TYPE_NAME_SYCL);
TF_CALL_int16(HANDLE_TYPE_NAME_SYCL);
TF_CALL_int32(HANDLE_TYPE_NAME_SYCL);
TF_CALL_int64(HANDLE_TYPE_NAME_SYCL);
#endif  // TENSORFLOW_USE_SYCL

#undef HANDLE_TYPE_NAME_CPU
#undef HANDLE_TYPE_NAME_GPU
#ifdef TENSORFLOW_USE_SYCL
#undef HANDLE_TYPE_NAME_SYCL
#endif  // TENSORFLOW_USE_SYCL
#undef HANDLE_CASE

// --------------------------------------------------------------------------
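// TileGradientOp computes the gradient of Tile: the forward op copied each
// element multiples[i] times along dimension i, so the backward op sums the
// incoming gradient over all tiles. For example, a gradient [g0, g1, g2, g3]
// flowing back through Tile([a, b], multiples = [2]) reduces to
// [g0 + g2, g1 + g3].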
template <typename Device, typename Tmultiples>
class TileGradientOp : public OpKernel {
 public:
  explicit TileGradientOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& input = context->input(0);
    const Tensor& multiples = context->input(1);
    OP_REQUIRES(
        context, IsLegacyVector(multiples.shape()),
        errors::InvalidArgument("Expected multiples to be 1-D, but got shape ",
                                multiples.shape().DebugString()));
    OP_REQUIRES(context, input.dims() == multiples.NumElements(),
                errors::InvalidArgument(
                    "Expected multiples argument to be a vector of length ",
                    input.dims(), " but got length ", multiples.dim_size(0)));

    const int input_dims = input.dims();

    // Eigen doesn't support scalars on the GPU, so handle 0-D specially.
    if (input_dims == 0) {
      context->set_output(0, input);
      return;
    }

    const gtl::ArraySlice<Tmultiples> multiples_array(
        multiples.flat<Tmultiples>().data(), input_dims);
    TensorShape output_shape;
    std::vector<Tmultiples> input_dim_size_vec;
    for (int i = 0; i < input_dims; ++i) {
      OP_REQUIRES(
          context, multiples_array[i] > 0,
          errors::InvalidArgument("Expected multiples[", i, "] > 0, but got ",
                                  multiples_array[i]));
      OP_REQUIRES(context, input.dim_size(i) % multiples_array[i] == 0,
                  errors::InvalidArgument("Expected input_dim[", i,
                                          "] to be divisible by multiples[", i,
                                          "], but ", input.dim_size(i), " % ",
                                          multiples_array[i], " != 0"));
      output_shape.AddDim(input.dim_size(i) / multiples_array[i]);
      input_dim_size_vec.push_back(input.dim_size(i));
    }
    if (output_shape == input.shape()) {
      context->set_output(0, input);
      return;
    }
    Tensor* result = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(0, output_shape, &result));

#define HANDLE_DIM(DT, NDIM)                                           \
  if (context->input(0).dtype() == DT && input_dims == NDIM) {         \
    HandleCase<DT, NDIM>(context, input_dim_size_vec, multiples_array, \
                         result);                                      \
    return;                                                            \
  }

#define HANDLE_TYPE(T) \
  HANDLE_DIM(T, 1)     \
  HANDLE_DIM(T, 2)     \
  HANDLE_DIM(T, 3)     \
  HANDLE_DIM(T, 4)     \
  HANDLE_DIM(T, 5)     \
  HANDLE_DIM(T, 6)     \
  HANDLE_DIM(T, 7)

#define HANDLE_TYPE_NAME(T) HANDLE_TYPE(DataTypeToEnum<T>::value)

    TF_CALL_float(HANDLE_TYPE_NAME);
    TF_CALL_double(HANDLE_TYPE_NAME);
    TF_CALL_int32(HANDLE_TYPE_NAME);
    TF_CALL_int16(HANDLE_TYPE_NAME);
    TF_CALL_int64(HANDLE_TYPE_NAME);
    TF_CALL_half(HANDLE_TYPE_NAME);
    TF_CALL_complex64(HANDLE_TYPE_NAME);
    TF_CALL_complex128(HANDLE_TYPE_NAME);

#undef HANDLE_TYPE_NAME
#undef HANDLE_TYPE
#undef HANDLE_DIM

    OP_REQUIRES(context, false,
                errors::Unimplemented(
                    "TileGradientOp : Unhandled input dimensions, DT : ",
                    context->input(0).dtype(), ", dims : ", input_dims));
  }

 private:
  template <DataType DT, int NDIM>
  void HandleCase(OpKernelContext* context,
                  const std::vector<Tmultiples>& input_dims,
                  const gtl::ArraySlice<Tmultiples>& multiples_array,
                  Tensor* result);

  template <DataType DT, int NDIM>
  void HandleCaseImpl(OpKernelContext* context,
                      const std::vector<Tmultiples>& input_dims,
                      const gtl::ArraySlice<Tmultiples>& multiples_array,
                      Tensor* result) {
    typedef typename EnumToDataType<DT>::Type T;

    bool reduction_only = true;
    std::vector<Tmultiples> reduction_dims;

    for (int i = 0; i < NDIM; ++i) {
      if (input_dims[i] > multiples_array[i] && multiples_array[i] > 1) {
        reduction_only = false;
        break;
      } else {
        if (multiples_array[i] == input_dims[i]) {
          reduction_dims.push_back(i);
        }
      }
    }

    if (reduction_only) {
#define HANDLE_DIM(D)                                            \
  if (reduction_dims.size() == (D)) {                            \
    HandleReduce<T, NDIM, (D)>(context, reduction_dims, result); \
    return;                                                      \
  }
      // NOTE(keveman): Handling the most common case here.
      // Adding more cases here would require more templating and code
      // explosion. For instance, HANDLE_DIM(2) wouldn't make sense for
      // NDIM=1.
      HANDLE_DIM(1);

      // Fall through to the unoptimized version.
#undef HANDLE_DIM
    }

    Eigen::DSizes<Eigen::DenseIndex, NDIM> indices;
    Eigen::DSizes<Eigen::DenseIndex, NDIM> sizes;

    // Accumulate slices along the dimensions into the output. The number of
    // slices along dimension 'i' is simply the multiple along dimension 'i'
    // passed to the original Tile op.
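    // Example: with NDIM = 1, input_dims = {6} and multiples = {3}, each
    // slice has sizes = {2}, and the loop below visits the slices starting
    // at indices {0}, {2}, and {4}, accumulating each one into the output.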
    for (int i = 0; i < NDIM; ++i) {
      sizes[i] = input_dims[i] / multiples_array[i];
      indices[i] = 0;
    }

    bool first = true;
    while (true) {
      functor::TileGrad<Device, T, NDIM>()(
          context->eigen_device<Device>(), result->tensor<T, NDIM>(),
          context->input(0).tensor<T, NDIM>(), indices, sizes, first);
      first = false;
      // Increment the begin indices.
      int i = 0;
      while (i < NDIM && indices[i] / sizes[i] == multiples_array[i] - 1) {
        indices[i] = 0;
        ++i;
      }
      // We are finished if we have iterated to the maximum along all
      // dimensions.
      if (i == NDIM) {
        break;
      }
      indices[i] += sizes[i];
    }
  }

  template <typename T, int NDIM, int REDUCENDIM>
  void HandleReduce(OpKernelContext* context,
                    const std::vector<Tmultiples>& reduce_dim_in,
                    Tensor* result) {
    static_assert(NDIM >= REDUCENDIM, "Too many reduced dimensions");
    Eigen::DSizes<Eigen::DenseIndex, REDUCENDIM> reduce_dim;
    Eigen::DSizes<Eigen::DenseIndex, NDIM> reshape_dim;

    for (int i = 0; i < REDUCENDIM; ++i) {
      reduce_dim[i] = reduce_dim_in[i];
    }

    for (int i = 0; i < NDIM; ++i) {
      reshape_dim[i] = result->dim_size(i);
    }

    functor::ReduceAndReshape<Device, T, NDIM, REDUCENDIM>()(
        context->eigen_device<Device>(), result->tensor<T, NDIM>(),
        context->input(0).tensor<T, NDIM>(), reduce_dim, reshape_dim);
  }

  TF_DISALLOW_COPY_AND_ASSIGN(TileGradientOp);
};

template <typename Device, typename Tmultiples>
template <DataType DT, int NDIM>
inline void TileGradientOp<Device, Tmultiples>::HandleCase(
    OpKernelContext* context, const std::vector<Tmultiples>& input_dims,
    const gtl::ArraySlice<Tmultiples>& multiples_array, Tensor* result) {
  LOG(FATAL) << "TileGradientOp: Invalid combination of Device, DT and NDIM: "
             << MakeTypeIndex<Device>().name() << ", " << DataTypeString(DT)
             << ", " << NDIM;
}
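// As in TileOp above, the default HandleCase aborts; HANDLE_CASE generates
// one explicit specialization per (dtype, Tmultiples, NDIM) combination, and
// HANDLE_CASE_DIM instantiates ranks 1 through 7 for both int32 and int64
// multiples.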
#define HANDLE_CASE(device, T, dtype, Tmultiples, ndim)                      \
  template <>                                                                \
  template <>                                                                \
  void TileGradientOp<device, Tmultiples>::HandleCase<dtype, ndim>(          \
      OpKernelContext * context, const std::vector<Tmultiples>& input_dims,  \
      const gtl::ArraySlice<Tmultiples>& multiples_array, Tensor* result) {  \
    HandleCaseImpl<dtype, ndim>(context, input_dims, multiples_array,        \
                                result);                                     \
  }

// 0-D handled specially above.
#define HANDLE_CASE_DIM(device, T, dtype)  \
  HANDLE_CASE(device, T, dtype, int32, 1); \
  HANDLE_CASE(device, T, dtype, int32, 2); \
  HANDLE_CASE(device, T, dtype, int32, 3); \
  HANDLE_CASE(device, T, dtype, int32, 4); \
  HANDLE_CASE(device, T, dtype, int32, 5); \
  HANDLE_CASE(device, T, dtype, int32, 6); \
  HANDLE_CASE(device, T, dtype, int32, 7); \
  HANDLE_CASE(device, T, dtype, int64, 1); \
  HANDLE_CASE(device, T, dtype, int64, 2); \
  HANDLE_CASE(device, T, dtype, int64, 3); \
  HANDLE_CASE(device, T, dtype, int64, 4); \
  HANDLE_CASE(device, T, dtype, int64, 5); \
  HANDLE_CASE(device, T, dtype, int64, 6); \
  HANDLE_CASE(device, T, dtype, int64, 7);

#define HANDLE_TYPE_NAME_CPU(T) \
  HANDLE_CASE_DIM(CPUDevice, T, DataTypeToEnum<T>::value);

#define HANDLE_TYPE_NAME_GPU(T) \
  HANDLE_CASE_DIM(GPUDevice, T, DataTypeToEnum<T>::value);

TF_CALL_float(HANDLE_TYPE_NAME_CPU);
TF_CALL_double(HANDLE_TYPE_NAME_CPU);
TF_CALL_int16(HANDLE_TYPE_NAME_CPU);
TF_CALL_int32(HANDLE_TYPE_NAME_CPU);
TF_CALL_int64(HANDLE_TYPE_NAME_CPU);
TF_CALL_half(HANDLE_TYPE_NAME_CPU);
TF_CALL_complex64(HANDLE_TYPE_NAME_CPU);
TF_CALL_complex128(HANDLE_TYPE_NAME_CPU);

#if GOOGLE_CUDA
TF_CALL_float(HANDLE_TYPE_NAME_GPU);
TF_CALL_double(HANDLE_TYPE_NAME_GPU);
TF_CALL_int16(HANDLE_TYPE_NAME_GPU);
TF_CALL_int32(HANDLE_TYPE_NAME_GPU);
TF_CALL_int64(HANDLE_TYPE_NAME_GPU);
TF_CALL_half(HANDLE_TYPE_NAME_GPU);
TF_CALL_complex64(HANDLE_TYPE_NAME_GPU);
TF_CALL_complex128(HANDLE_TYPE_NAME_GPU);
#endif  // GOOGLE_CUDA

#if TENSORFLOW_USE_SYCL
#define HANDLE_TYPE_NAME_SYCL(T) \
  HANDLE_CASE_DIM(SYCLDevice, T, DataTypeToEnum<T>::value);

TF_CALL_float(HANDLE_TYPE_NAME_SYCL);
TF_CALL_double(HANDLE_TYPE_NAME_SYCL);
TF_CALL_int16(HANDLE_TYPE_NAME_SYCL);
TF_CALL_int32(HANDLE_TYPE_NAME_SYCL);
TF_CALL_int64(HANDLE_TYPE_NAME_SYCL);
#undef HANDLE_TYPE_NAME_SYCL
#endif  // TENSORFLOW_USE_SYCL

#undef HANDLE_TYPE_NAME_CPU
#undef HANDLE_TYPE_NAME_GPU
#undef HANDLE_CASE_DIM
#undef HANDLE_CASE

REGISTER_KERNEL_BUILDER(Name("Tile")
                            .Device(DEVICE_CPU)
                            .HostMemory("multiples")
                            .TypeConstraint<int32>("Tmultiples"),
                        TileOp<CPUDevice, int32>);
REGISTER_KERNEL_BUILDER(Name("Tile")
                            .Device(DEVICE_CPU)
                            .HostMemory("multiples")
                            .TypeConstraint<int64>("Tmultiples"),
                        TileOp<CPUDevice, int64>);
REGISTER_KERNEL_BUILDER(Name("TileGrad")
                            .Device(DEVICE_CPU)
                            .HostMemory("multiples")
                            .TypeConstraint<int32>("Tmultiples"),
                        TileGradientOp<CPUDevice, int32>);
REGISTER_KERNEL_BUILDER(Name("TileGrad")
                            .Device(DEVICE_CPU)
                            .HostMemory("multiples")
                            .TypeConstraint<int64>("Tmultiples"),
                        TileGradientOp<CPUDevice, int64>);

#if GOOGLE_CUDA
#define REGISTER_GPU_TILE(type)                                    \
  REGISTER_KERNEL_BUILDER(Name("Tile")                             \
                              .Device(DEVICE_GPU)                  \
                              .TypeConstraint<type>("T")           \
                              .TypeConstraint<int32>("Tmultiples") \
                              .HostMemory("multiples"),            \
                          TileOp<GPUDevice, int32>);               \
  REGISTER_KERNEL_BUILDER(Name("Tile")                             \
                              .Device(DEVICE_GPU)                  \
                              .TypeConstraint<type>("T")           \
                              .TypeConstraint<int64>("Tmultiples") \
                              .HostMemory("multiples"),            \
                          TileOp<GPUDevice, int64>);

#define REGISTER_GPU_TILE_GRAD(type)                               \
  REGISTER_KERNEL_BUILDER(Name("TileGrad")                         \
                              .Device(DEVICE_GPU)                  \
                              .TypeConstraint<type>("T")           \
                              .TypeConstraint<int32>("Tmultiples") \
                              .HostMemory("multiples"),            \
                          TileGradientOp<GPUDevice, int32>);       \
  REGISTER_KERNEL_BUILDER(Name("TileGrad")                         \
                              .Device(DEVICE_GPU)                  \
                              .TypeConstraint<type>("T")           \
                              .TypeConstraint<int64>("Tmultiples") \
                              .HostMemory("multiples"),            \
                          TileGradientOp<GPUDevice, int64>);

#define REGISTER_GPU(type)  \
  REGISTER_GPU_TILE(type);  \
  REGISTER_GPU_TILE_GRAD(type);
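// bool is registered for Tile only: boolean tensors carry no gradient, so a
// TileGrad kernel for bool is never needed.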
TF_CALL_bool(REGISTER_GPU_TILE);
TF_CALL_float(REGISTER_GPU);
TF_CALL_double(REGISTER_GPU);
TF_CALL_half(REGISTER_GPU);
TF_CALL_int16(REGISTER_GPU);
TF_CALL_int32(REGISTER_GPU);
TF_CALL_complex64(REGISTER_GPU);
TF_CALL_complex128(REGISTER_GPU);

#undef REGISTER_GPU_TILE
#undef REGISTER_GPU_TILE_GRAD
#undef REGISTER_GPU
#endif  // GOOGLE_CUDA

#ifdef TENSORFLOW_USE_SYCL
#define REGISTER_SYCL(type)                                        \
  REGISTER_KERNEL_BUILDER(Name("Tile")                             \
                              .Device(DEVICE_SYCL)                 \
                              .TypeConstraint<type>("T")           \
                              .TypeConstraint<int32>("Tmultiples") \
                              .HostMemory("multiples"),            \
                          TileOp<SYCLDevice, int32>);              \
  REGISTER_KERNEL_BUILDER(Name("Tile")                             \
                              .Device(DEVICE_SYCL)                 \
                              .TypeConstraint<type>("T")           \
                              .TypeConstraint<int64>("Tmultiples") \
                              .HostMemory("multiples"),            \
                          TileOp<SYCLDevice, int64>);              \
  REGISTER_KERNEL_BUILDER(Name("TileGrad")                         \
                              .Device(DEVICE_SYCL)                 \
                              .TypeConstraint<type>("T")           \
                              .TypeConstraint<int32>("Tmultiples") \
                              .HostMemory("multiples"),            \
                          TileGradientOp<SYCLDevice, int32>);      \
  REGISTER_KERNEL_BUILDER(Name("TileGrad")                         \
                              .Device(DEVICE_SYCL)                 \
                              .TypeConstraint<type>("T")           \
                              .TypeConstraint<int64>("Tmultiples") \
                              .HostMemory("multiples"),            \
                          TileGradientOp<SYCLDevice, int64>);

TF_CALL_float(REGISTER_SYCL);
TF_CALL_double(REGISTER_SYCL);

#undef REGISTER_SYCL
#endif  // TENSORFLOW_USE_SYCL

}  // namespace tensorflow