/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/kernels/bounds_check.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"

#ifdef TENSORFLOW_USE_SYCL
#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
#endif  // TENSORFLOW_USE_SYCL

namespace tensorflow {

namespace {

std::unique_ptr<const NodeDef> StripTensorDataFromNodeDef(
    OpKernelConstruction* ctx) {
#ifndef __ANDROID__
  DCHECK_EQ(NodeDef::descriptor()->field_count(), 5)
      << "The NodeDef format has changed, and the attr-stripping code may need "
      << "to be updated.";
#endif
  const NodeDef& original = ctx->def();
  NodeDef* ret = new NodeDef;
  ret->set_name(original.name());
  ret->set_op(original.op());
  ret->set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
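  // The dropped "value" attr may hold arbitrarily large tensor data; that data
  // is kept once in ConstantOp's tensor_ member (built from the same proto in
  // the constructor below), so the kernel's stored NodeDef does not need to
  // carry a second copy of it.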
  AddNodeAttr("dtype", ctx->output_type(0), ret);
  return std::unique_ptr<const NodeDef>(ret);
}

}  // namespace

ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx)),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);
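
// For reference, a "Const" node in a GraphDef carries its element type in the
// "dtype" attr and its payload in the "value" attr. A hand-written sketch of
// such a node follows; the name and field values are purely illustrative:
//
//   node {
//     name: "my_const"
//     op: "Const"
//     attr { key: "dtype" value { type: DT_FLOAT } }
//     attr { key: "value"
//            value { tensor { dtype: DT_FLOAT
//                             tensor_shape { dim { size: 2 } }
//                             float_val: 1 float_val: 2 } } }
//   }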

#if GOOGLE_CUDA
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

#ifdef TENSORFLOW_USE_SYCL
#define REGISTER_SYCL_KERNEL(D, TYPE)                                 \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_SYCL_KERNEL(SYCL, float);
REGISTER_SYCL_KERNEL(SYCL, double);
REGISTER_SYCL_KERNEL(SYCL, uint8);
REGISTER_SYCL_KERNEL(SYCL, int8);
REGISTER_SYCL_KERNEL(SYCL, uint16);
REGISTER_SYCL_KERNEL(SYCL, int16);
REGISTER_SYCL_KERNEL(SYCL, int64);
REGISTER_SYCL_KERNEL(SYCL, bool);
#undef REGISTER_SYCL_KERNEL
#endif

HostConstantOp::HostConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx), tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  AllocatorAttributes alloc_attr;
  alloc_attr.set_on_host(true);
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(
      ctx, ctx->device()->MakeTensorFromProto(*proto, alloc_attr, &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

void HostConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
}

#if GOOGLE_CUDA
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
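// (Background note: int32 tensors in TensorFlow graphs usually carry shapes
// and indices that many ops expect to read on the host, so this registration
// keeps the constant's output in host memory even when the op is placed on
// the GPU.)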
REGISTER_KERNEL_BUILDER(Name("Const")
                            .Device(DEVICE_GPU)
                            .HostMemory("output")
                            .TypeConstraint<int32>("dtype"),
                        HostConstantOp);
#endif

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("Const")
                            .Device(DEVICE_SYCL)
                            .HostMemory("output")
                            .TypeConstraint<int32>("dtype"),
                        HostConstantOp);
#endif  // TENSORFLOW_USE_SYCL

typedef Eigen::ThreadPoolDevice CPUDevice;
typedef Eigen::GpuDevice GPUDevice;
#ifdef TENSORFLOW_USE_SYCL
typedef Eigen::SyclDevice SYCLDevice;
#endif  // TENSORFLOW_USE_SYCL

template <typename Device, typename T, typename Index>
class FillOp : public OpKernel {
 public:
  explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {}

  void Compute(OpKernelContext* context) override {
    const Tensor& Tdims = context->input(0);
    OP_REQUIRES(context, IsLegacyVector(Tdims.shape()),
                errors::InvalidArgument("dims must be a vector, got shape ",
                                        Tdims.shape().DebugString()));
    const Tensor& Tvalue = context->input(1);
    OP_REQUIRES(context, IsLegacyScalar(Tvalue.shape()),
                errors::InvalidArgument("value must be a scalar, got shape ",
                                        Tvalue.shape().DebugString()));
    auto dims = Tdims.flat<Index>();
    TensorShape shape;
    OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
                                reinterpret_cast<const Index*>(dims.data()),
                                dims.size(), &shape));
    Tensor* out = nullptr;
    OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out));
    functor::FillFunctor<Device, T> functor;
    functor(context->eigen_device<Device>(), out->flat<T>(),
            Tvalue.scalar<T>());
  }
};
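
// Example of the semantics implemented above (values are illustrative only):
// running Fill with dims = [2, 3] and value = 7 builds a TensorShape of {2, 3}
// from the int32/int64 "dims" input, allocates the output, and the FillFunctor
// writes the scalar 7 into all six elements.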

#define REGISTER_KERNEL(D, TYPE)                                   \
  REGISTER_KERNEL_BUILDER(Name("Fill")                             \
                              .Device(DEVICE_##D)                  \
                              .TypeConstraint<TYPE>("T")           \
                              .TypeConstraint<int32>("index_type") \
                              .HostMemory("dims"),                 \
                          FillOp<D##Device, TYPE, int32>);         \
  REGISTER_KERNEL_BUILDER(Name("Fill")                             \
                              .Device(DEVICE_##D)                  \
                              .TypeConstraint<TYPE>("T")           \
                              .TypeConstraint<int64>("index_type") \
                              .HostMemory("dims"),                 \
                          FillOp<D##Device, TYPE, int64>);

#define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE)
TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL);
// TODO(b/28917570): Add a test for this. Currently python 3 is not happy about
// the conversion from uint8 to quint8.
REGISTER_KERNEL(CPU, quint8);
REGISTER_KERNEL(CPU, quint16);
#undef REGISTER_CPU_KERNEL

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(SYCL, float);
REGISTER_KERNEL(SYCL, double);
REGISTER_KERNEL(SYCL, uint8);
REGISTER_KERNEL(SYCL, int8);
REGISTER_KERNEL(SYCL, uint16);
REGISTER_KERNEL(SYCL, int16);
REGISTER_KERNEL(SYCL, int64);

REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#undef REGISTER_KERNEL_SYCL
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64);
REGISTER_KERNEL(GPU, bool);
// Currently we do not support filling strings and complex64 on GPU

// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      Tensor out(cpu_allocator(), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};
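
// Note on the two paths above: for DT_VARIANT the input must be a scalar, and
// the zero value is produced by whichever unary op is registered for
// ZEROS_LIKE_VARIANT_UNARY_OP and the variant's concrete type (see
// variant_op_registry.h). For every other dtype, SetZeroFunctor simply zeroes
// the output buffer, which may be forwarded from the input to avoid a fresh
// allocation.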

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(bool, SYCL);
REGISTER_KERNEL(float, SYCL);
REGISTER_KERNEL(double, SYCL);
REGISTER_KERNEL(int64, SYCL);
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
REGISTER_KERNEL(int64, GPU);
REGISTER_KERNEL(Variant, GPU);
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA

#undef REGISTER_KERNEL

template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(float, SYCL);
REGISTER_KERNEL(bool, SYCL);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
REGISTER_KERNEL(int64, GPU);
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA

#undef REGISTER_KERNEL

PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU kernel registrations cover the case where a placeholder is
// created in a GPU device context while soft placement is disabled. Since a
// placeholder should never be executed, adding these GPU kernels has no effect
// on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_GPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_GPU),
                        PlaceholderOp);

#if TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_SYCL), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_SYCL),
                        PlaceholderOp);
#endif  // TENSORFLOW_USE_SYCL
}  // namespace tensorflow