/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

// See docs in ../ops/array_ops.cc.

#define EIGEN_USE_THREADS

#if GOOGLE_CUDA
#define EIGEN_USE_GPU
#endif

#include "tensorflow/core/kernels/constant_op.h"

#include "third_party/eigen3/unsupported/Eigen/CXX11/Tensor"
#include "tensorflow/core/framework/node_def.pb.h"
#include "tensorflow/core/framework/register_types.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/framework/tensor.pb.h"
#include "tensorflow/core/framework/tensor_shape.h"
#include "tensorflow/core/framework/tensor_types.h"
#include "tensorflow/core/framework/types.h"
#include "tensorflow/core/framework/variant_op_registry.h"
#include "tensorflow/core/kernels/bounds_check.h"
#include "tensorflow/core/kernels/fill_functor.h"
#include "tensorflow/core/platform/macros.h"

#ifdef TENSORFLOW_USE_SYCL
#include "tensorflow/core/common_runtime/sycl/sycl_util.h"
#endif  // TENSORFLOW_USE_SYCL

namespace tensorflow {

namespace {

// Returns a copy of `ctx->def()` that keeps only the name, op, device, and
// the "dtype" attr — in particular, the potentially large "value" attr
// (the serialized tensor content) is dropped before the NodeDef is handed
// to the OpKernel base class, so the kernel does not retain a second copy
// of the constant's data.
std::unique_ptr<const NodeDef> StripTensorDataFromNodeDef(
    OpKernelConstruction* ctx) {
#ifndef __ANDROID__
  // Guard against NodeDef gaining fields that this hand-copying code would
  // silently drop. (The proto descriptor is unavailable on Android builds.)
  DCHECK_EQ(NodeDef::descriptor()->field_count(), 5)
      << "The NodeDef format has changed, and the attr-stripping code may need "
      << "to be updated.";
#endif
  const NodeDef& original = ctx->def();
  NodeDef* ret = new NodeDef;
  ret->set_name(original.name());
  ret->set_op(original.op());
  ret->set_device(original.device());
  // Strip the "value" attr from the returned NodeDef.
  // NOTE(mrry): The present implementation of `OpKernel::OpKernel()` only uses
  // attrs that affect the cardinality of list-typed inputs and outputs, so it
  // is safe to drop other attrs from the NodeDef.
  AddNodeAttr("dtype", ctx->output_type(0), ret);
  return std::unique_ptr<const NodeDef>(ret);
}

}  // namespace

// Parses the "value" attr into `tensor_` once at construction time, using
// the op's device to materialize the tensor (so GPU constants live in
// device memory). Fails construction if the materialized tensor's dtype
// does not match the declared output dtype.
ConstantOp::ConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx, StripTensorDataFromNodeDef(ctx)),
      tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(ctx, ctx->device()->MakeTensorFromProto(
                          *proto, AllocatorAttributes(), &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

// Emits the pre-materialized tensor without copying. The persistent-memory
// accounting is only recorded when allocation tracking is enabled for this
// step (expected to be the rare case, hence TF_PREDICT_FALSE).
void ConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
  if (TF_PREDICT_FALSE(ctx->track_allocations())) {
    ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
  }
}

ConstantOp::~ConstantOp() {}

REGISTER_KERNEL_BUILDER(Name("Const").Device(DEVICE_CPU), ConstantOp);

#if GOOGLE_CUDA
// Per-dtype GPU registrations for Const. int32 is intentionally absent here;
// it is handled below by HostConstantOp with host-memory output.
#define REGISTER_KERNEL(D, TYPE)                                      \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_KERNEL(GPU, Eigen::half);
REGISTER_KERNEL(GPU, bfloat16);
REGISTER_KERNEL(GPU, float);
REGISTER_KERNEL(GPU, double);
REGISTER_KERNEL(GPU, uint8);
REGISTER_KERNEL(GPU, int8);
REGISTER_KERNEL(GPU, qint8);
REGISTER_KERNEL(GPU, uint16);
REGISTER_KERNEL(GPU, int16);
REGISTER_KERNEL(GPU, int64);
REGISTER_KERNEL(GPU, complex64);
REGISTER_KERNEL(GPU, complex128);
REGISTER_KERNEL(GPU, bool);
REGISTER_KERNEL(GPU, Variant);
#undef REGISTER_KERNEL
#endif

#ifdef TENSORFLOW_USE_SYCL
// Per-dtype SYCL registrations for Const (int32 handled separately below,
// mirroring the GPU arrangement).
#define REGISTER_SYCL_KERNEL(D, TYPE)                                 \
  REGISTER_KERNEL_BUILDER(                                            \
      Name("Const").Device(DEVICE_##D).TypeConstraint<TYPE>("dtype"), \
      ConstantOp);
REGISTER_SYCL_KERNEL(SYCL, float);
REGISTER_SYCL_KERNEL(SYCL, double);
REGISTER_SYCL_KERNEL(SYCL, uint8);
REGISTER_SYCL_KERNEL(SYCL, int8);
REGISTER_SYCL_KERNEL(SYCL, uint16);
REGISTER_SYCL_KERNEL(SYCL, int16);
REGISTER_SYCL_KERNEL(SYCL, int64);
REGISTER_SYCL_KERNEL(SYCL, bool);
#undef REGISTER_SYCL_KERNEL
#endif

// Variant of ConstantOp that forces the materialized tensor into host
// memory (alloc_attr.set_on_host(true)); used for dtypes such as int32 on
// non-CPU devices where the output must live in host memory.
// NOTE(review): unlike ConstantOp::Compute, this op does not record
// persistent-memory allocation when tracking is enabled — presumably an
// oversight rather than intentional; confirm before relying on the stats.
HostConstantOp::HostConstantOp(OpKernelConstruction* ctx)
    : OpKernel(ctx), tensor_(ctx->output_type(0)) {
  const TensorProto* proto = nullptr;
  AllocatorAttributes alloc_attr;
  alloc_attr.set_on_host(true);
  OP_REQUIRES_OK(ctx, ctx->GetAttr("value", &proto));
  OP_REQUIRES_OK(
      ctx, ctx->device()->MakeTensorFromProto(*proto, alloc_attr, &tensor_));
  OP_REQUIRES(
      ctx, ctx->output_type(0) == tensor_.dtype(),
      errors::InvalidArgument("Type mismatch between value (",
                              DataTypeString(tensor_.dtype()), ") and dtype (",
                              DataTypeString(ctx->output_type(0)), ")"));
}

// Emits the pre-materialized host tensor without copying.
void HostConstantOp::Compute(OpKernelContext* ctx) {
  ctx->set_output(0, tensor_);
}

#if GOOGLE_CUDA
// A special GPU kernel for int32.
// TODO(b/25387198): Also enable int32 in device memory. This kernel
// registration requires all int32 inputs and outputs to be in host memory.
155 REGISTER_KERNEL_BUILDER(Name("Const") 156 .Device(DEVICE_GPU) 157 .HostMemory("output") 158 .TypeConstraint<int32>("dtype"), 159 HostConstantOp); 160 #endif 161 162 #ifdef TENSORFLOW_USE_SYCL 163 REGISTER_KERNEL_BUILDER(Name("Const") 164 .Device(DEVICE_SYCL) 165 .HostMemory("output") 166 .TypeConstraint<int32>("dtype"), 167 HostConstantOp); 168 #endif // TENSORFLOW_USE_SYCL 169 170 typedef Eigen::ThreadPoolDevice CPUDevice; 171 typedef Eigen::GpuDevice GPUDevice; 172 #ifdef TENSORFLOW_USE_SYCL 173 typedef Eigen::SyclDevice SYCLDevice; 174 #endif // TENSORFLOW_USE_SYCL 175 176 template <typename Device, typename T, typename Index> 177 class FillOp : public OpKernel { 178 public: 179 explicit FillOp(OpKernelConstruction* context) : OpKernel(context) {} 180 181 void Compute(OpKernelContext* context) override { 182 const Tensor& Tdims = context->input(0); 183 OP_REQUIRES(context, IsLegacyVector(Tdims.shape()), 184 errors::InvalidArgument("dims must be a vector, got shape ", 185 Tdims.shape().DebugString())); 186 const Tensor& Tvalue = context->input(1); 187 OP_REQUIRES(context, IsLegacyScalar(Tvalue.shape()), 188 errors::InvalidArgument("value must be a scalar, got shape ", 189 Tvalue.shape().DebugString())); 190 auto dims = Tdims.flat<Index>(); 191 TensorShape shape; 192 OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape( 193 reinterpret_cast<const Index*>(dims.data()), 194 dims.size(), &shape)); 195 Tensor* out = nullptr; 196 OP_REQUIRES_OK(context, context->allocate_output(0, shape, &out)); 197 functor::FillFunctor<Device, T> functor; 198 functor(context->eigen_device<Device>(), out->flat<T>(), 199 Tvalue.scalar<T>()); 200 } 201 }; 202 203 #define REGISTER_KERNEL(D, TYPE) \ 204 REGISTER_KERNEL_BUILDER(Name("Fill") \ 205 .Device(DEVICE_##D) \ 206 .TypeConstraint<TYPE>("T") \ 207 .TypeConstraint<int32>("index_type") \ 208 .HostMemory("dims"), \ 209 FillOp<D##Device, TYPE, int32>); \ 210 REGISTER_KERNEL_BUILDER(Name("Fill") \ 211 .Device(DEVICE_##D) \ 212 
.TypeConstraint<TYPE>("T") \ 213 .TypeConstraint<int64>("index_type") \ 214 .HostMemory("dims"), \ 215 FillOp<D##Device, TYPE, int64>); 216 217 #define REGISTER_CPU_KERNEL(TYPE) REGISTER_KERNEL(CPU, TYPE) 218 TF_CALL_ALL_TYPES(REGISTER_CPU_KERNEL); 219 // TODO(b/28917570): Add a test for this. Currently python 3 is not happy about 220 // the conversion from uint8 to quint8. 221 REGISTER_KERNEL(CPU, quint8); 222 REGISTER_KERNEL(CPU, quint16); 223 #undef REGISTER_CPU_KERNEL 224 225 #ifdef TENSORFLOW_USE_SYCL 226 REGISTER_KERNEL(SYCL, float); 227 REGISTER_KERNEL(SYCL, double); 228 REGISTER_KERNEL(SYCL, uint8); 229 REGISTER_KERNEL(SYCL, int8); 230 REGISTER_KERNEL(SYCL, uint16); 231 REGISTER_KERNEL(SYCL, int16); 232 REGISTER_KERNEL(SYCL, int64); 233 234 REGISTER_KERNEL_BUILDER(Name("Fill") 235 .Device(DEVICE_SYCL) 236 .TypeConstraint<int32>("T") 237 .TypeConstraint<int32>("index_type") 238 .HostMemory("dims") 239 .HostMemory("value") 240 .HostMemory("output"), 241 FillOp<CPUDevice, int32, int32>); 242 #undef REGISTER_KERNEL_SYCL 243 #endif // TENSORFLOW_USE_SYCL 244 245 #if GOOGLE_CUDA 246 REGISTER_KERNEL(GPU, Eigen::half); 247 REGISTER_KERNEL(GPU, bfloat16); 248 REGISTER_KERNEL(GPU, float); 249 REGISTER_KERNEL(GPU, double); 250 REGISTER_KERNEL(GPU, uint8); 251 REGISTER_KERNEL(GPU, int8); 252 REGISTER_KERNEL(GPU, uint16); 253 REGISTER_KERNEL(GPU, int16); 254 REGISTER_KERNEL(GPU, int64); 255 REGISTER_KERNEL(GPU, bool); 256 // Currently we do not support filling strings and complex64 on GPU 257 258 // A special GPU kernel for int32. 259 // TODO(b/25387198): Also enable int32 in device memory. This kernel 260 // registration requires all int32 inputs and outputs to be in host memory. 
REGISTER_KERNEL_BUILDER(Name("Fill")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .TypeConstraint<int32>("index_type")
                            .HostMemory("dims")
                            .HostMemory("value")
                            .HostMemory("output"),
                        FillOp<CPUDevice, int32, int32>);
#endif

#undef REGISTER_KERNEL

// ZerosLike op: produces a tensor of the same shape and dtype as the input,
// filled with zeros. Variant inputs must be scalars and are dispatched to
// the ZEROS_LIKE_VARIANT_UNARY_OP registered for the contained type; all
// other dtypes use functor::SetZeroFunctor, reusing the input buffer when
// it can be forwarded.
template <typename Device, typename T>
class ZerosLikeOp : public OpKernel {
 public:
  explicit ZerosLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    const Device& d = ctx->eigen_device<Device>();
    // Compile-time constant branch: only one arm survives per instantiation.
    if (std::is_same<T, Variant>::value) {
      OP_REQUIRES(
          ctx, input.dims() == 0,
          errors::InvalidArgument("ZerosLike non-scalar Tensor with "
                                  "dtype=DT_VARIANT is not supported."));
      const Variant& v = input.scalar<Variant>()();
      // Variant output is always built on the host allocator.
      Tensor out(cpu_allocator(), DT_VARIANT, TensorShape({}));
      Variant* out_v = &(out.scalar<Variant>()());
      OP_REQUIRES_OK(ctx, UnaryOpVariant<Device>(
                              ctx, ZEROS_LIKE_VARIANT_UNARY_OP, v, out_v));
      ctx->set_output(0, out);
    } else {
      Tensor* out = nullptr;
      // Reuse the input buffer in place when refcounting allows it.
      OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                              {0}, 0, input.shape(), &out));
      functor::SetZeroFunctor<Device, T> f;
      f(d, out->flat<T>());
    }
  }
};

#define REGISTER_KERNEL(type, dev)                                      \
  REGISTER_KERNEL_BUILDER(                                              \
      Name("ZerosLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      ZerosLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_STRING_TYPES(REGISTER_CPU);
REGISTER_CPU(Variant);
#undef REGISTER_CPU

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(bool, SYCL);
REGISTER_KERNEL(float, SYCL);
REGISTER_KERNEL(double, SYCL);
REGISTER_KERNEL(int64, SYCL);
// int32 runs on the CPU with host-memory output (see b/25387198 pattern).
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
REGISTER_KERNEL(int64, GPU);
REGISTER_KERNEL(Variant, GPU);
// int32 runs on the CPU with host-memory output.
REGISTER_KERNEL_BUILDER(Name("ZerosLike")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        ZerosLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA

#undef REGISTER_KERNEL

// OnesLike op: produces a tensor of the same shape and dtype as the input,
// filled with ones via functor::SetOneFunctor, reusing the input buffer
// when it can be forwarded. (No Variant support, unlike ZerosLike.)
template <typename Device, typename T>
class OnesLikeOp : public OpKernel {
 public:
  explicit OnesLikeOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}

  void Compute(OpKernelContext* ctx) override {
    const Tensor& input = ctx->input(0);
    Tensor* out = nullptr;
    OP_REQUIRES_OK(ctx, ctx->forward_input_or_allocate_output(
                            {0}, 0, input.shape(), &out));
    functor::SetOneFunctor<Device, T> f;
    f(ctx->eigen_device<Device>(), out->flat<T>());
  }
};

#define REGISTER_KERNEL(type, dev)                                     \
  REGISTER_KERNEL_BUILDER(                                             \
      Name("OnesLike").Device(DEVICE_##dev).TypeConstraint<type>("T"), \
      OnesLikeOp<dev##Device, type>)

#define REGISTER_CPU(type) REGISTER_KERNEL(type, CPU)
TF_CALL_POD_TYPES(REGISTER_CPU);
#undef REGISTER_CPU

#ifdef TENSORFLOW_USE_SYCL
REGISTER_KERNEL(float, SYCL);
REGISTER_KERNEL(bool, SYCL);
// int32 runs on the CPU with host-memory output.
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_SYCL)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // TENSORFLOW_USE_SYCL

#if GOOGLE_CUDA
REGISTER_KERNEL(bool, GPU);
REGISTER_KERNEL(Eigen::half, GPU);
REGISTER_KERNEL(bfloat16, GPU);
REGISTER_KERNEL(float, GPU);
REGISTER_KERNEL(double, GPU);
REGISTER_KERNEL(complex64, GPU);
REGISTER_KERNEL(complex128, GPU);
REGISTER_KERNEL(int64, GPU);
// int32 runs on the CPU with host-memory output.
REGISTER_KERNEL_BUILDER(Name("OnesLike")
                            .Device(DEVICE_GPU)
                            .TypeConstraint<int32>("T")
                            .HostMemory("y"),
                        OnesLikeOp<CPUDevice, int32>);
#endif  // GOOGLE_CUDA

#undef REGISTER_KERNEL

// Placeholder op: holds only the declared "shape" attr; it never produces a
// value. Executing it is always an error — the tensor must be fed.
PlaceholderOp::PlaceholderOp(OpKernelConstruction* ctx) : OpKernel(ctx) {
  OP_REQUIRES_OK(ctx, ctx->GetAttr("shape", &expected_shape_));
}

// Unconditionally fails (OP_REQUIRES with a false condition); the message
// includes the expected shape only when one was declared with dims > 0.
void PlaceholderOp::Compute(OpKernelContext* ctx) {
  if (expected_shape_.dims() > 0) {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0)),
                    " and shape ", expected_shape_.DebugString()));
  } else {
    OP_REQUIRES(ctx, false,
                errors::InvalidArgument(
                    "You must feed a value for placeholder tensor '", name(),
                    "' with dtype ", DataTypeString(output_type(0))));
  }
}

REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_CPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_CPU),
                        PlaceholderOp);
// The following GPU kernel registration is used to address the situation that
// a placeholder is added in a GPU device context and soft placement is false.
// Since a placeholder should never be executed, adding these GPU kernels has
// no effect on graph execution.
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_GPU), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_GPU),
                        PlaceholderOp);

#if TENSORFLOW_USE_SYCL
REGISTER_KERNEL_BUILDER(Name("Placeholder").Device(DEVICE_SYCL), PlaceholderOp);
REGISTER_KERNEL_BUILDER(Name("PlaceholderV2").Device(DEVICE_SYCL),
                        PlaceholderOp);
#endif  // TENSORFLOW_USE_SYCL
}  // namespace tensorflow