/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/common_runtime/dma_helper.h"
#include "tensorflow/core/common_runtime/scoped_allocator.h"
#include "tensorflow/core/common_runtime/scoped_allocator_mgr.h"
#include "tensorflow/core/framework/allocator.h"
#include "tensorflow/core/framework/op_kernel.h"
#include "tensorflow/core/framework/tensor.h"
#include "tensorflow/core/lib/core/errors.h"
#include "tensorflow/core/lib/core/status.h"

namespace tensorflow {

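// This file implements three cooperating ops, presumably wired together by an
// optimization pass that packs several small tensors into one backing buffer:
//
//  * _ScopedAllocator allocates a single backing tensor and registers a
//    ScopedAllocator for it with the device's ScopedAllocatorMgr, so that
//    downstream ops can allocate their outputs directly from that buffer.
//  * _ScopedAllocatorConcat takes the backing tensor as input 0 and the
//    subtensors carved from it as inputs 1..N, checks that every input
//    aliases the backing buffer, and outputs the whole buffer as one
//    concatenated tensor without copying.
//  * _ScopedAllocatorSplit performs the inverse view: it forwards each
//    subtensor input as a separate output after the same aliasing checks.
//
// As a rough, illustrative sketch (attr values here are hypothetical, not
// taken from any real graph), a _ScopedAllocator node carrying
//   T=DT_FLOAT, shapes=[{2,4}, {4,4}], sa_name="sa", id=1,
//   expected_call_count=2
// would allocate one float buffer large enough for both shapes, plus any
// alignment padding computed by ScopedAllocatorMgr::PopulateFields.

// Allocates the backing tensor and registers the ScopedAllocator that hands
// out pieces of it. The 'shapes' attr lists the shapes of the subtensors that
// will later be carved out; the backing tensor is sized to the end of the
// last field.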
class ScopedAllocatorOp : public OpKernel {
 public:
  explicit ScopedAllocatorOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("shapes", &shapes_));
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    OP_REQUIRES_OK(context, context->GetAttr("expected_call_count",
                                             &expected_call_count_));
    device_ = context->device();
    // Precalculate the size of the backing tensor and the offsets of
    // the subtensors to be allocated from it, taking into account
    // alignment considerations.
    ScopedAllocatorMgr::PopulateFields(id_, shapes_, dtype_, &fields_);
    size_t num_bytes = fields_.back().offset + fields_.back().bytes;
    num_elements_ = num_bytes / DataTypeSize(dtype_);
    OP_REQUIRES(context, num_bytes % DataTypeSize(dtype_) == 0,
                errors::InvalidArgument(
                    "Number of bytes ", num_bytes,
                    " must be divisible by size of datatype ", dtype_));
  }

  void Compute(OpKernelContext* context) override {
    ScopedAllocatorMgr* sam = device_->GetScopedAllocatorMgr();
    if (!sam) {
      context->SetStatus(errors::Internal(
          "ScopedAllocatorMgr not supported on device ", device_->name()));
      return;
    }
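    // Allocate the backing tensor as output 0, then hand it to the
    // ScopedAllocatorMgr so that downstream ops in this step can allocate
    // their outputs directly out of its buffer.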
    Tensor* backing_tensor = nullptr;
    AllocatorAttributes attr = context->output_alloc_attr(0);
    Status s =
        context->allocate_output(0, {num_elements_}, &backing_tensor, attr);
    if (s.ok()) {
      // Only touch backing_tensor once we know allocation succeeded.
      VLOG(1) << "_ScopedAllocatorOp new backing tensor size "
              << backing_tensor->TotalBytes() << " num_elements_ "
              << num_elements_ << " buffer "
              << DMAHelper::buffer(backing_tensor) << " base addr "
              << DMAHelper::base(backing_tensor);
      s = sam->AddScopedAllocator(*backing_tensor, context->step_id(), id_,
                                  name_, fields_, expected_call_count_);
    }
    if (!s.ok()) {
      context->SetStatus(s);
    }
  }

 private:
  std::vector<TensorShape> shapes_;
  DataType dtype_;
  int64 num_elements_;
  std::vector<ScopedAllocator::Field> fields_;
  string name_;
  int32 id_;
  int32 expected_call_count_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_CPU),
                        ScopedAllocatorOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocator").Device(DEVICE_GPU),
                        ScopedAllocatorOp);

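// Outputs an alias of the backing tensor as a single concatenated tensor.
// Input 0 is the backing tensor; inputs 1..N are the subtensors allocated
// from it. Each input is verified to lie within the backing buffer, then the
// buffer itself is forwarded (optionally reshaped to the 'shape' attr), so
// the concat costs no copy.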
class ScopedAllocatorConcatOp : public OpKernel {
 public:
  explicit ScopedAllocatorConcatOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("shape", &shape_));
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    OP_REQUIRES_OK(context, context->GetAttr("reshape", &reshape_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    const Tensor& backing_tensor = context->input(0);
    // Check that type matches.
    OP_REQUIRES(context, backing_tensor.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_tensor.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    // Check that backing tensor is at least as large as the shape of the
    // output.
    OP_REQUIRES(context, backing_tensor.NumElements() >= shape_.num_elements(),
                errors::InvalidArgument("Backing tensor num elements ",
                                        backing_tensor.NumElements(),
                                        " is not >= expected ",
                                        shape_.num_elements()));
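    // Alias the backing tensor's buffer as the single concatenated output,
    // reshaping it to the expected shape when requested.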
    Tensor output(dtype_);
    if (reshape_) {
      CHECK(output.CopyFrom(backing_tensor, shape_));
    } else {
      CHECK(output.CopyFrom(backing_tensor, backing_tensor.shape()));
    }
    context->set_output(0, output);
    const TensorBuffer* backing_buf = DMAHelper::buffer(&output);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
    // Check that all inputs lie entirely within the backing tensor.
    for (int i = 1; i < context->num_inputs(); ++i) {
      const TensorBuffer* input_buf = DMAHelper::buffer(&context->input(i));
      const void* input_lb = input_buf->data();
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument(
              "Lower bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument(
              "Upper bound check fail for input ", i, " from node ",
              context->op_kernel().requested_input(i), " to node ",
              context->op_kernel().name(), " input bounds = [", input_lb, ", ",
              input_ub, "]", " backing_tensor bounds = [", backing_tensor_lb,
              ", ", backing_tensor_ub, "]"));
    }
    VLOG(1) << "_ScopedAllocatorConcatOp outputting backing tensor at "
            << backing_buf;
  }

 private:
  TensorShape shape_;
  DataType dtype_;
  string name_;
  int32 id_;
  bool reshape_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_CPU),
                        ScopedAllocatorConcatOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorConcat").Device(DEVICE_GPU),
                        ScopedAllocatorConcatOp);

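// Performs the inverse view of the concat: forwards slices of the backing
// tensor as separate outputs, so the split costs no copy. Input 0 is the
// backing tensor; inputs 1..N are the subtensors allocated from it, and
// input i is forwarded as output i - 1 after a bounds check.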
class ScopedAllocatorSplitOp : public OpKernel {
 public:
  explicit ScopedAllocatorSplitOp(OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("T", &dtype_));
    // These attributes are just for debugging.
    OP_REQUIRES_OK(context, context->GetAttr("sa_name", &name_));
    OP_REQUIRES_OK(context, context->GetAttr("id", &id_));
    device_ = context->device();
  }

  void Compute(OpKernelContext* context) override {
    Tensor backing_copy(context->input(0));
    // Check that type matches.
    OP_REQUIRES(context, backing_copy.dtype() == dtype_,
                errors::InvalidArgument("Backing tensor type ",
                                        DataTypeString(backing_copy.dtype()),
                                        " does not match expected type ",
                                        DataTypeString(dtype_)));
    const TensorBuffer* backing_buf = DMAHelper::buffer(&backing_copy);
    const void* backing_tensor_lb = backing_buf->data();
    const void* backing_tensor_ub = static_cast<const void*>(
        static_cast<const char*>(backing_tensor_lb) + backing_buf->size());
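    // Forward each subtensor input i to output i - 1, verifying that its
    // buffer lies entirely within the backing tensor.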
    for (int i = 1; i < context->num_inputs(); ++i) {
      VLOG(1) << "_ScopedAllocatorSplitOp assigning input " << i
              << " to output " << i - 1 << " buf addr "
              << DMAHelper::base(&context->input(i));
      Tensor copy(context->input(i));
      OP_REQUIRES(context, copy.dtype() == dtype_,
                  errors::InvalidArgument("Input ", i, " tensor type ",
                                          DataTypeString(copy.dtype()),
                                          " does not match expected type ",
                                          DataTypeString(dtype_)));
      context->set_output(i - 1, copy);
      const TensorBuffer* input_buf = DMAHelper::buffer(&copy);
      const void* input_lb = input_buf->data();
      OP_REQUIRES(
          context, input_lb >= backing_tensor_lb,
          errors::InvalidArgument("Lower bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
      const void* input_ub = static_cast<const void*>(
          static_cast<const char*>(input_lb) + input_buf->size());
      OP_REQUIRES(
          context, input_ub <= backing_tensor_ub,
          errors::InvalidArgument("Upper bound check fail for input ", i,
                                  " to node ", context->op_kernel().name()));
    }
  }

 private:
  DataType dtype_;
  string name_;
  int32 id_;
  DeviceBase* device_;
};

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_CPU),
                        ScopedAllocatorSplitOp);

REGISTER_KERNEL_BUILDER(Name("_ScopedAllocatorSplit").Device(DEVICE_GPU),
                        ScopedAllocatorSplitOp);

}  // namespace tensorflow