/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/debug/debug_gateway.h"

#include <utility>

#include "tensorflow/core/common_runtime/device_factory.h"
#include "tensorflow/core/common_runtime/session_factory.h"
#include "tensorflow/core/framework/tensor.h"

namespace tensorflow {

DebugGateway::DebugGateway(DirectSession* session) : session_(session) {
  session_->node_outputs_callback_ =
      [this](const string& node_name, const int output_slot,
             const Tensor* tensor, const bool is_ref, OpKernelContext* ctx) {
        if (comp_cb_ != nullptr && output_slot <= 0) {
          // The node completion callback is invoked once for a node regardless
          // of whether the node has zero, one or more outputs.
          // The output_slot can be negative (-1, or kControlSlot) if
          // node_outputs_callback_ is invoked for a node with no output. If
          // that is the case, notify the callback that the node in question has
          // no output.
          comp_cb_(node_name, output_slot == 0);
        }

        // Copy tensor values (e.g., from GPU to host) only if the
        // value callback is not nullptr.
        if (val_cb_ != nullptr && output_slot >= 0) {
          CopyTensor(node_name, output_slot, tensor, ctx,
                     [this, node_name, output_slot,
                      is_ref](const Tensor* copied_tensor) {
                       val_cb_(node_name, output_slot, *copied_tensor, is_ref);
                     });
        }

        return Status::OK();
      };
}
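
// Note on the callback installed above: comp_cb_ fires once per completed
// node (its bool argument indicates whether the node produced any output at
// all), while val_cb_ fires once per non-control output slot, with the value
// routed through CopyTensor (below) so that device-resident tensors are first
// copied to the host.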

DebugGateway::~DebugGateway() {
  if (session_ != nullptr) {
    session_->node_outputs_callback_ = nullptr;
  }
}

void DebugGateway::SetNodeCompletionCallback(NodeCompletionCallback callback) {
  comp_cb_ = std::move(callback);
}

void DebugGateway::SetNodeValueCallback(NodeValueCallback callback) {
  val_cb_ = std::move(callback);
}
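
// Example usage (a minimal sketch; assumes the caller owns a DirectSession*
// named `direct_session`, and that the exact NodeCompletionCallback /
// NodeValueCallback signatures are as declared in debug_gateway.h):
//
//   DebugGateway gateway(direct_session);
//   gateway.SetNodeCompletionCallback(
//       [](const string& node_name, const bool any_output) {
//         LOG(INFO) << "Node completed: " << node_name;
//       });
//   gateway.SetNodeValueCallback(
//       [](const string& node_name, const int output_slot,
//          const Tensor& tensor_value, const bool is_ref) {
//         LOG(INFO) << node_name << ":" << output_slot << " = "
//                   << tensor_value.DebugString();
//       });
//   // Subsequent direct_session->Run(...) calls will now trigger both
//   // callbacks as nodes finish executing.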

void DebugGateway::CopyTensor(const string& node_name, const int output_slot,
                              const Tensor* src_tensor, OpKernelContext* ctx,
                              CopyDoneCallback copy_done_cb) {
  Device* device = static_cast<Device*>(ctx->device());

  // Determine if the tensor is initialized properly.
  // The second part of the check is necessary because in some cases, a
  // tensor can pass the IsInitialized() check, but the dtype is not set,
  // e.g., tf.FIFOQueue.
  if (src_tensor->IsInitialized() && DataTypeSize(src_tensor->dtype()) > 0) {
    // Tensor is initialized.

    string tensor_tag = strings::StrCat(node_name, ":", output_slot);

    // Create copied tensor on host
    Allocator* cpu_allocator = tensorflow::cpu_allocator();
    Tensor cpu_tensor(cpu_allocator, src_tensor->dtype(), src_tensor->shape());

    // Determine if the tensor is on device (GPU) or host (CPU).
    // The second part of the check is necessary because even an OpKernel on
    // GPU may have output tensors allocated on CPU.
    if ((device->name().find("GPU:") != string::npos ||
         device->name().find("SYCL:") != string::npos) &&
        !ctx->output_alloc_attr(output_slot).on_host()) {
      // GPU tensor: Copy it to host (CPU).
      DeviceContext* device_ctxt = ctx->op_device_context();

      // Copy device (e.g., GPU) tensor to host and, when done, invoke the
      // callback.
      device_ctxt->CopyDeviceTensorToCPU(
          src_tensor, "TensorCopy", device, &cpu_tensor,
          [node_name, cpu_tensor, copy_done_cb](const Status& s) {
            if (s.ok()) {
              copy_done_cb(&cpu_tensor);
            } else {
              LOG(ERROR) << "Copying of device Tensor " << node_name
                         << " to CPU for debugging failed.";
            }
          });
    } else {
      // For CPU tensors, copy the source tensor and own the copy, because the
      // value callback may outlive the lifetime of the tensor, and the tensor
      // may share its underlying buffer with other tensors.
      cpu_tensor.UnsafeCopyFromInternal(*src_tensor, src_tensor->dtype(),
                                        src_tensor->shape());

      copy_done_cb(&cpu_tensor);
    }
  } else {
    // Tensor is not initialized: No need to copy.
    copy_done_cb(src_tensor);
  }
}
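
// N.B.: In the device (GPU/SYCL) branch above, copy_done_cb is invoked
// asynchronously, from the completion callback passed to
// CopyDeviceTensorToCPU; in the host-resident and uninitialized-tensor
// branches it is invoked synchronously before CopyTensor returns.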

}  // namespace tensorflow