1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #include "tensorflow/core/debug/debug_gateway.h" 17 18 #include <utility> 19 20 #include "tensorflow/core/common_runtime/device_factory.h" 21 #include "tensorflow/core/common_runtime/session_factory.h" 22 #include "tensorflow/core/framework/tensor.h" 23 24 namespace tensorflow { 25 26 DebugGateway::DebugGateway(DirectSession* session) : session_(session) { 27 session_->node_outputs_callback_ = 28 [this](const string& node_name, const int output_slot, 29 const Tensor* tensor, const bool is_ref, OpKernelContext* ctx) { 30 if (comp_cb_ != nullptr && output_slot <= 0) { 31 // The node completion callback is invoked once for a node regardless 32 // of whether the node has zero, one or more outputs. 33 // The output_slot can be negative (-1, or kControlSlot) if 34 // node_outputs_callback_ is invoked for a node with no output. If 35 // that is the case, notify the callback that the node in question has 36 // no output. 37 comp_cb_(node_name, output_slot == 0); 38 } 39 40 // Copy tensor values (e.g., from GPU to host) only if the 41 // value callback is not nullptr. 42 if (val_cb_ != nullptr && output_slot >= 0) { 43 CopyTensor(node_name, output_slot, tensor, ctx, 44 [this, node_name, output_slot, 45 is_ref](const Tensor* copied_tensor) { 46 val_cb_(node_name, output_slot, *copied_tensor, is_ref); 47 }); 48 } 49 50 return Status::OK(); 51 }; 52 } 53 54 DebugGateway::~DebugGateway() { 55 if (session_ != nullptr) { 56 session_->node_outputs_callback_ = nullptr; 57 } 58 } 59 60 void DebugGateway::SetNodeCompletionCallback(NodeCompletionCallback callback) { 61 comp_cb_ = std::move(callback); 62 } 63 64 void DebugGateway::SetNodeValueCallback(NodeValueCallback callback) { 65 val_cb_ = std::move(callback); 66 } 67 68 void DebugGateway::CopyTensor(const string& node_name, const int output_slot, 69 const Tensor* src_tensor, OpKernelContext* ctx, 70 CopyDoneCallback copy_done_cb) { 71 Device* device = static_cast<Device*>(ctx->device()); 72 73 // Determine if the tensor is initialized properly. 74 // The second part of the check is necessary because in some cases, a 75 // tensor can pass the IsInitialized() check, but the dtype is not set, 76 // e.g., tf.FIFOQueue. 77 if (src_tensor->IsInitialized() && DataTypeSize(src_tensor->dtype()) > 0) { 78 // Tensor is initialized. 79 80 string tensor_tag = strings::StrCat(node_name, ":", output_slot); 81 82 // Create copied tensor on host 83 Allocator* cpu_allocator = tensorflow::cpu_allocator(); 84 Tensor cpu_tensor(cpu_allocator, src_tensor->dtype(), src_tensor->shape()); 85 86 // Determine if the tensor is on device (GPU) or host (CPU). 87 // The second part of the check is necessary because even an OpKernel on 88 // may have output tensors allocated on CPU. 89 if ((device->name().find("GPU:") != string::npos || 90 device->name().find("SYCL:") != string::npos) && 91 !ctx->output_alloc_attr(output_slot).on_host()) { 92 // GPU tensors: Copy it to host (CPU). 93 DeviceContext* device_ctxt = ctx->op_device_context(); 94 95 // Copy device (e.g., GPU) tensor to host and when done, invoke the 96 // callback. 97 device_ctxt->CopyDeviceTensorToCPU( 98 src_tensor, "TensorCopy", device, &cpu_tensor, 99 [node_name, cpu_tensor, copy_done_cb](const Status& s) { 100 if (s.ok()) { 101 copy_done_cb(&cpu_tensor); 102 } else { 103 LOG(ERROR) << "Copying of device Tensor " << node_name 104 << " to CPU for debugging failed."; 105 } 106 }); 107 } else { 108 // For CPU tensors, copy the source tensor and own the copy, because the 109 // value callback may outlive the life time of the tensor and the tensor 110 // may shared the underlying buffer with other tensors. 111 cpu_tensor.UnsafeCopyFromInternal(*src_tensor, src_tensor->dtype(), 112 src_tensor->shape()); 113 114 copy_done_cb(&cpu_tensor); 115 } 116 } else { 117 // Tensor is not initialized: No need to copy. 118 copy_done_cb(src_tensor); 119 } 120 } 121 122 } // namespace tensorflow 123