/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/core/kernels/hexagon/hexagon_control_wrapper.h"

#include "tensorflow/core/framework/tensor_shape.pb.h"
#include "tensorflow/core/kernels/hexagon/hexagon_ops_definitions.h"
#include "tensorflow/core/kernels/hexagon/soc_interface.h"
#include "tensorflow/core/platform/profile_utils/cpu_utils.h"

namespace tensorflow {

constexpr const char* const OUTPUT_OP_NAME = "OUTPUT";
constexpr const char* const REMOTE_FUSED_GRAPH_NODE_NAME_PREFIX =
    "hexagon_remote_fused_graph";
/* static */ constexpr const char* const
    HexagonControlWrapper::REMOTE_FUSED_GRAPH_EXECUTOR_NAME;

constexpr int ALIGNMENT_BYTES = 16;
constexpr int MAX_IN_OUT_COUNT = 128;

const bool DBG_DUMP_VERIFICATION_STRING = false;
const int DBG_LEVEL = 0;  // -2: verbose, -1: debug, 0: info
const bool DBG_USE_DUMMY_INPUT = false;
const bool DBG_USE_SAMPLE_INPUT = false;
const int64 FLAG_ENABLE_PANDA_BINARY_INPUT = 0x01;
const bool DBG_DUMP_INPUT_TENSOR_AS_FLOAT_DATA = false;

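// Normalizes a node name into tensor-name form by appending the default
// output port when none is given; e.g. "input" becomes "input:0", while
// "input:1" is returned unchanged.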
static string AddPort(const string& node_name) {
  if (node_name.find(':') != string::npos) {
    return node_name;
  } else {
    return strings::StrCat(node_name, ":", 0);
  }
}

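// Rounds the given pointer up to the next ALIGNMENT_BYTES (16-byte)
// boundary. For example, a pointer at address 0x1003 is shifted forward by
// 13 bytes to 0x1010; an already aligned pointer is returned unchanged.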
static uint8* FindAlignedPointer(uint8* ptr) {
  const uintptr_t data_ptr_int = reinterpret_cast<uintptr_t>(ptr);
  const int shift_count =
      (ALIGNMENT_BYTES - data_ptr_int % ALIGNMENT_BYTES) % ALIGNMENT_BYTES;
  uint8* data_ptr = ptr + shift_count;
  return data_ptr;
}

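// Performs a linear scan over the transferred nodes and returns a mutable
// pointer to the NodeInfo whose name matches, or nullptr if no such node
// exists in the given GraphTransferInfo.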
/* static */ GraphTransferInfo::NodeInfo* HexagonControlWrapper::FindNodeInfo(
    const string& name, GraphTransferInfo* graph_transfer_info) {
  for (GraphTransferInfo::NodeInfo& node_info :
       *graph_transfer_info->mutable_node_info()) {
    if (node_info.name() == name) {
      return &node_info;
    }
  }
  return nullptr;
}

int HexagonControlWrapper::GetVersion() {
  return soc_interface_GetSocControllerVersion();
}

bool HexagonControlWrapper::Init(const RemoteFusedGraphExecuteInfo& info) {
  soc_interface_SetLogLevel(DBG_LEVEL);
  if (DBG_USE_SAMPLE_INPUT) {
    soc_interface_SetDebugFlag(FLAG_ENABLE_PANDA_BINARY_INPUT);
  }
  if (info.serialized_executor_parameters().empty()) {
    std::vector<std::pair<string, Tensor>> inputs;
    std::vector<string> outputs;
    RemoteFusedGraphExecuteUtils::BuildRemoteGraphInputsAndOutputsFromProto(
        info, &inputs, &outputs);
    Status status = graph_transferer_.LoadGraphFromProto(
        HexagonOpsDefinitions::getInstance(), info.remote_graph(), inputs,
        outputs,
        false  // shape_inference_for_unknown_shape
    );
    TF_CHECK_OK(status) << status;
  } else {
    // If graph transfer info is attached, just import it.
    graph_transferer_.SetSerializedGraphTransferInfo(
        info.serialized_executor_parameters());
  }
  execute_info_ = &info;
  bool success = soc_interface_Init();
  if (!success) {
    LOG(ERROR) << "Hexagon initialization failed. See log output.";
    return false;
  }
  std::vector<int> input_sizes;
  std::vector<int> output_sizes;
  CHECK_NOTNULL(execute_info_);
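  // For each graph input, record its port index and compute its buffer size
  // in bytes: sizeof(dtype) multiplied by the product of all dimensions.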
  for (int i = 0; i < execute_info_->graph_input_node_name_size(); ++i) {
    const string& input = execute_info_->graph_input_node_name(i);
    LOG(INFO) << "Add input: " << input << ", " << i;
    CHECK(input_port_map_.emplace(AddPort(input), i).second);
    const RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& shape_type =
        execute_info_->default_graph_input_tensor_shape(i);
    int64 buf_size = DataTypeSize(shape_type.dtype());
    for (const TensorShapeProto::Dim& dim : shape_type.shape().dim()) {
      buf_size *= dim.size();
    }
    input_sizes.emplace_back(static_cast<int>(buf_size));
  }
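  // Likewise, record each graph output's port index and byte size.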
  for (int i = 0; i < execute_info_->graph_output_node_name_size(); ++i) {
    const string& output = execute_info_->graph_output_node_name(i);
    CHECK(output_port_map_.emplace(AddPort(output), i).second);
    const RemoteFusedGraphExecuteInfo::TensorShapeTypeProto& shape_type =
        execute_info_->default_graph_output_tensor_shape(i);

    int64 buf_size = DataTypeSize(shape_type.dtype());
    for (const TensorShapeProto::Dim& dim : shape_type.shape().dim()) {
      buf_size *= dim.size();
    }
    output_sizes.emplace_back(static_cast<int>(buf_size));
  }

  LOG(INFO) << "Allocate inout buffer";
  success &= soc_interface_AllocateInOutNodeBuffers(
      input_sizes.size(), input_sizes.data(), output_sizes.size(),
      output_sizes.data());
  return success;
}

bool HexagonControlWrapper::Finalize() { return soc_interface_Finalize(); }
bool HexagonControlWrapper::SetupGraph() {
  // Modify the graph transfer info in place to adapt it to the hexnn library
  GraphTransferInfo& graph_transfer_info =
      graph_transferer_.GetMutableGraphTransferInfo();

  // Verify that every graph input node is present in the transferred graph
  for (const GraphTransferInfo::GraphInputNodeInfo& graph_input :
       graph_transfer_info.graph_input_node_info()) {
    GraphTransferInfo::NodeInfo* node_info =
        FindNodeInfo(graph_input.name(), &graph_transfer_info);
    CHECK_NE(node_info, nullptr);
  }

  // Generate a new output node which is connected to the graph output node
  // TODO(satok): Support multiple output nodes
  CHECK_EQ(graph_transfer_info.graph_output_node_info_size(), 1);
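  // The id for the appended node is chosen past all existing op and const
  // node ids so it cannot collide with any of them.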
  for (const GraphTransferInfo::GraphOutputNodeInfo& graph_output :
       graph_transfer_info.graph_output_node_info()) {
    const int new_output_node_id = graph_transfer_info.node_info_size() +
                                   graph_transfer_info.const_node_info_size() +
                                   2 /* offset for ids */;
    // Register a new output node
    GraphTransferInfo::NodeInfo& new_output_node_info =
        *graph_transfer_info.add_node_info();
    new_output_node_info.set_name(OUTPUT_OP_NAME);
    new_output_node_info.set_node_id(new_output_node_id);
    new_output_node_info.set_type_name(OUTPUT_OP_NAME);
    new_output_node_info.set_soc_op_id(
        HexagonOpsDefinitions::getInstance().GetOpIdFor(OUTPUT_OP_NAME, {}));
    new_output_node_info.set_padding_id(0 /* PADDING_NA_ID */);
    new_output_node_info.set_input_count(1);
    new_output_node_info.set_output_count(0);

    const TensorId tid = ParseTensorName(graph_output.name());
    const string node_name = tid.first.ToString();
    const int port = tid.second;
    // Register node input for the new output node
    const GraphTransferInfo::NodeInfo* node_info =
        FindNodeInfo(node_name, &graph_transfer_info);
    CHECK_NE(node_info, nullptr);
    GraphTransferInfo::NodeInputInfo& node_input_info =
        *graph_transfer_info.add_node_input_info();
    node_input_info.set_node_id(new_output_node_id);
    GraphTransferInfo::NodeInput& node_input =
        *node_input_info.add_node_input();
    node_input.set_node_id(node_info->node_id());
    node_input.set_output_port(port);
  }

  if (DBG_DUMP_VERIFICATION_STRING) {
    GraphTransferer gt;
    gt.SetSerializedGraphTransferInfo(graph_transfer_info.SerializeAsString());
    gt.DumpVerificationStringOfNodeTransferParams();
  }

  int inputs_count = 0;
  int outputs_count = 0;
  for (const GraphTransferInfo::NodeInputInfo& input_params :
       graph_transfer_info.node_input_info()) {
    inputs_count += input_params.node_input_size();
  }

  for (const GraphTransferInfo::NodeOutputInfo& output_params :
       graph_transfer_info.node_output_info()) {
    outputs_count += output_params.max_byte_size_size();
  }
  // Allocate memory for node inputs and node outputs
  soc_interface_AllocateNodeInputAndNodeOutputArray(inputs_count,
                                                    outputs_count);

  // Construct node input parameters
  std::unordered_map<int, std::tuple<void*, int>> inputs_map;
  for (const GraphTransferInfo::NodeInputInfo& input_params :
       graph_transfer_info.node_input_info()) {
    const int count = input_params.node_input_size();
    CHECK(count <= MAX_IN_OUT_COUNT);
    int node_ids[MAX_IN_OUT_COUNT];
    int ports[MAX_IN_OUT_COUNT];
    for (int i = 0; i < count; ++i) {
      const GraphTransferInfo::NodeInput& node_input =
          input_params.node_input(i);
      node_ids[i] = node_input.node_id() + NODE_ID_OFFSET;
      ports[i] = node_input.output_port();
    }
    void* inputs_ptr = soc_interface_SetOneNodeInputs(count, node_ids, ports);
    const int node_id = input_params.node_id();
    CHECK(inputs_map.count(node_id) == 0);
    inputs_map.emplace(node_id, std::make_tuple(inputs_ptr, count));
  }

  // Construct node output parameters
  std::unordered_map<int, std::tuple<void*, int>> outputs_map;
  for (const GraphTransferInfo::NodeOutputInfo& output_params :
       graph_transfer_info.node_output_info()) {
    const int count = output_params.max_byte_size_size();
    CHECK(count <= MAX_IN_OUT_COUNT);
    int sizes[MAX_IN_OUT_COUNT];
    for (int i = 0; i < count; ++i) {
      const int size = output_params.max_byte_size(i);
      sizes[i] = size;
    }
    void* outputs_ptr = soc_interface_SetOneNodeOutputs(count, sizes);
    const int node_id = output_params.node_id();
    CHECK(outputs_map.count(node_id) == 0);
    outputs_map.emplace(node_id, std::make_tuple(outputs_ptr, count));
  }

  // Instantiate graph
  soc_interface_InstantiateGraph();

  // Initialize graph
  // 1. Setup const nodes
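  // Each const node's payload is copied into a locally owned buffer padded
  // to ALIGNMENT_BYTES; the dummy_const_data_ member keeps that storage
  // alive while the soc library holds the raw pointer.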
  for (const GraphTransferInfo::ConstNodeInfo& params :
       graph_transfer_info.const_node_info()) {
    const int node_id = params.node_id();
    // TODO(satok): Stop assuming shape size is 4.
    CHECK(params.shape_size() == 4);
    const int64 shape_0 = params.shape(0);
    const int64 shape_1 = params.shape(1);
    const int64 shape_2 = params.shape(2);
    const int64 shape_3 = params.shape(3);
    const int data_size = params.data().length();
    CHECK(dummy_const_data_.count(node_id) == 0);
    auto data = dummy_const_data_.emplace(
        std::piecewise_construct, std::make_tuple(node_id), std::make_tuple());
    CHECK(data.second);
    data.first->second.resize(data_size + ALIGNMENT_BYTES - 1);
    uint8* data_ptr = FindAlignedPointer(data.first->second.data());
    std::memcpy(data_ptr, params.data().data(), data_size);
    soc_interface_AppendConstNode(params.name().c_str(),
                                  node_id + NODE_ID_OFFSET, shape_0, shape_1,
                                  shape_2, shape_3, data_ptr, data_size);
  }

  // 2. Setup op nodes
  for (const GraphTransferInfo::NodeInfo& params :
       graph_transfer_info.node_info()) {
    const int node_id = params.node_id();
    const int op_id = params.soc_op_id();
    CHECK(inputs_map.count(node_id) == 1);
    CHECK(outputs_map.count(node_id) <= 1);
    // Only the output node has no outputs
    const bool has_output = outputs_map.count(node_id) == 1;
    const auto& input_ptr_and_count = inputs_map.at(node_id);
    const void* input_ptr = std::get<0>(input_ptr_and_count);
    const int input_count = std::get<1>(input_ptr_and_count);
    void* output_ptr = nullptr;
    int output_count = 0;
    if (has_output) {
      const auto& output_ptr_and_count = outputs_map.at(node_id);
      output_ptr = std::get<0>(output_ptr_and_count);
      output_count = std::get<1>(output_ptr_and_count);
      // CHECK(output_count > 0);
    }
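    // Map the TensorFlow Padding enum onto the padding ids the soc library
    // expects: 0 (none/NA), 1 (SAME), 2 (VALID).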
    int padding_id = -1;
    if (params.padding_id() == 0) {
      padding_id = 0;
    } else if (params.padding_id() == Padding::SAME) {
      padding_id = 1;
    } else if (params.padding_id() == Padding::VALID) {
      padding_id = 2;
    } else {
      LOG(FATAL) << "Unsupported padding id: " << params.padding_id();
    }
    soc_interface_AppendNode(params.name().c_str(), node_id + NODE_ID_OFFSET,
                             op_id, padding_id, input_ptr, input_count,
                             output_ptr, output_count);
  }

  LOG(INFO) << "Setup graph completed";

  // 3. Construct graph
  return soc_interface_ConstructGraph();

  // Keep the following line commented out; it enables dummy graph
  // construction for debugging.
  // return soc_interface_setupDummyGraph(3 /* inception version */);
}

bool HexagonControlWrapper::ExecuteGraph() {
  return soc_interface_ExecuteGraph();
}

bool HexagonControlWrapper::TeardownGraph() {
  soc_interface_ReleaseNodeInputAndNodeOutputArray();
  return soc_interface_TeardownGraph();
}

bool HexagonControlWrapper::FillInputNode(
    const string& node_name,
    const std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE>& shape,
    const ConstByteArray bytes) {
  const string tensor_name = AddPort(node_name);
  CHECK(input_port_map_.count(tensor_name) > 0);
  const int port = input_port_map_.at(tensor_name);
  if (input_tensor_data_.count(port) <= 0) {
    input_tensor_data_.emplace(port, std::vector<uint8>{});
  }
  std::vector<uint8>& input_tensor_data = input_tensor_data_.at(port);

  // Hexagon only supports 32-bit dimensions
  const int x = static_cast<int>(shape[0]);
  const int y = static_cast<int>(shape[1]);
  const int z = static_cast<int>(shape[2]);
  const int d = static_cast<int>(shape[3]);

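  // Over-allocate by ALIGNMENT_BYTES so the payload can be placed on a
  // 16-byte boundary before it is handed to the soc interface.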
  const uint64 byte_size = x * y * z * d * DataTypeSize(std::get<2>(bytes));
  CHECK_EQ(byte_size, std::get<1>(bytes));
  input_tensor_data.resize(byte_size + ALIGNMENT_BYTES);
  uint8* data_ptr = FindAlignedPointer(input_tensor_data.data());

  if (DBG_USE_DUMMY_INPUT) {
    std::memset(data_ptr, 0, byte_size);
  } else {
    std::memcpy(data_ptr, std::get<0>(bytes), byte_size);
  }

  return soc_interface_FillInputNodeWithPort(port, x, y, z, d, data_ptr,
                                             byte_size);
}

bool HexagonControlWrapper::ReadOutputNode(
    const string& node_name, TensorAllocatorFunc tensor_allocator) {
  CHECK_NE(execute_info_, nullptr);
  TensorShape output_shape;
  // TODO(satok): Switch shape corresponding to input shape
  for (int i = 0; i < execute_info_->graph_output_node_name_size(); ++i) {
    if (execute_info_->graph_output_node_name(i) == node_name) {
      for (const TensorShapeProto::Dim& dim :
           execute_info_->default_graph_output_tensor_shape(i).shape().dim()) {
        output_shape.AddDim(dim.size());
      }
      break;
    }
  }
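  // Fetch the raw output bytes via the ByteArray overload below, then copy
  // them into a tensor obtained from the caller-supplied allocator.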
  std::vector<ByteArray> outputs;
  ReadOutputNode(node_name, &outputs);
  CHECK_EQ(1, outputs.size());
  ByteArray& output = outputs[0];
  Tensor* output_tensor = tensor_allocator(output_shape);
  CHECK(output_tensor->TotalBytes() >= std::get<1>(output))
      << output_tensor->TotalBytes() << ", " << std::get<1>(output);
  TF_CHECK_OK(RemoteFusedGraphExecuteUtils::CopyByteArrayToTensor(
      std::get<0>(output), std::get<1>(output), output_tensor));
  return true;
}

bool HexagonControlWrapper::ReadOutputNode(
    const string& node_name, std::vector<ByteArray>* const outputs) {
  CHECK(outputs != nullptr);
  ByteArray output;
  const string tensor_name = AddPort(node_name);
  CHECK(output_port_map_.count(tensor_name) > 0);
  const int port = output_port_map_.at(tensor_name);
  soc_interface_ReadOutputNodeWithPort(
      port, &std::get<0>(output),
      reinterpret_cast<uint64_t*>(&std::get<1>(output)));
  // TODO: Accept all results
  // std::get<2>(output) = DT_FLOAT;
  outputs->emplace_back(output);
  return true;
}

Status HexagonControlWrapper::FuseRemoteGraph(
    const GraphDef& original_graph_def, const std::vector<string>& inputs,
    const std::vector<string>& outputs, GraphDef* fused_graph_def) {
  const std::unordered_set<string> fused_node_names =
      RemoteFusedGraphExecuteUtils::BuildNodeMapFromOpsDefinitions(
          original_graph_def, HexagonOpsDefinitions::getInstance());
  // TODO(satok): We may want to place shape and type inside this function
  // if they are not placed in the given graph.
  TF_RETURN_IF_ERROR(RemoteFusedGraphExecuteUtils::FuseRemoteGraphByNodeNames(
      original_graph_def, inputs, outputs, REMOTE_FUSED_GRAPH_NODE_NAME_PREFIX,
      fused_node_names, REMOTE_FUSED_GRAPH_EXECUTOR_NAME,
      /*require_shape_type=*/true, fused_graph_def));
  return Status::OK();
}

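// Wraps the tensor's backing storage in a ConstByteArray and forwards to
// the shape-based FillInputNode overload above.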
bool HexagonControlWrapper::FillInputNode(const string& node_name,
                                          const Tensor& tensor) {
  StringPiece tensor_data = tensor.tensor_data();
  const ConstByteArray ba =
      ConstByteArray(reinterpret_cast<const uint8*>(tensor_data.data()),
                     tensor_data.size(), tensor.dtype());
  if (DBG_DUMP_INPUT_TENSOR_AS_FLOAT_DATA) {
    LOG(INFO) << "Input tensor data: element size = " << tensor.NumElements()
              << ", byte size = " << tensor.TotalBytes();
    std::stringstream line;
    for (int i = 0; i < tensor.NumElements(); ++i) {
      line << tensor.flat<float>().data()[i] << ", ";
      if ((i - 2) % 3 == 0 || i == tensor.NumElements() - 1) {
        LOG(INFO) << "(" << ((i - 2) / 3) << ") " << line.str();
        line.str("");
        line.clear();
      }
    }
  }
  const std::array<int64, GraphTransferer::SHAPE_ARRAY_SIZE> shape =
      GraphTransferer::ToTensorShapeArray(tensor.shape());
  FillInputNode(node_name, shape, ba);
  return true;
}

bool HexagonControlWrapper::IsEnabled() const { return true; }
}  // namespace tensorflow