Home | History | Annotate | Download | only in src_impl
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // to demonstrate the performance difference between ION and HLOS memory
     17 // for sharing with ADSP.
     18 #define USE_ION_MEMORY
     19 
     20 #include "hexagon_controller.h"
     21 
     22 #include <malloc.h>
     23 #include <stdio.h>
     24 
     25 #include "adspmsgd.h"
     26 #include "dspCV.h"
     27 #include "node_data_float.h"
     28 #include "rpcmem.h"  // helper API's for shared buffer allocation
     29 #include "soc_interface.h"
     30 #include "tfm_log.h"
     31 
     32 // if false, use int data as input.  This is only for acceleration purpose.
     33 // Also you may need to change android.min.
     34 static const bool USE_FLOAT_DATA = true;
     35 
     36 // if true, show id for each node
     37 static const bool DBG_SHOW_ID = false;
     38 
     39 static const uint32_t OUTPUT_PARAM_MAX_LINE_SIZE = 1000;
     40 
     41 static const uint32_t PRINT_BUFSIZE = 2 * 1024 * 1024;
     42 
     43 // extern pre-generated inception dummy data
     44 extern uint8_t inception_dummy_int_data_224x224[];
     45 extern uint8_t inception_dummy_int_data_299x299[];
     46 extern float inception_dummy_float_data_299x299[];
     47 
     48 #define HEXAGON_CONTROLLER_VERSION 101
     49 
     50 // allocate print bufsize in advance @MB
     51 #define PRINT_BUFSIZE (2 * 1024 * 1024)
     52 
     53 static unsigned char s_print_buf[PRINT_BUFSIZE];
     54 
     55 #define MAX_INPUTS 10
     56 #define MAX_OUTPUTS 10
     57 
     58 static struct NodeDataFloat s_input_node_data_buffer[MAX_INPUTS];
     59 static uint8_t* s_output_node_data_buffer[MAX_OUTPUTS];
     60 static int s_output_node_data_buffer_max_byte_size[MAX_OUTPUTS];
     61 static int s_output_node_data_array_byte_size[MAX_OUTPUTS];
     62 static uint32_t s_target_graph_id;
     63 
     64 static bool s_dbg_use_inception_dummy_data = false;
     65 static int s_dbg_inception_version = 3;
     66 
     67 static int GetInputNodeCount() {
     68   for (int i = 0; i < MAX_INPUTS; ++i) {
     69     if (s_input_node_data_buffer[i].max_buf_byte_size == 0) {
     70       return i;
     71     }
     72   }
     73   return 0;
     74 }
     75 
     76 static int GetOutputNodeCount() {
     77   for (int i = 0; i < MAX_OUTPUTS; ++i) {
     78     if (s_output_node_data_buffer_max_byte_size[i] == 0) {
     79       return i;
     80     }
     81   }
     82   return 0;
     83 }
     84 
     85 static bool SetInputTensorDef(int port, hexagon_nn_tensordef* tensordef) {
     86   if (port >= GetInputNodeCount()) {
     87     TFMLOGE("Error exceeds input count.");
     88     return false;
     89   }
     90   struct NodeDataFloat* input_node_data_buffer =
     91       &s_input_node_data_buffer[port];
     92   tensordef->batches = input_node_data_buffer->x;
     93   tensordef->height = input_node_data_buffer->y;
     94   tensordef->width = input_node_data_buffer->z;
     95   tensordef->depth = input_node_data_buffer->d;
     96   tensordef->data = input_node_data_buffer->byte_array_data;
     97   tensordef->dataLen = input_node_data_buffer->array_byte_size;
     98 
     99   return true;
    100 }
    101 
    102 bool hexagon_controller_SetAllInputTensorDef(int node_count,
    103                                              hexagon_nn_tensordef* tensordef) {
    104   bool success = true;
    105   if (node_count != GetInputNodeCount()) {
    106     TFMLOGE("Error invalid input node count.");
    107     return false;
    108   }
    109   for (int i = 0; i < node_count; ++i) {
    110     SetInputTensorDef(i, &tensordef[i]);
    111   }
    112   return success;
    113 }
    114 
    115 static bool SetOutputTensorDef(int port, hexagon_nn_tensordef* tensordef) {
    116   if (port >= GetOutputNodeCount()) {
    117     TFMLOGE("Error exceeds output count.");
    118     return false;
    119   }
    120   tensordef->data = s_output_node_data_buffer[port];
    121   tensordef->dataLen = s_output_node_data_buffer_max_byte_size[port];
    122   return true;
    123 }
    124 
    125 bool hexagon_controller_SetAllOutputTensorDef(int node_count,
    126                                               hexagon_nn_tensordef* tensordef) {
    127   bool success = true;
    128   if (node_count != GetOutputNodeCount()) {
    129     TFMLOGE("Error invalid output node count. %d != %d", node_count,
    130             GetOutputNodeCount());
    131     return false;
    132   }
    133   for (int i = 0; i < node_count; ++i) {
    134     SetOutputTensorDef(i, &tensordef[i]);
    135   }
    136   return success;
    137 }
    138 
    139 void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version) {
    140   if (version == 1) {
    141     if (USE_FLOAT_DATA) {
    142       TFMLOGE("ERROR!!!! Do not use float data for v1");
    143       return;
    144     }
    145     hexagon_controller_CopyByteNodeData(
    146         0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V1,
    147         INCEPTION_PARAM_WIDTH_V1, INCEPTION_PARAM_DEPTH, 1,
    148         inception_dummy_int_data_224x224);
    149   } else if (version == 3) {
    150     if (USE_FLOAT_DATA) {
    151       hexagon_controller_CopyByteNodeData(
    152           0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
    153           INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, sizeof(float),
    154           (uint8_t*)inception_dummy_float_data_299x299);
    155     } else {
    156       hexagon_controller_CopyByteNodeData(
    157           0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3,
    158           INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, 1,
    159           inception_dummy_int_data_299x299);
    160     }
    161   }
    162 }
    163 
    164 bool hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id,
    165                                                bool show_ranking) {
    166   const int input_node_count = GetInputNodeCount();
    167   hexagon_nn_tensordef inputs[input_node_count];
    168   const int output_node_count = GetOutputNodeCount();
    169   if (output_node_count <= 0) {
    170     TFMLOGI("Error output node count is 0.");
    171     return false;
    172   }
    173   hexagon_nn_tensordef outputs[output_node_count];
    174   hexagon_controller_SetAllInputTensorDef(input_node_count, inputs);
    175   hexagon_controller_SetAllOutputTensorDef(output_node_count, outputs);
    176   const bool success = hexagon_controller_ExecuteGraphWithMultipleInOut(
    177       nn_id, input_node_count, inputs, output_node_count, outputs);
    178   for (int i = 0; i < output_node_count; ++i) {
    179     s_output_node_data_array_byte_size[i] = outputs[i].data_valid_len;
    180   }
    181 
    182   const hexagon_nn_tensordef* output0 = &outputs[0];
    183 
    184   const uint32_t out_batches = output0->batches;
    185   const uint32_t out_height = output0->height;
    186   const uint32_t out_width = output0->width;
    187   const uint32_t out_depth = output0->depth;
    188   const uint32_t out_data_size = output0->data_valid_len;
    189   const uint32_t out_buf_byte_size = output0->dataLen;
    190 
    191   if (!success) {
    192     TFMLOGE("Execution failed");
    193     DumpNNId(nn_id);
    194     return false;
    195   } else if (!show_ranking) {
    196     return true;
    197   }
    198 
    199   static const int OUT_RANKING_SIZE = 5;
    200   int out_ranking[OUT_RANKING_SIZE];
    201   hexagon_controller_PrintMaxNIdx(
    202       (float*)s_output_node_data_buffer[0],
    203       out_batches * out_height * out_width * out_depth, OUT_RANKING_SIZE,
    204       out_ranking);
    205   TFMLOGD("%d x %d x %d x %d, byte size = %d, buf size = %d\n", out_batches,
    206           out_height, out_width, out_depth, out_data_size, out_buf_byte_size);
    207   if (s_dbg_use_inception_dummy_data) {
    208     // Check the result of inception with a dummy data. This step shouldn't
    209     // be passed when show_ranking != true to avoid adding unnecessary
    210     // additional computation cost.
    211     if (out_ranking[0] == 169 && out_ranking[1] == 7) {
    212       TFMLOGD("Result is correct! %d, %d", out_ranking[0], out_ranking[1]);
    213       return true;
    214     } else {
    215       TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]);
    216       return false;
    217     }
    218   }
    219   return true;
    220 }
    221 
    222 uint32_t hexagon_controller_GetTargetGraphId() { return s_target_graph_id; }
    223 
    224 void hexagon_controller_SetTargetGraphId(uint32_t graph_id) {
    225   s_target_graph_id = graph_id;
    226 }
    227 
    228 void hexagon_controller_PrintGraph(uint32_t id) {
    229   int retval = hexagon_nn_snpprint(id, s_print_buf, PRINT_BUFSIZE);
    230   TFMLOGD("PrintGraph %s\n", s_print_buf);
    231   if (retval) {
    232     TFMLOGE("Error on print graph\n");
    233   }
    234 }
    235 
    236 int hexagon_controller_GetWrapperVersion() {
    237   return HEXAGON_CONTROLLER_VERSION;
    238 }
    239 
    240 int hexagon_controller_GetHexagonBinaryVersion() {
    241   int retval = 0;
    242   hexagon_nn_version(&retval);
    243   return retval;
    244 }
    245 
    246 bool hexagon_controller_AllocateInputNodeDataBuffers(int port,
    247                                                      int input_buf_byte_size) {
    248   TFMLOGD("Allocate memory for input node data. port = %d, size = %d", port,
    249           input_buf_byte_size);
    250   if (s_input_node_data_buffer[port].max_buf_byte_size != 0) {
    251     TFMLOGE("ERROR! input buffer is already allocated!!");
    252     return false;
    253   } else {
    254     s_input_node_data_buffer[port].max_buf_byte_size = input_buf_byte_size;
    255     posix_memalign((void**)&s_input_node_data_buffer[port].byte_array_data, 128,
    256                    input_buf_byte_size);
    257     TFMLOGD("allocate input node data buffers done");
    258   }
    259   return true;
    260 }
    261 
    262 bool hexagon_controller_AllocateOutputNodeDataBuffers(
    263     int port, int output_buf_byte_size) {
    264   TFMLOGD("Allocate memory for output node data. port = %d, size = %d", port,
    265           output_buf_byte_size);
    266   if (s_output_node_data_buffer_max_byte_size[port] != 0) {
    267     TFMLOGE("ERROR! input buffer is already allocated!!");
    268     return false;
    269   } else {
    270     // s_output_node_data_buffer = malloc(output_size * sizeof(float));
    271     posix_memalign((void**)&s_output_node_data_buffer[port], 128,
    272                    output_buf_byte_size);
    273     s_output_node_data_buffer_max_byte_size[port] = output_buf_byte_size;
    274     s_output_node_data_array_byte_size[port] = 0;
    275     TFMLOGD("allocate output node data buffers");
    276   }
    277   return true;
    278 }
    279 
    280 bool hexagon_controller_AllocateMultipleNodeDataBuffers(int input_count,
    281                                                         int* input_sizes,
    282                                                         int output_count,
    283                                                         int* output_sizes) {
    284   bool success = true;
    285   for (int i = 0; i < input_count; ++i) {
    286     success &=
    287         hexagon_controller_AllocateInputNodeDataBuffers(i, input_sizes[i]);
    288   }
    289   for (int i = 0; i < output_count; ++i) {
    290     success &=
    291         hexagon_controller_AllocateOutputNodeDataBuffers(i, output_sizes[i]);
    292   }
    293 
    294   if (s_dbg_use_inception_dummy_data) {
    295     hexagon_controller_InitInputNodeDataToInceptionDummyData(
    296         s_dbg_inception_version);
    297   }
    298   return success;
    299 }
    300 
    301 bool hexagon_controller_AllocateNodeDataBuffers(int input_size,
    302                                                 int output_size) {
    303   return hexagon_controller_AllocateMultipleNodeDataBuffers(1, &input_size, 1,
    304                                                             &output_size);
    305 }
    306 
    307 bool hexagon_controller_ReleaseInputNodeDataBuffersWithPort(int port) {
    308   struct NodeDataFloat* input_node_data_buffer =
    309       &s_input_node_data_buffer[port];
    310   if (input_node_data_buffer->max_buf_byte_size == 0) {
    311     TFMLOGE("ERROR! input buffer has not been allocated yet!!");
    312     return false;
    313   } else {
    314     input_node_data_buffer->max_buf_byte_size = 0;
    315     input_node_data_buffer->array_byte_size = 0;
    316     free(input_node_data_buffer->byte_array_data);
    317   }
    318   return true;
    319 }
    320 
    321 bool hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(int port) {
    322   if (s_output_node_data_buffer_max_byte_size[port] == 0) {
    323     TFMLOGE("ERROR! output buffer has not been allocated yet!!");
    324     return false;
    325   } else {
    326     s_output_node_data_buffer_max_byte_size[port] = 0;
    327     s_output_node_data_array_byte_size[port] = 0;
    328     free(s_output_node_data_buffer[port]);
    329   }
    330   return true;
    331 }
    332 
    333 bool hexagon_controller_ReleaseNodeDataBuffers() {
    334   bool success = true;
    335   for (int i = 0; i < GetInputNodeCount(); ++i) {
    336     success &= hexagon_controller_ReleaseInputNodeDataBuffersWithPort(i);
    337   }
    338   for (int i = 0; i < GetOutputNodeCount(); ++i) {
    339     success &= hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(i);
    340   }
    341   return success;
    342 }
    343 
    344 bool hexagon_controller_CopyByteNodeData(int port, int x, int y, int z, int d,
    345                                          int type_byte_size,
    346                                          uint8_t* array_data) {
    347   int array_byte_size = x * y * z * d * type_byte_size;
    348   TFMLOGD("--- %d, %d, %d, %d, %d, %d", x, y, z, d, type_byte_size,
    349           array_byte_size);
    350   struct NodeDataFloat* input_node_data_buffer = &s_input_node_data_buffer[0];
    351 
    352   if (input_node_data_buffer->max_buf_byte_size < array_byte_size) {
    353     TFMLOGE("ERROR! input buffer size is too small! %d < %d",
    354             input_node_data_buffer->max_buf_byte_size, array_byte_size);
    355     return false;
    356   }
    357   memcpy(input_node_data_buffer->byte_array_data, array_data, array_byte_size);
    358   input_node_data_buffer->array_byte_size = array_byte_size;
    359   input_node_data_buffer->x = x;
    360   input_node_data_buffer->y = y;
    361   input_node_data_buffer->z = z;
    362   input_node_data_buffer->d = d;
    363   return true;
    364 }
    365 
    366 int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs,
    367                                                     int bus_usage,
    368                                                     int version) {
    369   TFMLOGI("Init hexagon with max attributes (Controller version = %d)",
    370           HEXAGON_CONTROLLER_VERSION);
    371   const int MCPS = 1000;
    372   const int MBPS = 12000;
    373 
    374   adspmsgd_start(0, RPCMEM_HEAP_DEFAULT, 4096);
    375 
    376   dspCV_Attribute attrib[] = {
    377       // The below values will result in the maximum aDSP performance,
    378       // at Turbo voltage.
    379       // Slightly more MCPS than are available on current targets
    380       {DSP_TOTAL_MCPS, MCPS},
    381       // drive the clock to MAX on known targets
    382       {DSP_MCPS_PER_THREAD, MCPS / 2},
    383       // 12 GB/sec is slightly higher than the max realistic
    384       // max BW on existing targets.
    385       {PEAK_BUS_BANDWIDTH_MBPS, MBPS},
    386       // This app is non-real time, and constantly reading/writing memory
    387       {BUS_USAGE_PERCENT, bus_usage},
    388   };
    389   int retval = 0;
    390   if (!enable_dcvs) {
    391     retval = hexagon_nn_disable_dcvs();
    392     if (retval) {
    393       TFMLOGE("Failed to disable DSP DCVS: %x\n", retval);
    394     }
    395   }
    396 
    397   retval =
    398       dspCV_initQ6_with_attributes(attrib, sizeof(attrib) / sizeof(attrib[0]));
    399   TFMLOGD("Return value from dspCV_initQ6() : %d\n", retval);
    400 
    401   s_target_graph_id = 0;
    402   s_dbg_inception_version = version;
    403 
    404   return retval;
    405 }
    406 
    407 int hexagon_controller_DeInitHexagon() {
    408   adspmsgd_stop();
    409   TFMLOGI("Finalize hexagon");
    410   const int retval = dspCV_deinitQ6();
    411   TFMLOGD("return value from dspCV_deinitQ6(): %d \n", retval);
    412 
    413   hexagon_controller_ReleaseNodeDataBuffers();
    414 
    415   return retval;
    416 }
    417 
    418 void hexagon_controller_GrowMemorySize() { hexagon_nn_config(); }
    419 
    420 struct NodeDataFloat* hexagon_controller_GetInputNodeDataBuffer(int port) {
    421   if (port >= GetInputNodeCount()) {
    422     TFMLOGE("port should be less than 1");
    423   }
    424   return &s_input_node_data_buffer[port];
    425 }
    426 
    427 uint8_t* hexagon_controller_GetOutputNodeDataBuffer(int port,
    428                                                     int* out_array_byte_size) {
    429   if (port >= GetOutputNodeCount()) {
    430     TFMLOGE("port should be less than 1");
    431   }
    432   *out_array_byte_size = s_output_node_data_array_byte_size[port];
    433   return s_output_node_data_buffer[port];
    434 }
    435 
    436 // Append const node to the graph
    437 int hexagon_controller_AppendConstNode(const char* const name, int graph_id,
    438                                        int node_id, int batch, int height,
    439                                        int width, int depth,
    440                                        const uint8_t* const data,
    441                                        int data_length) {
    442   if (DBG_SHOW_ID) {
    443     TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d, %d", name, node_id, batch,
    444             height, width, depth, data_length);
    445   } else {
    446     TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d", name, batch, height, width,
    447             depth, data_length);
    448   }
    449   const int retval = hexagon_nn_append_const_node(
    450       graph_id, node_id, batch, height, width, depth, data, data_length);
    451   if (retval != 0) {
    452     TFMLOGE("Failed to append const node %d", node_id);
    453     return retval;
    454   }
    455   return retval;
    456 }
    457 
    458 // Append node to the graph
    459 int hexagon_controller_AppendNode(const char* const name, int graph_id,
    460                                   int node_id, int ops_id, int padding_id,
    461                                   const hexagon_nn_input* const inputs,
    462                                   int inputs_count,
    463                                   const hexagon_nn_output* const outputs,
    464                                   int outputs_count) {
    465   char input_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE];
    466   memset(input_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE);
    467   int pos = 0;
    468   pos += snprintf(&input_param_buf[pos], 500, "in: ");
    469   for (int i = 0; i < inputs_count; ++i) {
    470     if (DBG_SHOW_ID) {
    471       pos += snprintf(&input_param_buf[pos], 500, "(%d, %d), ",
    472                       inputs[i].src_id, inputs[i].output_idx);
    473     } else {
    474       pos +=
    475           snprintf(&input_param_buf[pos], 500, "(%d), ", inputs[i].output_idx);
    476     }
    477   }
    478 
    479   char output_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE];
    480   memset(output_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE);
    481   pos = 0;
    482   pos += snprintf(&output_param_buf[pos], 500, "out: ");
    483   for (int i = 0; i < outputs_count; ++i) {
    484     pos += snprintf(&output_param_buf[pos], 500, "(%d), ", outputs[i].max_size);
    485   }
    486 
    487   if (DBG_SHOW_ID) {
    488     TFMLOGV("---(OP) %s, %d, %d, %d, %d, %d, %s, %s", name, node_id, ops_id,
    489             padding_id, inputs_count, outputs_count, input_param_buf,
    490             output_param_buf);
    491   } else {
    492     TFMLOGV("---(OP) %s, %d, %d, %d, %d, %s, %s", name, ops_id, padding_id,
    493             inputs_count, outputs_count, input_param_buf, output_param_buf);
    494   }
    495   const int retval =
    496       hexagon_nn_append_node(graph_id, node_id, ops_id, padding_id, inputs,
    497                              inputs_count, outputs, outputs_count);
    498   if (retval != 0) {
    499     TFMLOGE("Failed to append const node %d", node_id);
    500     return retval;
    501   }
    502   return retval;
    503 }
    504 
    505 void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable) {
    506   s_dbg_use_inception_dummy_data = enable;
    507 }
    508 
    509 bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled() {
    510   return s_dbg_use_inception_dummy_data;
    511 }
    512