1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // to demonstrate the performance difference between ION and HLOS memory 17 // for sharing with ADSP. 18 #define USE_ION_MEMORY 19 20 #include "hexagon_controller.h" 21 22 #include <malloc.h> 23 #include <stdio.h> 24 25 #include "adspmsgd.h" 26 #include "dspCV.h" 27 #include "node_data_float.h" 28 #include "rpcmem.h" // helper API's for shared buffer allocation 29 #include "soc_interface.h" 30 #include "tfm_log.h" 31 32 // if false, use int data as input. This is only for acceleration purpose. 33 // Also you may need to change android.min. 34 static const bool USE_FLOAT_DATA = true; 35 36 // if true, show id for each node 37 static const bool DBG_SHOW_ID = false; 38 39 static const uint32_t OUTPUT_PARAM_MAX_LINE_SIZE = 1000; 40 41 static const uint32_t PRINT_BUFSIZE = 2 * 1024 * 1024; 42 43 // extern pre-generated inception dummy data 44 extern uint8_t inception_dummy_int_data_224x224[]; 45 extern uint8_t inception_dummy_int_data_299x299[]; 46 extern float inception_dummy_float_data_299x299[]; 47 48 #define HEXAGON_CONTROLLER_VERSION 101 49 50 // allocate print bufsize in advance @MB 51 #define PRINT_BUFSIZE (2 * 1024 * 1024) 52 53 static unsigned char s_print_buf[PRINT_BUFSIZE]; 54 55 #define MAX_INPUTS 10 56 #define MAX_OUTPUTS 10 57 58 static struct NodeDataFloat s_input_node_data_buffer[MAX_INPUTS]; 59 static uint8_t* s_output_node_data_buffer[MAX_OUTPUTS]; 60 static int s_output_node_data_buffer_max_byte_size[MAX_OUTPUTS]; 61 static int s_output_node_data_array_byte_size[MAX_OUTPUTS]; 62 static uint32_t s_target_graph_id; 63 64 static bool s_dbg_use_inception_dummy_data = false; 65 static int s_dbg_inception_version = 3; 66 67 static int GetInputNodeCount() { 68 for (int i = 0; i < MAX_INPUTS; ++i) { 69 if (s_input_node_data_buffer[i].max_buf_byte_size == 0) { 70 return i; 71 } 72 } 73 return 0; 74 } 75 76 static int GetOutputNodeCount() { 77 for (int i = 0; i < MAX_OUTPUTS; ++i) { 78 if (s_output_node_data_buffer_max_byte_size[i] == 0) { 79 return i; 80 } 81 } 82 return 0; 83 } 84 85 static bool SetInputTensorDef(int port, hexagon_nn_tensordef* tensordef) { 86 if (port >= GetInputNodeCount()) { 87 TFMLOGE("Error exceeds input count."); 88 return false; 89 } 90 struct NodeDataFloat* input_node_data_buffer = 91 &s_input_node_data_buffer[port]; 92 tensordef->batches = input_node_data_buffer->x; 93 tensordef->height = input_node_data_buffer->y; 94 tensordef->width = input_node_data_buffer->z; 95 tensordef->depth = input_node_data_buffer->d; 96 tensordef->data = input_node_data_buffer->byte_array_data; 97 tensordef->dataLen = input_node_data_buffer->array_byte_size; 98 99 return true; 100 } 101 102 bool hexagon_controller_SetAllInputTensorDef(int node_count, 103 hexagon_nn_tensordef* tensordef) { 104 bool success = true; 105 if (node_count != GetInputNodeCount()) { 106 TFMLOGE("Error invalid input node count."); 107 return false; 108 } 109 for (int i = 0; i < node_count; ++i) { 110 SetInputTensorDef(i, &tensordef[i]); 111 } 112 return success; 113 } 114 115 static bool SetOutputTensorDef(int port, hexagon_nn_tensordef* tensordef) { 116 if (port >= GetOutputNodeCount()) { 117 TFMLOGE("Error exceeds output count."); 118 return false; 119 } 120 tensordef->data = s_output_node_data_buffer[port]; 121 tensordef->dataLen = s_output_node_data_buffer_max_byte_size[port]; 122 return true; 123 } 124 125 bool hexagon_controller_SetAllOutputTensorDef(int node_count, 126 hexagon_nn_tensordef* tensordef) { 127 bool success = true; 128 if (node_count != GetOutputNodeCount()) { 129 TFMLOGE("Error invalid output node count. %d != %d", node_count, 130 GetOutputNodeCount()); 131 return false; 132 } 133 for (int i = 0; i < node_count; ++i) { 134 SetOutputTensorDef(i, &tensordef[i]); 135 } 136 return success; 137 } 138 139 void hexagon_controller_InitInputNodeDataToInceptionDummyData(int version) { 140 if (version == 1) { 141 if (USE_FLOAT_DATA) { 142 TFMLOGE("ERROR!!!! Do not use float data for v1"); 143 return; 144 } 145 hexagon_controller_CopyByteNodeData( 146 0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V1, 147 INCEPTION_PARAM_WIDTH_V1, INCEPTION_PARAM_DEPTH, 1, 148 inception_dummy_int_data_224x224); 149 } else if (version == 3) { 150 if (USE_FLOAT_DATA) { 151 hexagon_controller_CopyByteNodeData( 152 0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, 153 INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, sizeof(float), 154 (uint8_t*)inception_dummy_float_data_299x299); 155 } else { 156 hexagon_controller_CopyByteNodeData( 157 0, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, 158 INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, 1, 159 inception_dummy_int_data_299x299); 160 } 161 } 162 } 163 164 bool hexagon_controller_ExecuteGraphWithBuffer(uint32_t nn_id, 165 bool show_ranking) { 166 const int input_node_count = GetInputNodeCount(); 167 hexagon_nn_tensordef inputs[input_node_count]; 168 const int output_node_count = GetOutputNodeCount(); 169 if (output_node_count <= 0) { 170 TFMLOGI("Error output node count is 0."); 171 return false; 172 } 173 hexagon_nn_tensordef outputs[output_node_count]; 174 hexagon_controller_SetAllInputTensorDef(input_node_count, inputs); 175 hexagon_controller_SetAllOutputTensorDef(output_node_count, outputs); 176 const bool success = hexagon_controller_ExecuteGraphWithMultipleInOut( 177 nn_id, input_node_count, inputs, output_node_count, outputs); 178 for (int i = 0; i < output_node_count; ++i) { 179 s_output_node_data_array_byte_size[i] = outputs[i].data_valid_len; 180 } 181 182 const hexagon_nn_tensordef* output0 = &outputs[0]; 183 184 const uint32_t out_batches = output0->batches; 185 const uint32_t out_height = output0->height; 186 const uint32_t out_width = output0->width; 187 const uint32_t out_depth = output0->depth; 188 const uint32_t out_data_size = output0->data_valid_len; 189 const uint32_t out_buf_byte_size = output0->dataLen; 190 191 if (!success) { 192 TFMLOGE("Execution failed"); 193 DumpNNId(nn_id); 194 return false; 195 } else if (!show_ranking) { 196 return true; 197 } 198 199 static const int OUT_RANKING_SIZE = 5; 200 int out_ranking[OUT_RANKING_SIZE]; 201 hexagon_controller_PrintMaxNIdx( 202 (float*)s_output_node_data_buffer[0], 203 out_batches * out_height * out_width * out_depth, OUT_RANKING_SIZE, 204 out_ranking); 205 TFMLOGD("%d x %d x %d x %d, byte size = %d, buf size = %d\n", out_batches, 206 out_height, out_width, out_depth, out_data_size, out_buf_byte_size); 207 if (s_dbg_use_inception_dummy_data) { 208 // Check the result of inception with a dummy data. This step shouldn't 209 // be passed when show_ranking != true to avoid adding unnecessary 210 // additional computation cost. 211 if (out_ranking[0] == 169 && out_ranking[1] == 7) { 212 TFMLOGD("Result is correct! %d, %d", out_ranking[0], out_ranking[1]); 213 return true; 214 } else { 215 TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]); 216 return false; 217 } 218 } 219 return true; 220 } 221 222 uint32_t hexagon_controller_GetTargetGraphId() { return s_target_graph_id; } 223 224 void hexagon_controller_SetTargetGraphId(uint32_t graph_id) { 225 s_target_graph_id = graph_id; 226 } 227 228 void hexagon_controller_PrintGraph(uint32_t id) { 229 int retval = hexagon_nn_snpprint(id, s_print_buf, PRINT_BUFSIZE); 230 TFMLOGD("PrintGraph %s\n", s_print_buf); 231 if (retval) { 232 TFMLOGE("Error on print graph\n"); 233 } 234 } 235 236 int hexagon_controller_GetWrapperVersion() { 237 return HEXAGON_CONTROLLER_VERSION; 238 } 239 240 int hexagon_controller_GetHexagonBinaryVersion() { 241 int retval = 0; 242 hexagon_nn_version(&retval); 243 return retval; 244 } 245 246 bool hexagon_controller_AllocateInputNodeDataBuffers(int port, 247 int input_buf_byte_size) { 248 TFMLOGD("Allocate memory for input node data. port = %d, size = %d", port, 249 input_buf_byte_size); 250 if (s_input_node_data_buffer[port].max_buf_byte_size != 0) { 251 TFMLOGE("ERROR! input buffer is already allocated!!"); 252 return false; 253 } else { 254 s_input_node_data_buffer[port].max_buf_byte_size = input_buf_byte_size; 255 posix_memalign((void**)&s_input_node_data_buffer[port].byte_array_data, 128, 256 input_buf_byte_size); 257 TFMLOGD("allocate input node data buffers done"); 258 } 259 return true; 260 } 261 262 bool hexagon_controller_AllocateOutputNodeDataBuffers( 263 int port, int output_buf_byte_size) { 264 TFMLOGD("Allocate memory for output node data. port = %d, size = %d", port, 265 output_buf_byte_size); 266 if (s_output_node_data_buffer_max_byte_size[port] != 0) { 267 TFMLOGE("ERROR! input buffer is already allocated!!"); 268 return false; 269 } else { 270 // s_output_node_data_buffer = malloc(output_size * sizeof(float)); 271 posix_memalign((void**)&s_output_node_data_buffer[port], 128, 272 output_buf_byte_size); 273 s_output_node_data_buffer_max_byte_size[port] = output_buf_byte_size; 274 s_output_node_data_array_byte_size[port] = 0; 275 TFMLOGD("allocate output node data buffers"); 276 } 277 return true; 278 } 279 280 bool hexagon_controller_AllocateMultipleNodeDataBuffers(int input_count, 281 int* input_sizes, 282 int output_count, 283 int* output_sizes) { 284 bool success = true; 285 for (int i = 0; i < input_count; ++i) { 286 success &= 287 hexagon_controller_AllocateInputNodeDataBuffers(i, input_sizes[i]); 288 } 289 for (int i = 0; i < output_count; ++i) { 290 success &= 291 hexagon_controller_AllocateOutputNodeDataBuffers(i, output_sizes[i]); 292 } 293 294 if (s_dbg_use_inception_dummy_data) { 295 hexagon_controller_InitInputNodeDataToInceptionDummyData( 296 s_dbg_inception_version); 297 } 298 return success; 299 } 300 301 bool hexagon_controller_AllocateNodeDataBuffers(int input_size, 302 int output_size) { 303 return hexagon_controller_AllocateMultipleNodeDataBuffers(1, &input_size, 1, 304 &output_size); 305 } 306 307 bool hexagon_controller_ReleaseInputNodeDataBuffersWithPort(int port) { 308 struct NodeDataFloat* input_node_data_buffer = 309 &s_input_node_data_buffer[port]; 310 if (input_node_data_buffer->max_buf_byte_size == 0) { 311 TFMLOGE("ERROR! input buffer has not been allocated yet!!"); 312 return false; 313 } else { 314 input_node_data_buffer->max_buf_byte_size = 0; 315 input_node_data_buffer->array_byte_size = 0; 316 free(input_node_data_buffer->byte_array_data); 317 } 318 return true; 319 } 320 321 bool hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(int port) { 322 if (s_output_node_data_buffer_max_byte_size[port] == 0) { 323 TFMLOGE("ERROR! output buffer has not been allocated yet!!"); 324 return false; 325 } else { 326 s_output_node_data_buffer_max_byte_size[port] = 0; 327 s_output_node_data_array_byte_size[port] = 0; 328 free(s_output_node_data_buffer[port]); 329 } 330 return true; 331 } 332 333 bool hexagon_controller_ReleaseNodeDataBuffers() { 334 bool success = true; 335 for (int i = 0; i < GetInputNodeCount(); ++i) { 336 success &= hexagon_controller_ReleaseInputNodeDataBuffersWithPort(i); 337 } 338 for (int i = 0; i < GetOutputNodeCount(); ++i) { 339 success &= hexagon_controller_ReleaseOutputNodeDataBuffersWithPort(i); 340 } 341 return success; 342 } 343 344 bool hexagon_controller_CopyByteNodeData(int port, int x, int y, int z, int d, 345 int type_byte_size, 346 uint8_t* array_data) { 347 int array_byte_size = x * y * z * d * type_byte_size; 348 TFMLOGD("--- %d, %d, %d, %d, %d, %d", x, y, z, d, type_byte_size, 349 array_byte_size); 350 struct NodeDataFloat* input_node_data_buffer = &s_input_node_data_buffer[0]; 351 352 if (input_node_data_buffer->max_buf_byte_size < array_byte_size) { 353 TFMLOGE("ERROR! input buffer size is too small! %d < %d", 354 input_node_data_buffer->max_buf_byte_size, array_byte_size); 355 return false; 356 } 357 memcpy(input_node_data_buffer->byte_array_data, array_data, array_byte_size); 358 input_node_data_buffer->array_byte_size = array_byte_size; 359 input_node_data_buffer->x = x; 360 input_node_data_buffer->y = y; 361 input_node_data_buffer->z = z; 362 input_node_data_buffer->d = d; 363 return true; 364 } 365 366 int hexagon_controller_InitHexagonWithMaxAttributes(int enable_dcvs, 367 int bus_usage, 368 int version) { 369 TFMLOGI("Init hexagon with max attributes (Controller version = %d)", 370 HEXAGON_CONTROLLER_VERSION); 371 const int MCPS = 1000; 372 const int MBPS = 12000; 373 374 adspmsgd_start(0, RPCMEM_HEAP_DEFAULT, 4096); 375 376 dspCV_Attribute attrib[] = { 377 // The below values will result in the maximum aDSP performance, 378 // at Turbo voltage. 379 // Slightly more MCPS than are available on current targets 380 {DSP_TOTAL_MCPS, MCPS}, 381 // drive the clock to MAX on known targets 382 {DSP_MCPS_PER_THREAD, MCPS / 2}, 383 // 12 GB/sec is slightly higher than the max realistic 384 // max BW on existing targets. 385 {PEAK_BUS_BANDWIDTH_MBPS, MBPS}, 386 // This app is non-real time, and constantly reading/writing memory 387 {BUS_USAGE_PERCENT, bus_usage}, 388 }; 389 int retval = 0; 390 if (!enable_dcvs) { 391 retval = hexagon_nn_disable_dcvs(); 392 if (retval) { 393 TFMLOGE("Failed to disable DSP DCVS: %x\n", retval); 394 } 395 } 396 397 retval = 398 dspCV_initQ6_with_attributes(attrib, sizeof(attrib) / sizeof(attrib[0])); 399 TFMLOGD("Return value from dspCV_initQ6() : %d\n", retval); 400 401 s_target_graph_id = 0; 402 s_dbg_inception_version = version; 403 404 return retval; 405 } 406 407 int hexagon_controller_DeInitHexagon() { 408 adspmsgd_stop(); 409 TFMLOGI("Finalize hexagon"); 410 const int retval = dspCV_deinitQ6(); 411 TFMLOGD("return value from dspCV_deinitQ6(): %d \n", retval); 412 413 hexagon_controller_ReleaseNodeDataBuffers(); 414 415 return retval; 416 } 417 418 void hexagon_controller_GrowMemorySize() { hexagon_nn_config(); } 419 420 struct NodeDataFloat* hexagon_controller_GetInputNodeDataBuffer(int port) { 421 if (port >= GetInputNodeCount()) { 422 TFMLOGE("port should be less than 1"); 423 } 424 return &s_input_node_data_buffer[port]; 425 } 426 427 uint8_t* hexagon_controller_GetOutputNodeDataBuffer(int port, 428 int* out_array_byte_size) { 429 if (port >= GetOutputNodeCount()) { 430 TFMLOGE("port should be less than 1"); 431 } 432 *out_array_byte_size = s_output_node_data_array_byte_size[port]; 433 return s_output_node_data_buffer[port]; 434 } 435 436 // Append const node to the graph 437 int hexagon_controller_AppendConstNode(const char* const name, int graph_id, 438 int node_id, int batch, int height, 439 int width, int depth, 440 const uint8_t* const data, 441 int data_length) { 442 if (DBG_SHOW_ID) { 443 TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d, %d", name, node_id, batch, 444 height, width, depth, data_length); 445 } else { 446 TFMLOGV("---(CONST) %s, %d, %d, %d, %d, %d", name, batch, height, width, 447 depth, data_length); 448 } 449 const int retval = hexagon_nn_append_const_node( 450 graph_id, node_id, batch, height, width, depth, data, data_length); 451 if (retval != 0) { 452 TFMLOGE("Failed to append const node %d", node_id); 453 return retval; 454 } 455 return retval; 456 } 457 458 // Append node to the graph 459 int hexagon_controller_AppendNode(const char* const name, int graph_id, 460 int node_id, int ops_id, int padding_id, 461 const hexagon_nn_input* const inputs, 462 int inputs_count, 463 const hexagon_nn_output* const outputs, 464 int outputs_count) { 465 char input_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE]; 466 memset(input_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE); 467 int pos = 0; 468 pos += snprintf(&input_param_buf[pos], 500, "in: "); 469 for (int i = 0; i < inputs_count; ++i) { 470 if (DBG_SHOW_ID) { 471 pos += snprintf(&input_param_buf[pos], 500, "(%d, %d), ", 472 inputs[i].src_id, inputs[i].output_idx); 473 } else { 474 pos += 475 snprintf(&input_param_buf[pos], 500, "(%d), ", inputs[i].output_idx); 476 } 477 } 478 479 char output_param_buf[OUTPUT_PARAM_MAX_LINE_SIZE]; 480 memset(output_param_buf, 0, OUTPUT_PARAM_MAX_LINE_SIZE); 481 pos = 0; 482 pos += snprintf(&output_param_buf[pos], 500, "out: "); 483 for (int i = 0; i < outputs_count; ++i) { 484 pos += snprintf(&output_param_buf[pos], 500, "(%d), ", outputs[i].max_size); 485 } 486 487 if (DBG_SHOW_ID) { 488 TFMLOGV("---(OP) %s, %d, %d, %d, %d, %d, %s, %s", name, node_id, ops_id, 489 padding_id, inputs_count, outputs_count, input_param_buf, 490 output_param_buf); 491 } else { 492 TFMLOGV("---(OP) %s, %d, %d, %d, %d, %s, %s", name, ops_id, padding_id, 493 inputs_count, outputs_count, input_param_buf, output_param_buf); 494 } 495 const int retval = 496 hexagon_nn_append_node(graph_id, node_id, ops_id, padding_id, inputs, 497 inputs_count, outputs, outputs_count); 498 if (retval != 0) { 499 TFMLOGE("Failed to append const node %d", node_id); 500 return retval; 501 } 502 return retval; 503 } 504 505 void hexagon_controller_EnableDbgUseInceptionDummyData(bool enable) { 506 s_dbg_use_inception_dummy_data = enable; 507 } 508 509 bool hexagon_controller_IsDbgUseInceptionDummyDataEnabled() { 510 return s_dbg_use_inception_dummy_data; 511 } 512