1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 // to demonstrate the performance difference between ION and HLOS memory 17 // for sharing with ADSP. 18 #define USE_ION_MEMORY 19 20 #include <limits.h> 21 #include <stdio.h> 22 23 #include "hexagon_controller.h" 24 #include "hexagon_nn.h" 25 #include "tfm_log.h" 26 27 static const uint32_t MAX_NODES = 2048; 28 static const uint32_t MAX_EVENT_COUNT = 256; 29 30 static const bool DUMP_OUTPUT = false; 31 static const bool DBG_EXECUTION = true; 32 33 static const int OUT_RANKING_SIZE = 5; 34 35 // static only for this file. 36 // TODO(satok): allocate dynamically 37 static float s_output_values[300 * 300 * 3 * 4]; 38 39 extern void init_graph(uint32_t id); 40 extern void init_graph_v1(uint32_t id); 41 extern uint8_t inception_dummy_int_data_299x299[]; 42 extern uint8_t inception_dummy_int_data_224x224[]; 43 extern float inception_dummy_float_data_299x299[]; 44 45 enum InceptionVersion { 46 INCEPTION_V1, 47 INCEPTION_V3, 48 }; 49 50 static enum InceptionVersion s_inception_version = INCEPTION_V3; 51 52 ///////////////////////////////////////////////// 53 // file local functions 54 55 static const char* ConvertGraphInfoIdToName(unsigned int id) { 56 // TODO(satok): implement 57 return "?"; 58 } 59 60 static const char* ConvertGraphInfoIdToOpName(unsigned int id) { 61 // TODO(satok): implement 62 return "?"; 63 } 64 65 ///////////////////////////////////////////////// 66 // file local utilities 67 static uint32_t FindMaxIdxWithExcludeList(const float* data, uint32_t entries, 68 const int exclude_size, 69 const int* exclude_idx) { 70 int i; 71 float maxval = data[0]; 72 int maxidx = 0; 73 for (i = 0; i < entries; i++) { 74 bool exclude = false; 75 for (int j = 0; j < exclude_size; ++j) { 76 if (exclude_idx[j] == i) { 77 exclude = true; 78 break; 79 } 80 } 81 if (exclude) { 82 continue; 83 } 84 if (maxval < data[i]) { 85 maxval = data[i]; 86 maxidx = i; 87 } 88 } 89 return maxidx; 90 } 91 92 static uint32_t FindMaxIdx(const float* data, uint32_t entries) { 93 return FindMaxIdxWithExcludeList(data, entries, 0, NULL); 94 } 95 96 void hexagon_controller_PrintMaxNIdx(const float* data, const uint32_t entries, 97 const int n, int* out_ranking) { 98 if (DUMP_OUTPUT) { 99 for (int i = 0; i < entries; ++i) { 100 TFMLOGD("%d: val = %f", i, data[i]); 101 } 102 } 103 if (n >= entries) { 104 TFMLOGD("Too many N %d >= %d", n, entries); 105 } 106 for (int i = 0; i < n; ++i) { 107 out_ranking[i] = INT_MAX; 108 } 109 for (int i = 0; i < n; ++i) { 110 out_ranking[i] = FindMaxIdxWithExcludeList(data, entries, n, out_ranking); 111 } 112 TFMLOGD("=== RANKING ==="); 113 for (int i = 0; i < n; ++i) { 114 TFMLOGD("%d: id = %d, val = %f", i, out_ranking[i], data[out_ranking[i]]); 115 } 116 } 117 118 static inline unsigned long long int GetCounter(hexagon_nn_perfinfo s) { 119 unsigned long long int ret; 120 ret = s.counter_hi; 121 ret <<= 32; 122 ret |= s.counter_lo; 123 return ret; 124 } 125 126 static int CompareCycle(const void* va, const void* vb) { 127 const hexagon_nn_perfinfo* a = va; 128 const hexagon_nn_perfinfo* b = vb; 129 unsigned long long int acount = GetCounter(*a); 130 unsigned long long int bcount = GetCounter(*b); 131 if (acount < bcount) { 132 return -1; 133 } else if (acount > bcount) { 134 return 1; 135 } else { 136 return 0; 137 } 138 } 139 140 ///////////////////////////////////////////////// 141 // Graph functions 142 143 uint32_t hexagon_controller_InstantiateGraph() { 144 const uint32_t nn_id = hexagon_nn_init(); 145 // TODO(satok): make this as argument 146 hexagon_nn_set_debug_level(nn_id, 0); 147 return nn_id; 148 } 149 150 void hexagon_controller_InitGraph(int version, uint32_t nn_id) { 151 if (version == 1) { 152 s_inception_version = INCEPTION_V1; 153 } else if (version == 3) { 154 s_inception_version = INCEPTION_V3; 155 } else { 156 TFMLOGE("Unsupported inception version %d", version); 157 return; 158 } 159 if (s_inception_version == INCEPTION_V3) { 160 init_graph(nn_id); 161 } else if (s_inception_version == INCEPTION_V1) { 162 init_graph_v1(nn_id); 163 } 164 TFMLOGD("Init graph (inception version = %d) done.", version); 165 } 166 167 bool hexagon_controller_ConstructGraph(uint32_t nn_id) { 168 int err; 169 if ((err = hexagon_nn_prepare(nn_id)) != 0) { 170 TFMLOGE("Prepare failed! returned 0x%x\n", err); 171 DumpNNId(nn_id); 172 return false; 173 } else { 174 TFMLOGD("Prepare success!\n"); 175 return true; 176 } 177 } 178 179 uint32_t hexagon_controller_SetupGraph(int version) { 180 const uint32_t nn_id = hexagon_controller_InstantiateGraph(); 181 hexagon_controller_InitGraph(version, nn_id); 182 hexagon_controller_ConstructGraph(nn_id); 183 return nn_id; 184 } 185 186 bool hexagon_controller_ExecuteGraphWithMultipleInOut( 187 const uint32_t nn_id, const int input_count, hexagon_nn_tensordef* inputs, 188 const int output_count, hexagon_nn_tensordef* outputs) { 189 if (DBG_EXECUTION) { 190 TFMLOGD("Preparing to execute... in = %d, out = %d", input_count, 191 output_count); 192 LogDHexagon("Execute graph!"); 193 } 194 195 const int err = 196 hexagon_nn_execute_new(nn_id, inputs, input_count, outputs, output_count); 197 if (err != 0) { 198 if (DBG_EXECUTION) { 199 LogDHexagon("Execution failed!"); 200 TFMLOGE("execute got err: %d\n", err); 201 DumpNNId(nn_id); 202 } 203 return false; 204 } else { 205 if (DBG_EXECUTION) { 206 LogDHexagon("Execution succeeded!"); 207 } 208 return true; 209 } 210 } 211 212 bool hexagon_controller_ExecuteGraph( 213 const uint32_t nn_id, const uint32_t batches, const uint32_t height, 214 const uint32_t width, const uint32_t depth, uint8_t* int_data, 215 const uint32_t int_data_size, uint32_t* out_batches, uint32_t* out_height, 216 uint32_t* out_width, uint32_t* out_depth, uint8_t* out_vals, 217 const uint32_t output_val_byte_size, uint32_t* out_data_byte_size) { 218 if (DBG_EXECUTION) { 219 TFMLOGD("Preparing to execute..."); 220 TFMLOGD("Input: %d, %d, %d, %d, %d, %d", batches, height, width, depth, 221 int_data[0], int_data_size); 222 TFMLOGD("Output: %d, %p", output_val_byte_size, out_vals); 223 LogDHexagon("Execute graph!"); 224 } 225 226 hexagon_nn_tensordef input; 227 hexagon_nn_tensordef output; 228 229 input.batches = batches; 230 input.height = height; 231 input.width = width; 232 input.depth = depth; 233 input.data = int_data; 234 input.dataLen = int_data_size; 235 236 output.data = out_vals; 237 output.dataLen = output_val_byte_size; 238 239 if (!hexagon_controller_ExecuteGraphWithMultipleInOut(nn_id, 1, &input, 1, 240 &output)) { 241 return false; 242 } else { 243 *out_batches = output.batches; 244 *out_height = output.height; 245 *out_width = output.width; 246 *out_depth = output.depth; 247 *out_data_byte_size = output.dataLen; 248 249 if (DBG_EXECUTION) { 250 LogDHexagon("Execution succeeded!"); 251 TFMLOGD("%d x %d x %d x %d, byte size = %d\n", *out_batches, *out_height, 252 *out_width, *out_depth, *out_data_byte_size); 253 } 254 return true; 255 } 256 } 257 258 bool hexagon_controller_ExecuteInceptionDummyData(uint32_t nn_id) { 259 uint32_t out_batches, out_height, out_width, out_depth; 260 uint32_t out_data_size; 261 // s_output_values = 300 * 300 * 3 * 4 * 4 262 const bool success = hexagon_controller_ExecuteGraph( 263 nn_id, INCEPTION_PARAM_BATCHES, INCEPTION_PARAM_HEIGHT_V3, 264 INCEPTION_PARAM_WIDTH_V3, INCEPTION_PARAM_DEPTH, 265 (uint8_t*)inception_dummy_int_data_299x299, 266 INCEPTION_PARAM_HEIGHT_V3 * INCEPTION_PARAM_WIDTH_V3 * 267 INCEPTION_PARAM_DEPTH, 268 &out_batches, &out_height, &out_width, &out_depth, 269 (uint8_t*)s_output_values, sizeof(s_output_values), &out_data_size); 270 if (success) { 271 int out_ranking[OUT_RANKING_SIZE]; 272 hexagon_controller_PrintMaxNIdx( 273 s_output_values, out_batches * out_height * out_width * out_depth, 274 OUT_RANKING_SIZE, out_ranking); 275 TFMLOGD("%d x %d x %d x %d, size = %d\n", out_batches, out_height, 276 out_width, out_depth, out_data_size); 277 TFMLOGD("max idx: %d\n", 278 FindMaxIdx(s_output_values, 279 out_batches * out_height * out_width * out_depth)); 280 if (out_ranking[0] == 169 && out_ranking[1] == 7) { 281 return true; 282 } else { 283 TFMLOGD("Result is wrong! %d, %d", out_ranking[0], out_ranking[1]); 284 return false; 285 } 286 } else { 287 return false; 288 } 289 } 290 291 void hexagon_controller_DumpPerf(uint32_t nn_id) { 292 hexagon_nn_perfinfo info[MAX_NODES]; 293 unsigned long long int total_cycles = 0; 294 unsigned long long int cum_cycles = 0; 295 unsigned long long int counter = 0; 296 unsigned int n_nodes; 297 int i; 298 TFMLOGD("Perf dump follows:"); 299 if (hexagon_nn_get_perfinfo(nn_id, info, MAX_NODES, &n_nodes) != 0) { 300 TFMLOGE("perf info failure"); 301 return; 302 } 303 TFMLOGD("Total %d nodes.", n_nodes); 304 qsort(info, n_nodes, sizeof(info[0]), CompareCycle); 305 for (i = 0; i < n_nodes; i++) { 306 total_cycles += GetCounter(info[i]); 307 } 308 TFMLOGD("Total %lld cycles.", total_cycles); 309 for (i = 0; i < n_nodes; i++) { 310 counter = GetCounter(info[i]); 311 cum_cycles += counter; 312 TFMLOGD( 313 "node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%," 314 "cum_cycles,%lld,%f %%\n", 315 info[i].node_id, ConvertGraphInfoIdToName(info[i].node_id), 316 ConvertGraphInfoIdToOpName(info[i].node_id), info[i].executions, 317 counter, 100 * ((double)counter) / total_cycles, cum_cycles, 318 100 * ((double)cum_cycles) / total_cycles); 319 } 320 #ifdef ENABLE_HVX_FULL_DEBUG 321 DumpAllPerf(nn_id); 322 #endif 323 } 324 325 void hexagon_controller_DumpNodeName(uint32_t nn_id) { 326 TFMLOGD("Show node name"); 327 const uint32_t id = nn_id; 328 hexagon_nn_perfinfo info[MAX_NODES]; 329 unsigned long long int total_cycles = 0; 330 unsigned long long int cum_cycles = 0; 331 unsigned long long int counter = 0; 332 unsigned int node_count; 333 int i; 334 TFMLOGD("Perf dump follows:"); 335 if (hexagon_nn_get_perfinfo(id, info, MAX_NODES, &node_count) != 0) { 336 TFMLOGD("perf info failure"); 337 return; 338 } 339 TFMLOGD("Total %d nodes.", node_count); 340 qsort(info, node_count, sizeof(info[0]), CompareCycle); 341 for (i = 0; i < node_count; i++) { 342 total_cycles += GetCounter(info[i]); 343 } 344 TFMLOGD("Total %lld cycles.", total_cycles); 345 for (i = 0; i < node_count; i++) { 346 counter = GetCounter(info[i]); 347 cum_cycles += counter; 348 TFMLOGD( 349 "node,0x%x,%s,%s,executions,%d,cycles,%lld,%f %%," 350 "cum_cycles,%lld,%f %%", 351 info[i].node_id, ConvertGraphInfoIdToName(info[i].node_id), 352 ConvertGraphInfoIdToOpName(info[i].node_id), info[i].executions, 353 counter, 100 * ((double)counter) / total_cycles, cum_cycles, 354 100 * ((double)cum_cycles) / total_cycles); 355 } 356 } 357 358 void hexagon_controller_Teardown(uint32_t nn_id) { hexagon_nn_teardown(nn_id); } 359