/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
#include "tensorflow/lite/toco/tooling_util.h"

#include <functional>
#include <iterator>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <utility>

#include "absl/strings/ascii.h"
#include "absl/strings/str_cat.h"
#include "absl/strings/str_join.h"
#include "absl/strings/str_replace.h"
#include "absl/strings/str_split.h"
#include "re2/re2.h"
#include "tensorflow/core/lib/core/status.h"
#include "tensorflow/core/platform/logging.h"
#include "tensorflow/lite/toco/dump_graphviz.h"
#include "tensorflow/lite/toco/model_flags.pb.h"
#include "tensorflow/lite/toco/toco_graphviz_dump_options.h"

namespace toco {

// Find the longest common prefix of two strings.
absl::string_view FindLongestCommonPrefix(absl::string_view a,
                                          absl::string_view b) {
  if (a.empty() || b.empty()) return absl::string_view();

  const char* pa = a.data();
  const char* pb = b.data();
  size_t count = 0;
  const size_t limit = std::min(a.size(), b.size());
  while (count < limit && *pa == *pb) {
    ++pa;
    ++pb;
    ++count;
  }

  return absl::string_view(a.data(), count);
}

string LogName(const Operator& op) {
  const string& opname = HelpfulOperatorTypeName(op);
  if (op.outputs.empty()) {
    return toco::port::StringF("{%s operator}", opname);
  } else {
    return toco::port::StringF("{%s operator with output %s}", opname,
                               op.outputs[0]);
  }
}

string ArrayDataTypeName(ArrayDataType data_type) {
  switch (data_type) {
    case ArrayDataType::kFloat:
      return "float";
    case ArrayDataType::kInt8:
      return "int8";
    case ArrayDataType::kUint8:
      return "uint8";
    case ArrayDataType::kInt16:
      return "int16";
    case ArrayDataType::kUint16:
      return "uint16";
    case ArrayDataType::kInt32:
      return "int32";
    case ArrayDataType::kUint32:
      return "uint32";
    case ArrayDataType::kInt64:
      return "int64";
    case ArrayDataType::kUint64:
      return "uint64";
    case ArrayDataType::kString:
      return "string";
    case ArrayDataType::kBool:
      return "bool";
    case ArrayDataType::kComplex64:
      return "complex64";
    case ArrayDataType::kNone:
      return "None";
    default:
      LOG(FATAL) << "Unhandled array data type "
                 << static_cast<int>(data_type);
  }
}

bool IsInputArray(const Model& model, const string& array_name) {
  for (const auto& input_array : model.flags.input_arrays()) {
    if (array_name == input_array.name()) {
      return true;
    }
  }
  return false;
}

bool IsOutputArray(const Model& model, const string& array_name) {
  for (const auto& output_array : model.flags.output_arrays()) {
    if (array_name == output_array) {
      return true;
    }
  }
  return false;
}
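
// Illustrative usage sketch (the array name is hypothetical, not part of this
// file): these helpers just scan the model flags, so a transformation can ask
// whether an array sits on the graph's I/O boundary before touching it, e.g.
//   if (IsInputArray(model, "input_tensor") ||
//       IsOutputArray(model, "input_tensor")) {
//     // Leave "input_tensor" alone: it is part of the model's interface.
//   }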

bool IsArrayConsumed(const Model& model, const string& name) {
  if (GetOpWithInput(model, name)) {
    return true;
  }
  if (IsOutputArray(model, name)) {
    return true;
  }
  for (const auto& rnn_state : model.flags.rnn_states()) {
    if (rnn_state.back_edge_source_array() == name) {
      return true;
    }
  }
  return false;
}

int CountTrueOutputs(const Model& model, const Operator& op) {
  int count = 0;
  for (const string& output : op.outputs) {
    if (IsArrayConsumed(model, output)) {
      ++count;
    }
  }
  return count;
}

int CountOpsWithInput(const Model& model, const string& array_name) {
  int count = 0;
  for (const auto& op : model.operators) {
    for (auto& input : op->inputs) {
      if (input == array_name) {
        count++;
        // Breaking here is important: some graphs have ops that use the
        // same array as more than one of their inputs, and in that case
        // we want it counted only once.
        break;
      }
    }
  }
  return count;
}

bool DeleteArrayIfUnused(const string& array_name, Model* model) {
  if (IsDiscardableArray(*model, array_name) &&
      CountOpsWithInput(*model, array_name) == 0) {
    model->EraseArray(array_name);
    return true;
  }
  return false;
}

bool DeleteArrayIfUsedOnce(const string& array_name, Model* model) {
  if (IsDiscardableArray(*model, array_name) &&
      CountOpsWithInput(*model, array_name) == 1) {
    model->EraseArray(array_name);
    return true;
  }
  return false;
}

void DeleteOpAndArraysIfUnused(Model* model, const Operator* op) {
  for (const string& array_name : op->inputs) {
    DeleteArrayIfUsedOnce(array_name, model);
  }
  auto op_it = FindOp(*model, op);
  CHECK(op_it != model->operators.end());
  model->operators.erase(op_it);
}

std::vector<std::unique_ptr<Operator>>::const_iterator FindOpWithOutput(
    const Model& model, const string& array_name) {
  for (auto it = model.operators.begin(); it != model.operators.end(); ++it) {
    for (auto& output : it->get()->outputs) {
      if (output == array_name) {
        return it;
      }
    }
  }
  return model.operators.end();
}

std::vector<std::unique_ptr<Operator>>::iterator FindOpWithOutput(
    Model& model, const string& array_name) {
  for (auto it = model.operators.begin(); it != model.operators.end(); ++it) {
    for (auto& output : it->get()->outputs) {
      if (output == array_name) {
        return it;
      }
    }
  }
  return model.operators.end();
}

Operator* GetOpWithOutput(const Model& model, const string& array_name) {
  auto it = FindOpWithOutput(model, array_name);
  return it == model.operators.end() ? nullptr : it->get();
}
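
// Illustrative usage sketch (hypothetical array name): GetOpWithOutput does a
// linear scan for the producer of an array, e.g.
//   const Operator* producer = GetOpWithOutput(model, "conv/Relu");
//   if (producer == nullptr) {
//     // "conv/Relu" is not produced by any op: it is a graph input, a
//     // constant, or missing.
//   }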

// GetFirstOpWithInput assumes that this finds the first op.
std::vector<std::unique_ptr<Operator>>::const_iterator FindOpWithInput(
    const Model& model, const string& array_name) {
  for (auto it = model.operators.begin(); it != model.operators.end(); ++it) {
    for (auto& input : it->get()->inputs) {
      if (input == array_name) {
        return it;
      }
    }
  }
  return model.operators.end();
}

std::vector<std::unique_ptr<Operator>>::iterator FindOpWithInput(
    Model& model, const string& array_name) {
  for (auto it = model.operators.begin(); it != model.operators.end(); ++it) {
    for (auto& input : it->get()->inputs) {
      if (input == array_name) {
        return it;
      }
    }
  }
  return model.operators.end();
}

std::vector<std::unique_ptr<Operator>>::const_iterator FindOp(
    const Model& model, const Operator* op) {
  for (auto it = model.operators.begin(); it != model.operators.end(); ++it) {
    if (it->get() == op) {
      return it;
    }
  }
  return model.operators.end();
}

std::vector<std::unique_ptr<Operator>>::iterator FindOp(Model& model,
                                                        const Operator* op) {
  for (auto it = model.operators.begin(); it != model.operators.end(); ++it) {
    if (it->get() == op) {
      return it;
    }
  }
  return model.operators.end();
}

Operator* GetOpWithInput(const Model& model, const string& array_name) {
  auto it = FindOpWithInput(model, array_name);
  return it == model.operators.end() ? nullptr : it->get();
}

Operator* GetFirstOpWithInput(const Model& model, const string& array_name) {
  auto it = FindOpWithInput(model, array_name);
  return it == model.operators.end() ? nullptr : it->get();
}

void ReplaceArrayUsage(Model* model, const string& old_array_name,
                       const string& new_array_name) {
  for (auto& op_it : model->operators) {
    Operator* op = op_it.get();
    for (size_t i = 0; i < op->inputs.size(); ++i) {
      if (op->inputs[i] == old_array_name) {
        op->inputs[i] = new_array_name;
      }
    }
    for (size_t i = 0; i < op->outputs.size(); ++i) {
      if (op->outputs[i] == old_array_name) {
        op->outputs[i] = new_array_name;
      }
    }
  }
}

string FormatArraysList(const Model& model, const std::vector<string>& list) {
  if (list.empty()) {
    return "[]";
  }
  string result = "";
  if (list.size() > 1) {
    result += "[ ";
  }
  for (std::size_t i = 0; i < list.size(); i++) {
    if (i > 0) {
      result += ", ";
    }
    result += list[i];
  }
  if (list.size() > 1) {
    result += " ]";
  }
  return result;
}

const char* OperatorTypeName(OperatorType type) {
  switch (type) {
#define HANDLE_OPERATORTYPENAME_CASE(c) \
  case OperatorType::k##c:              \
    return #c;
    HANDLE_OPERATORTYPENAME_CASE(Abs)
    HANDLE_OPERATORTYPENAME_CASE(Add)
    HANDLE_OPERATORTYPENAME_CASE(AddN)
    HANDLE_OPERATORTYPENAME_CASE(AveragePool)
    HANDLE_OPERATORTYPENAME_CASE(BatchMatMul)
    HANDLE_OPERATORTYPENAME_CASE(BatchNormalization)
    HANDLE_OPERATORTYPENAME_CASE(Conv)
    HANDLE_OPERATORTYPENAME_CASE(Concatenation)
    HANDLE_OPERATORTYPENAME_CASE(DepthwiseConv)
    HANDLE_OPERATORTYPENAME_CASE(DepthToSpace)
    HANDLE_OPERATORTYPENAME_CASE(SpaceToDepth)
    HANDLE_OPERATORTYPENAME_CASE(FullyConnected)
    HANDLE_OPERATORTYPENAME_CASE(Dequantize)
    HANDLE_OPERATORTYPENAME_CASE(L2Normalization)
    HANDLE_OPERATORTYPENAME_CASE(LocalResponseNormalization)
    HANDLE_OPERATORTYPENAME_CASE(Log)
    HANDLE_OPERATORTYPENAME_CASE(Logistic)
    HANDLE_OPERATORTYPENAME_CASE(LstmCell)
    HANDLE_OPERATORTYPENAME_CASE(MaxPool)
    HANDLE_OPERATORTYPENAME_CASE(L2Pool)
    HANDLE_OPERATORTYPENAME_CASE(FakeQuant)
    HANDLE_OPERATORTYPENAME_CASE(Mul)
    HANDLE_OPERATORTYPENAME_CASE(RandomUniform)
    HANDLE_OPERATORTYPENAME_CASE(Elu)
    HANDLE_OPERATORTYPENAME_CASE(Relu)
    HANDLE_OPERATORTYPENAME_CASE(Relu1)
    HANDLE_OPERATORTYPENAME_CASE(Relu6)
    HANDLE_OPERATORTYPENAME_CASE(PRelu)
    HANDLE_OPERATORTYPENAME_CASE(ReorderAxes)
    HANDLE_OPERATORTYPENAME_CASE(Softmax)
    HANDLE_OPERATORTYPENAME_CASE(LogSoftmax)
    HANDLE_OPERATORTYPENAME_CASE(Div)
    HANDLE_OPERATORTYPENAME_CASE(Tanh)
    HANDLE_OPERATORTYPENAME_CASE(Sin)
    HANDLE_OPERATORTYPENAME_CASE(All)
    HANDLE_OPERATORTYPENAME_CASE(Assert)
    HANDLE_OPERATORTYPENAME_CASE(ExpandDims)
    HANDLE_OPERATORTYPENAME_CASE(Fill)
    HANDLE_OPERATORTYPENAME_CASE(FloorMod)
    HANDLE_OPERATORTYPENAME_CASE(FloorDiv)
    HANDLE_OPERATORTYPENAME_CASE(Greater)
    HANDLE_OPERATORTYPENAME_CASE(GreaterEqual)
    HANDLE_OPERATORTYPENAME_CASE(Identity)
    HANDLE_OPERATORTYPENAME_CASE(Less)
    HANDLE_OPERATORTYPENAME_CASE(LessEqual)
    HANDLE_OPERATORTYPENAME_CASE(MatMul)
    HANDLE_OPERATORTYPENAME_CASE(ReduceMax)  // Reduction Max
    HANDLE_OPERATORTYPENAME_CASE(Maximum)    // Element-wise Maximum
    HANDLE_OPERATORTYPENAME_CASE(Merge)
    HANDLE_OPERATORTYPENAME_CASE(ReduceMin)  // Reduction Min
    HANDLE_OPERATORTYPENAME_CASE(Minimum)    // Element-wise Minimum
    HANDLE_OPERATORTYPENAME_CASE(Neg)
    HANDLE_OPERATORTYPENAME_CASE(OneHot)
    HANDLE_OPERATORTYPENAME_CASE(Pack)
    HANDLE_OPERATORTYPENAME_CASE(Pad)
    HANDLE_OPERATORTYPENAME_CASE(PadV2)
    HANDLE_OPERATORTYPENAME_CASE(StridedSlice)
    HANDLE_OPERATORTYPENAME_CASE(Range)
    HANDLE_OPERATORTYPENAME_CASE(Rank)
    HANDLE_OPERATORTYPENAME_CASE(Reshape)
    HANDLE_OPERATORTYPENAME_CASE(Squeeze)
    HANDLE_OPERATORTYPENAME_CASE(Rsqrt)
    HANDLE_OPERATORTYPENAME_CASE(Shape)
    HANDLE_OPERATORTYPENAME_CASE(Slice)
    HANDLE_OPERATORTYPENAME_CASE(Split)
    HANDLE_OPERATORTYPENAME_CASE(SplitV)
    HANDLE_OPERATORTYPENAME_CASE(Sqrt)
    HANDLE_OPERATORTYPENAME_CASE(Square)
    HANDLE_OPERATORTYPENAME_CASE(Switch)
    HANDLE_OPERATORTYPENAME_CASE(Sub)
    HANDLE_OPERATORTYPENAME_CASE(Sum)
    HANDLE_OPERATORTYPENAME_CASE(Tile)
    HANDLE_OPERATORTYPENAME_CASE(Transpose)
    HANDLE_OPERATORTYPENAME_CASE(TransposeConv)
    HANDLE_OPERATORTYPENAME_CASE(Concat)
    HANDLE_OPERATORTYPENAME_CASE(ConcatV2)
    HANDLE_OPERATORTYPENAME_CASE(Cast)
    HANDLE_OPERATORTYPENAME_CASE(Floor)
    HANDLE_OPERATORTYPENAME_CASE(Ceil)
    HANDLE_OPERATORTYPENAME_CASE(Gather)
    HANDLE_OPERATORTYPENAME_CASE(GatherNd)
    HANDLE_OPERATORTYPENAME_CASE(ResizeBilinear)
    HANDLE_OPERATORTYPENAME_CASE(SpaceToBatchND)
    HANDLE_OPERATORTYPENAME_CASE(BatchToSpaceND)
    HANDLE_OPERATORTYPENAME_CASE(Mean)
    HANDLE_OPERATORTYPENAME_CASE(ReduceProd)
    HANDLE_OPERATORTYPENAME_CASE(Svdf)
    HANDLE_OPERATORTYPENAME_CASE(ArgMax)
    HANDLE_OPERATORTYPENAME_CASE(ArgMin)
    HANDLE_OPERATORTYPENAME_CASE(TopK_V2)
    HANDLE_OPERATORTYPENAME_CASE(Unsupported)
    HANDLE_OPERATORTYPENAME_CASE(Exp)
    HANDLE_OPERATORTYPENAME_CASE(DynamicPartition)
    HANDLE_OPERATORTYPENAME_CASE(DynamicStitch)
    HANDLE_OPERATORTYPENAME_CASE(Select)
    HANDLE_OPERATORTYPENAME_CASE(SparseToDense)
    HANDLE_OPERATORTYPENAME_CASE(Equal)
    HANDLE_OPERATORTYPENAME_CASE(NotEqual)
    HANDLE_OPERATORTYPENAME_CASE(Pow)
    HANDLE_OPERATORTYPENAME_CASE(Any)
    HANDLE_OPERATORTYPENAME_CASE(LogicalAnd)
    HANDLE_OPERATORTYPENAME_CASE(LogicalNot)
    HANDLE_OPERATORTYPENAME_CASE(LogicalOr)
    HANDLE_OPERATORTYPENAME_CASE(CTCBeamSearchDecoder)
    HANDLE_OPERATORTYPENAME_CASE(Unpack)
    HANDLE_OPERATORTYPENAME_CASE(ZerosLike)
    HANDLE_OPERATORTYPENAME_CASE(UnidirectionalSequenceLstm)
    HANDLE_OPERATORTYPENAME_CASE(BidirectionalSequenceLstm)
    HANDLE_OPERATORTYPENAME_CASE(BidirectionalSequenceRnn)
    HANDLE_OPERATORTYPENAME_CASE(ResizeNearestNeighbor)
    HANDLE_OPERATORTYPENAME_CASE(LeakyRelu)
    HANDLE_OPERATORTYPENAME_CASE(SquaredDifference)
    HANDLE_OPERATORTYPENAME_CASE(MirrorPad)
    HANDLE_OPERATORTYPENAME_CASE(Unique)
    HANDLE_OPERATORTYPENAME_CASE(UnidirectionalSequenceRnn)
    HANDLE_OPERATORTYPENAME_CASE(ReverseV2)
    HANDLE_OPERATORTYPENAME_CASE(Cos)
    HANDLE_OPERATORTYPENAME_CASE(Where)
    HANDLE_OPERATORTYPENAME_CASE(ReverseSequence)
    default:
      LOG(FATAL) << "Unhandled op type";
#undef HANDLE_OPERATORTYPENAME_CASE
  }
}

string HelpfulOperatorTypeName(const Operator& op) {
  if (op.type == OperatorType::kUnsupported) {
    return toco::port::StringF(
        "(Unsupported TensorFlow op: %s)",
        static_cast<const TensorFlowUnsupportedOperator&>(op).tensorflow_op);
  }
  return OperatorTypeName(op.type);
}

bool OperatorSupportsFusedActivation(OperatorType type) {
  switch (type) {
    case OperatorType::kAdd:
    case OperatorType::kAveragePool:
    case OperatorType::kBatchNormalization:
    case OperatorType::kConv:
    case OperatorType::kDepthwiseConv:
    case OperatorType::kDiv:
    case OperatorType::kFullyConnected:
    case OperatorType::kL2Pool:
    case OperatorType::kMaxPool:
    case OperatorType::kMul:
    case OperatorType::kSub:
    case OperatorType::kSquaredDifference:
      return true;
    default:
      return false;
  }
}

void LogSummary(int log_level, const Model& model) {
  VLOG(log_level) << "Operators summary (" << model.operators.size()
                  << " operators):";
  std::unordered_multiset<OperatorType> ops_by_type;
  for (const auto& op : model.operators) {
    ops_by_type.insert(op->type);
  }
  auto it = ops_by_type.begin();
  while (it != ops_by_type.end()) {
    int count = ops_by_type.count(*it);
    VLOG(log_level) << " " << OperatorTypeName(*it) << ": " << count;
    std::advance(it, count);
  }
}

void LogArray(int log_level, const Model& model, const string& name) {
  VLOG(log_level) << "Array: " << name;
  if (!model.HasArray(name)) {
    VLOG(log_level) << " DOES NOT EXIST";
    return;
  }
  const auto& array = model.GetArray(name);
  VLOG(log_level) << " Data type: " << ArrayDataTypeName(array.data_type);
  VLOG(log_level) << " Final type: "
                  << ArrayDataTypeName(array.final_data_type);
  if (array.buffer) {
    VLOG(log_level) << " Constant Buffer";
  }
  if (array.alloc) {
    VLOG(log_level) << " Transient Alloc";
  }
  if (array.has_shape()) {
    const Shape& array_shape = array.shape();
    if (array_shape.dimensions_count() == 0) {
      VLOG(log_level) << " (Zero dimensions)";
    } else {
      string message = " Dims: ";
      bool first = true;
      for (const int dim : array_shape.dims()) {
        if (!first) {
          message += ", ";
        }
        first = false;
        toco::port::AppendF(&message, "%d", dim);
      }
      VLOG(log_level) << message;
    }
  }
  if (array.minmax) {
    VLOG(log_level) << " MinMax: " << array.minmax->min << " .. "
                    << array.minmax->max;
  }
  if (array.quantization_params) {
    VLOG(log_level) << " QuantizationParams: zero_point="
                    << static_cast<int>(array.quantization_params->zero_point)
                    << ", scale=" << array.quantization_params->scale;
  }
}
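
// For reference, a LogArray() dump at the chosen vlog level looks roughly
// like the following (the array name and values are purely illustrative):
//   Array: conv/weights
//    Data type: float
//    Final type: None
//    Constant Buffer
//    Dims: 3, 3, 8, 16
//    MinMax: -1.5 .. 1.5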
" 515 << array.minmax->max; 516 } 517 if (array.quantization_params) { 518 VLOG(log_level) << " QuantizationParams: zero_point=" 519 << static_cast<int>(array.quantization_params->zero_point) 520 << ", scale=" << array.quantization_params->scale; 521 } 522 } 523 524 void DumpGraphvizVideoFrame(const Model& model) { 525 namespace port = toco::port; 526 527 const auto& dump_options = *GraphVizDumpOptions::singleton(); 528 if (!dump_options.dump_graphviz_video) { 529 return; 530 } 531 CHECK(!dump_options.dump_graphviz.empty()); 532 // TODO(benoitjacob): the static data here means that this function 533 // is stateful, not reentrant, and effectively leaks memory till exit 534 // (since dump_hashes can only grow in size). It also means that it 535 // really only is intended to be called for a single model during the 536 // process' lifetime. So it's not great design at all. The overriding 537 // design aspect here is to make the video-dumping code as unintrusive 538 // and self-contained as possible. Eventually, we'll want to have that 539 // cleaned-up, but that will require some form of general statefulness 540 // in toco (some kind of 'tooling state' data structure) that does 541 // not exist at present, and would be premature to design here just for 542 // this new video-dumping feature. 543 static int dump_id = 0; 544 static std::unordered_set<std::size_t> dump_hashes; 545 string graphviz_dump; 546 DumpGraphviz(model, &graphviz_dump, 547 toco::port::StringF("VIDEO frame:%05d", dump_id)); 548 std::size_t hash = std::hash<string>{}(graphviz_dump); 549 if (!dump_hashes.count(hash)) { 550 LOG(INFO) << "DUMPING GRAPHVIZ VIDEO FRAME: " << dump_id; 551 dump_hashes.insert(hash); 552 const auto result = port::file::SetContents( 553 port::file::JoinPath( 554 dump_options.dump_graphviz, 555 toco::port::StringF("toco_video_%05d.dot", dump_id)), 556 graphviz_dump, port::file::Defaults()); 557 QCHECK(result.ok()) << result.error_message(); 558 dump_id++; 559 } 560 } 561 562 void LogDump(int log_level, const string& message, const Model& model) { 563 namespace port = toco::port; 564 const auto& dump_options = *GraphVizDumpOptions::singleton(); 565 566 DumpGraphvizVideoFrame(model); 567 if (!dump_options.dump_graphviz.empty()) { 568 string graphviz_dump; 569 570 DumpGraphviz(model, &graphviz_dump, message); 571 const auto result = port::file::SetContents( 572 port::file::JoinPath( 573 dump_options.dump_graphviz, 574 absl::StrCat("toco_", absl::StrReplaceAll(message, {{" ", "_"}}), 575 ".dot")), 576 graphviz_dump, port::file::Defaults()); 577 QCHECK(result.ok()) << result.error_message(); 578 } 579 580 if (!VLOG_IS_ON(log_level)) { 581 return; 582 } 583 VLOG(log_level) << "BEGIN DUMP OF TOCO MODEL (" << message << ")"; 584 LogSummary(log_level, model); 585 std::unordered_set<string> already_printed_arrays; 586 for (const auto& op : model.operators) { 587 for (const auto& input : op->inputs) { 588 if (!already_printed_arrays.count(input)) { 589 already_printed_arrays.insert(input); 590 LogArray(log_level, model, input); 591 } 592 } 593 VLOG(log_level) << HelpfulOperatorTypeName(*op) << " :"; 594 VLOG(log_level) << " " << FormatArraysList(model, op->inputs) << " -> " 595 << FormatArraysList(model, op->outputs); 596 if (op->fused_activation_function != FusedActivationFunctionType::kNone) { 597 VLOG(log_level) << " (with fused activation function)"; 598 } 599 for (const auto& output : op->outputs) { 600 if (!already_printed_arrays.count(output)) { 601 already_printed_arrays.insert(output); 602 

// Note remaining raw-array extension in ProcessTensorFlowReshapeOperator().
void ExtendShape(Shape* shape, int new_shape_size) {
  CHECK_GE(new_shape_size, shape->dimensions_count());
  const int size_increase = new_shape_size - shape->dimensions_count();
  auto* shape_dims = shape->mutable_dims();
  shape_dims->insert(shape_dims->begin(), size_increase, 1);
}

// TODO(b/62904716) Remove along with remaining uses.
void UnextendShape(Shape* shape, int new_shape_size) {
  CHECK_LE(new_shape_size, shape->dimensions_count());
  const int size_reduction = shape->dimensions_count() - new_shape_size;
  for (int i = 0; i < size_reduction; i++) {
    CHECK_EQ(shape->dims(i), 1);
  }
  std::vector<int>& shape_dims = *shape->mutable_dims();
  shape_dims.erase(shape_dims.begin(), shape_dims.begin() + size_reduction);
}

// In general, zero-sized dimensions are disallowed, but there are exceptions,
// e.g., if the tensor data itself represents a scalar (rank 0) shape, its
// shape will have dimensions [0]. CheckNonEmptyShapeDimensions is more
// strict, and is appropriate for ops and comparisons where an empty shape
// doesn't make sense.
template <typename Dims>
void CheckValidShapeDimensions(const Dims& dims) {
  if (dims.size() == 1 && dims[0] == 0) {
    return;
  }
  for (const auto& dim : dims) {
    CHECK_GE(dim, 1);
  }
}

void CheckValidShape(const Shape& shape) {
  CheckValidShapeDimensions(shape.dims());
}

bool IsNonEmpty(const Shape& shape) {
  for (int i = 0; i < shape.dimensions_count(); ++i) {
    if (shape.dims(i) < 1) return false;
  }
  return true;
}

void CheckNonEmptyShapeDimensions(const Shape& shape) {
  for (int i = 0; i < shape.dimensions_count(); ++i) {
    CHECK_GE(shape.dims()[i], 1) << "shape has dimension 0 at index " << i
                                 << ". shape = " << ShapeToString(shape);
  }
}

bool ShapesAgreeUpToBroadcasting(const Shape& shape0, const Shape& shape1) {
  CheckNonEmptyShapeDimensions(shape0);
  CheckNonEmptyShapeDimensions(shape1);

  const Shape* longer = &shape0;
  const Shape* shorter = &shape1;
  if (shape1.dimensions_count() > shape0.dimensions_count()) {
    longer = &shape1;
    shorter = &shape0;
  }

  // Walk dimensions back to front until we run out of dimensions in the
  // shorter shape.
  int longer_index = longer->dimensions_count() - 1;
  int shorter_index = shorter->dimensions_count() - 1;
  while (shorter_index >= 0) {
    const int d_long = longer->dims(longer_index);
    const int d_short = shorter->dims(shorter_index);
    // Broadcasting fails if the dimensions are different *and* neither is 1.
    if ((d_long != d_short) && (d_long != 1) && (d_short != 1)) {
      return false;
    }
    longer_index--;
    shorter_index--;
  }
  return true;
}
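
// Worked example for the check above (shapes are illustrative): aligned from
// the trailing dimension, each pair of dims must be equal or contain a 1.
// So {8, 1, 6, 1} and {7, 1, 5} agree up to broadcasting (pairs 1/5, 6/1,
// 1/7), while {2, 3} and {4, 3} do not (2 vs 4, and neither is 1).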

bool ShapesAgreeUpToExtending(const Shape& shape0, const Shape& shape1) {
  CheckNonEmptyShapeDimensions(shape0);
  CheckNonEmptyShapeDimensions(shape1);

  const Shape* longer = &shape0;
  const Shape* shorter = &shape1;
  if (shape1.dimensions_count() > shape0.dimensions_count()) {
    longer = &shape1;
    shorter = &shape0;
  }

  // Walk dimensions back to front until we run out of dimensions in the
  // shorter shape.
  int longer_index = longer->dimensions_count() - 1;
  int shorter_index = shorter->dimensions_count() - 1;
  while (shorter_index >= 0) {
    const int d_long = longer->dims(longer_index);
    const int d_short = shorter->dims(shorter_index);
    // Extending fails if the dimensions are different.
    if (d_long != d_short) {
      return false;
    }
    longer_index--;
    shorter_index--;
  }

  // The remaining dimensions in the longer shape must be 1.
  while (longer_index >= 0) {
    const int d_long = longer->dims(longer_index);
    if (d_long != 1) {
      return false;
    }
    longer_index--;
  }

  return true;
}

int RequiredBufferSizeForShape(const Shape& shape) {
  CheckValidShape(shape);
  int max_offset = 1;
  for (const auto& dim : shape.dims()) {
    max_offset *= dim;
  }
  return max_offset;
}

bool IsConstantParameterArray(const Model& model, const string& name) {
  if (!model.HasArray(name)) {
    return false;
  }

  return !!model.GetArray(name).buffer;
}

namespace {
template <ArrayDataType A>
bool CompareArrayBuffers(const Array& lhs_array, const Array& rhs_array) {
  CHECK(lhs_array.data_type == rhs_array.data_type) << "Data types must match";
  CHECK(lhs_array.buffer) << "LHS must be constant";
  CHECK(rhs_array.buffer) << "RHS must be constant";
  const auto& lhs_data = lhs_array.GetBuffer<A>().data;
  const auto& rhs_data = rhs_array.GetBuffer<A>().data;
  CHECK_EQ(lhs_data.size(), rhs_data.size())
      << "Buffer sizes must match in element count";
  for (int i = 0; i < lhs_data.size(); ++i) {
    if (lhs_data[i] != rhs_data[i]) {
      return false;
    }
  }
  return true;
}

bool HaveSameMinMax(const Array& lhs_array, const Array& rhs_array) {
  if (lhs_array.minmax || rhs_array.minmax) {
    if (!lhs_array.minmax || !rhs_array.minmax) {
      return false;
    }
    if (!(*lhs_array.minmax == *rhs_array.minmax)) {
      return false;
    }
  }
  return true;
}

bool HaveSameQuantizationParams(const Array& lhs_array,
                                const Array& rhs_array) {
  if (lhs_array.quantization_params || rhs_array.quantization_params) {
    if (!lhs_array.quantization_params || !rhs_array.quantization_params) {
      return false;
    }
    if (!(*lhs_array.quantization_params == *rhs_array.quantization_params)) {
      return false;
    }
  }
  return true;
}

}  // namespace

bool CompareConstantArrays(const Array& lhs_array, const Array& rhs_array) {
  bool attrs_equal = lhs_array.shape() == rhs_array.shape() &&
                     lhs_array.data_type == rhs_array.data_type &&
                     lhs_array.final_data_type == rhs_array.final_data_type &&
                     HaveSameMinMax(lhs_array, rhs_array) &&
                     HaveSameQuantizationParams(lhs_array, rhs_array) &&
                     lhs_array.narrow_range == rhs_array.narrow_range;
  if (!attrs_equal) {
    return false;
  }
  switch (lhs_array.data_type) {
    case ArrayDataType::kBool:
      return CompareArrayBuffers<ArrayDataType::kBool>(lhs_array, rhs_array);
    case ArrayDataType::kFloat:
      return CompareArrayBuffers<ArrayDataType::kFloat>(lhs_array, rhs_array);
    case ArrayDataType::kInt8:
      return CompareArrayBuffers<ArrayDataType::kInt8>(lhs_array, rhs_array);
    case ArrayDataType::kUint8:
      return CompareArrayBuffers<ArrayDataType::kUint8>(lhs_array, rhs_array);
    case ArrayDataType::kInt16:
      return CompareArrayBuffers<ArrayDataType::kInt16>(lhs_array, rhs_array);
    case ArrayDataType::kUint16:
      return CompareArrayBuffers<ArrayDataType::kUint16>(lhs_array, rhs_array);
    case ArrayDataType::kInt32:
      return CompareArrayBuffers<ArrayDataType::kInt32>(lhs_array, rhs_array);
    case ArrayDataType::kUint32:
      return CompareArrayBuffers<ArrayDataType::kUint32>(lhs_array, rhs_array);
    case ArrayDataType::kInt64:
      return CompareArrayBuffers<ArrayDataType::kInt64>(lhs_array, rhs_array);
    case ArrayDataType::kUint64:
      return CompareArrayBuffers<ArrayDataType::kUint64>(lhs_array, rhs_array);
    case ArrayDataType::kString:
      return CompareArrayBuffers<ArrayDataType::kString>(lhs_array, rhs_array);
    case ArrayDataType::kComplex64:
      return CompareArrayBuffers<ArrayDataType::kComplex64>(lhs_array,
                                                            rhs_array);
    default:
      LOG(FATAL) << "Unsupported data type: "
                 << ArrayDataTypeName(lhs_array.data_type);
      return false;
  }
}

namespace {
// Take an array name, which may be something like "name:3_5", and make it
// acceptable as a TF node name, say "name_3_5".
string SanitizeNameForTFNode(const string& array_name) {
  auto node_name = array_name;
  std::replace(node_name.begin(), node_name.end(), ':', '_');
  return node_name;
}

void CheckInputArraysAreNotOutputArrays(const ModelFlags& model_flags) {
  for (const auto& input_array : model_flags.input_arrays()) {
    for (const string& output_array : model_flags.output_arrays()) {
      QCHECK_NE(input_array.name(), output_array)
          << "The array " << output_array
          << " is listed in both --input_arrays and --output_arrays.";
    }
  }
}

bool IsAsciiPrintable(const string& name) {
  for (char c : name) {
    if (!absl::ascii_isprint(c)) {
      return false;
    }
  }
  return true;
}

string DumpAscii(const string& name) {
  string result;
  port::AppendF(&result, "ASCII | Hex\n");
  port::AppendF(&result, "------+----\n");
  for (char c : name) {
    if (absl::ascii_isprint(c)) {
      port::AppendF(&result, "%c | %x\n", c, c);
    } else {
      port::AppendF(&result, " | %x Not ASCII printable!\n", c);
    }
  }
  return result;
}

void CheckNonAsciiIOArrays(const ModelFlags& model_flags) {
  if (model_flags.allow_nonascii_arrays()) {
    return;
  }
  for (const auto& input_array : model_flags.input_arrays()) {
    QCHECK(IsAsciiPrintable(input_array.name()))
        << "Non-ASCII-printable character found in --input_arrays: "
        << input_array.name()
        << ". Pass --allow_nonascii_arrays to allow that. "
        << "Here is a dump of the string:\n\n"
        << DumpAscii(input_array.name());
  }
  for (const string& output_array : model_flags.output_arrays()) {
    QCHECK(IsAsciiPrintable(output_array))
        << "Non-ASCII-printable character found in --output_arrays: "
        << output_array << ". Pass --allow_nonascii_arrays to allow that. "
        << "Here is a dump of the string:\n\n"
        << DumpAscii(output_array);
  }
}
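
// For reference, the DumpAscii() dump attached to the errors above is a small
// two-column table; for a hypothetical name containing a stray 0x01 byte it
// would look roughly like:
//   ASCII | Hex
//   ------+----
//   a | 61
//    | 1 Not ASCII printable!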

void CheckNonExistentIOArrays(const Model& model) {
  // "non-existent" is interpreted in the stronger sense of
  // "not actually produced/consumed by an op".
  // Rationale: we have to artificially fix up TensorFlow graphs by creating
  // any array that it refers to, so just checking that arrays exist isn't
  // sufficient. The real invariant here is whether arrays are
  // produced/consumed by something.
  if (model.flags.allow_nonexistent_arrays()) {
    return;
  }
  static constexpr char general_comment[] =
      "Is it a typo? To silence this message, pass this flag: "
      "allow_nonexistent_arrays";
  for (const string& output_array : model.flags.output_arrays()) {
    if (IsConstantParameterArray(model, output_array)) {
      continue;  // It is OK to request that a constant be an output.
    }
    QCHECK(GetOpWithOutput(model, output_array))
        << "Specified output array \"" << output_array
        << "\" is not produced by any op in this graph. " << general_comment;
  }
  for (const auto& rnn_state : model.flags.rnn_states()) {
    if (!rnn_state.discardable()) {
      // Check that all RNN states are consumed.
      QCHECK(GetOpWithInput(model, rnn_state.state_array()))
          << "Specified RNN state \"" << rnn_state.state_array()
          << "\" is not consumed by any op in this graph. " << general_comment;
      // Check that all RNN back-edge source arrays are produced.
      QCHECK(GetOpWithOutput(model, rnn_state.back_edge_source_array()))
          << "Specified RNN back-edge source array \""
          << rnn_state.back_edge_source_array()
          << "\" is not produced by any op in this graph. " << general_comment;
    }
  }
}

}  // namespace

void CheckNoMissingArray(const Model& model) {
  for (const auto& op : model.operators) {
    for (const auto& input : op->inputs) {
      CHECK(model.HasArray(input) || model.optional_arrays.count(input))
          << "Input: " << input << " missing for op: " << op->outputs[0]
          << ".";
    }
    for (const auto& output : op->outputs) {
      CHECK(model.HasArray(output)) << "Output: " << output << " missing.";
    }
  }
  CheckNonExistentIOArrays(model);
}

void FixNoMissingArray(Model* model) {
  for (const auto& op : model->operators) {
    for (const auto& input : op->inputs) {
      if (!model->HasArray(input) && !model->IsOptionalArray(input)) {
        model->GetOrCreateArray(input);
      }
    }
    for (const auto& output : op->outputs) {
      if (!model->HasArray(output) && !model->IsOptionalArray(output)) {
        model->GetOrCreateArray(output);
      }
    }
  }
  if (model->flags.allow_nonexistent_arrays()) {
    for (const string& output_array : model->flags.output_arrays()) {
      model->GetOrCreateArray(output_array);
    }
    for (const auto& rnn_state : model->flags.rnn_states()) {
      model->GetOrCreateArray(rnn_state.state_array());
      model->GetOrCreateArray(rnn_state.back_edge_source_array());
    }
  }
}

void CheckNoOrphanedArray(const Model& model) {
  std::unordered_set<string> arrays_without_known_use;
  for (const auto& array : model.GetArrayMap()) {
    if (IsDiscardableArray(model, array.first)) {
      arrays_without_known_use.insert(array.first);
    }
  }
  for (const auto& op : model.operators) {
    for (const auto& input : op->inputs) {
      arrays_without_known_use.erase(input);
    }
    for (const auto& output : op->outputs) {
      arrays_without_known_use.erase(output);
    }
  }
  for (const auto& rnn_state : model.flags.rnn_states()) {
    arrays_without_known_use.erase(rnn_state.state_array());
    arrays_without_known_use.erase(rnn_state.back_edge_source_array());
  }
  if (!arrays_without_known_use.empty()) {
    for (const auto& array : arrays_without_known_use) {
      LOG(INFO) << "Error: Orphaned array: " << array;
    }
  }
  CHECK(arrays_without_known_use.empty());
}
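
// FixNoOrphanedArray mirrors the check above: any discardable array that is
// not referenced by an operator or an RNN state is simply erased from the
// model instead of being reported.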
void FixNoOrphanedArray(Model* model) {
  std::unordered_set<string> arrays_without_known_use;
  for (const auto& array : model->GetArrayMap()) {
    arrays_without_known_use.insert(array.first);
  }
  for (const auto& op : model->operators) {
    for (const auto& input : op->inputs) {
      arrays_without_known_use.erase(input);
    }
    for (const auto& output : op->outputs) {
      arrays_without_known_use.erase(output);
    }
  }
  for (const auto& rnn_state : model->flags.rnn_states()) {
    arrays_without_known_use.erase(rnn_state.state_array());
    arrays_without_known_use.erase(rnn_state.back_edge_source_array());
  }
  for (const auto& array : arrays_without_known_use) {
    if (IsDiscardableArray(*model, array)) {
      model->EraseArray(array);
    }
  }
}

// Apply checks to arrays individually (for-each fashion).
//
// Check consistency of array fields, check name.
void CheckEachArray(const Model& model) {
  for (const auto& array_entry : model.GetArrayMap()) {
    const auto& array = array_entry.second;
    // It's OK to have a buffer or an alloc, but not both.
    // (Since allocs are for transient arrays without a buffer).
    CHECK(!array->buffer || !array->alloc);
    if (array->buffer) {
      // If there is a buffer, its type should be consistent with data_type.
      CHECK(array->buffer->type == array->data_type);
      // The presence of a fixed buffer should imply the presence of a fixed
      // shape.
      CHECK(array->has_shape());
      // A constant buffer should have a valid shape.
      CheckValidShape(array->shape());
      // The shape flat-size should agree with the buffer length.
      CHECK_EQ(array->buffer->Length(),
               RequiredBufferSizeForShape(array->shape()));
    }

    // Check name. Either "name_with_suffix_8" or "name_with_port:3" is
    // acceptable, but not "name_with_both:3_8".
    const string& name = array_entry.first;
    auto colon_pos = name.find_first_of(":");
    if (colon_pos != string::npos) {
      CHECK_EQ(name.substr(colon_pos + 1).find_first_not_of("0123456789"),
               string::npos)
          << "Array '" << name << "' has non-digit characters after colon.";
    }
    CHECK_GT(colon_pos, 0) << "Array '" << name
                           << "' must not start with a colon.";
  }
}

void CheckOperatorOrdering(const Model& model) {
  std::unordered_set<string> arrays_behind_us;
  for (const auto& array_entry : model.GetArrayMap()) {
    if (!GetOpWithOutput(model, array_entry.first)) {
      arrays_behind_us.insert(array_entry.first);
    }
  }
  arrays_behind_us.insert(model.optional_arrays.begin(),
                          model.optional_arrays.end());
  for (const auto& op : model.operators) {
    for (const auto& input : op->inputs) {
      if (!IsConstantParameterArray(model, input)) {
        CHECK(arrays_behind_us.count(input));
      }
    }
    for (const auto& output : op->outputs) {
      CHECK(!arrays_behind_us.count(output));
      arrays_behind_us.insert(output);
    }
  }
  for (const string& output_array : model.flags.output_arrays()) {
    CHECK(arrays_behind_us.count(output_array));
  }
}
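
// FixOperatorOrdering re-emits the operators in a dependency-respecting order:
// it repeatedly scans the remaining operators and appends any whose
// non-constant inputs are already available (effectively a topological sort by
// repeated selection). If no progress can be made, the trace below walks back
// through the offending inputs to the root cause.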
void FixOperatorOrdering(Model* model) {
  std::unordered_set<string> arrays_behind_us;
  for (const auto& array_entry : model->GetArrayMap()) {
    if (!GetOpWithOutput(*model, array_entry.first)) {
      arrays_behind_us.insert(array_entry.first);
    }
  }
  arrays_behind_us.insert(model->optional_arrays.begin(),
                          model->optional_arrays.end());
  std::vector<std::unique_ptr<Operator>> old_operators;
  std::swap(old_operators, model->operators);
  std::set<std::size_t> remaining;
  for (std::size_t i = 0; i < old_operators.size(); i++) {
    remaining.insert(i);
  }
  std::unordered_map<string, string> reason_why_leftover;
  while (true) {
    bool inserted_something = false;
    for (const auto& i : remaining) {
      bool can_insert = true;
      auto& op = old_operators[i];
      CHECK(op);
      for (const auto& input : op->inputs) {
        if (!IsConstantParameterArray(*model, input) &&
            !arrays_behind_us.count(input)) {
          for (const string& output : op->outputs) {
            reason_why_leftover[output] = input;
          }
          can_insert = false;
          break;
        }
      }
      if (can_insert) {
        model->operators.emplace_back(nullptr);
        for (const auto& output : op->outputs) {
          arrays_behind_us.insert(output);
        }
        std::swap(op, model->operators.back());
        remaining.erase(i);
        inserted_something = true;
        break;
      }
    }
    if (!inserted_something) {
      break;
    }
  }
  if (!remaining.empty()) {
    LOG(ERROR)
        << "No viable ordering of operators was found. "
        << "Here is a 'backtrace' of at least one part of the graph that is "
        << "problematic. It starts with the first operator that has a "
        << "problematic input array, and then walks back the graph to "
        << "the operator that produced that input array, etc., until we find "
        << "the root cause:";
    LOG(ERROR) << "BEGIN TRACE OF OPERATOR WITH BAD INPUT";
    LOG(ERROR) << "Here is the first-encountered operator with a bad input: ";
    const Operator* bad_op = old_operators[*remaining.begin()].get();
    std::unordered_set<string> bad_inputs_already_traced;
    // The following while(true) loop should always end with a LOG(FATAL).
    while (true) {
      LOG(ERROR) << HelpfulOperatorTypeName(*bad_op) << " : "
                 << FormatArraysList(*model, bad_op->inputs) << " -> "
                 << FormatArraysList(*model, bad_op->outputs);
      bool found_bad_output = false;
      string bad_output;
      for (const string& output : bad_op->outputs) {
        if (reason_why_leftover.count(output)) {
          found_bad_output = true;
          bad_output = output;
          break;
        }
      }
      CHECK(found_bad_output);
      const string& bad_input = reason_why_leftover[bad_output];
      LOG(ERROR) << "The bad input here is: " << bad_input;
      if (bad_inputs_already_traced.count(bad_input)) {
        LOG(FATAL)
            << "Cycle found! We already encountered that "
            << "input array, " << bad_input << ", earlier in the "
            << "above trace! We expect graphs to be acyclic, even "
            << "RNNs. Let us know if some graph actually needs to have "
            << "cycles, but first, please check if it really is "
            << "an *inference* graph. *Training* graphs are out-of-scope "
            << "for toco.";
      }
      bad_inputs_already_traced.insert(bad_input);
      bad_op = nullptr;
      for (const auto& i : remaining) {
        const Operator* op = old_operators[i].get();
        for (const string& output : op->outputs) {
          if (bad_input == output) {
            bad_op = op;
            break;
          }
        }
        if (bad_op) {
          break;
        }
      }
      if (!bad_op) {
        LOG(ERROR) << "And that's the root cause: "
                   << "that array, " << bad_input << ", isn't produced by any "
                   << "operator, or provided in any other way.";
        LOG(ERROR) << "END TRACE OF OPERATOR WITH BAD INPUT";
        LOG(FATAL) << "(The above was a multi-line fatal error)";
      }
      LOG(ERROR) << "And that array is the output of the following operator:";
    }
  }
  CHECK(remaining.empty())
      << "Should never get here! In case of bad graph, "
      << "the above code should have generated a FATAL error already!";
}

void CheckInvariants(const Model& model) {
  CheckInputArraysAreNotOutputArrays(model.flags);
  CheckNonAsciiIOArrays(model.flags);
  CheckNoMissingArray(model);
  CheckNoOrphanedArray(model);
  CheckEachArray(model);
  CheckOperatorOrdering(model);
}

void CheckCountInRange(const ::toco::ModelFlags::ModelCheck& model_check,
                       const int count, const string& count_description) {
  if (model_check.count_min() >= 0) {
    CHECK_GE(count, model_check.count_min())
        << "Mismatch in " << count_description << ": count was " << count
        << ", but the specified "
        << (model_check.count_max() > model_check.count_min() ? "minimum"
                                                              : "value")
        << " was " << model_check.count_min() << ".";
  }
  if (model_check.count_max() > model_check.count_min()) {
    CHECK_LE(count, model_check.count_max())
        << "Mismatch in " << count_description << ": count was " << count
        << ", but the specified maximum was " << model_check.count_max()
        << ".";
  }
}

void CheckModelCounts(const Model& model) {
  std::unordered_multiset<OperatorType> ops_by_type;
  std::unordered_map<string, OperatorType> op_type_by_name;
  if (model.flags.model_checks_size() == 0) {
    return;
  }

  for (const auto& op : model.operators) {
    ops_by_type.insert(op->type);
    op_type_by_name[OperatorTypeName(op->type)] = op->type;
  }
  for (const auto& model_check : model.flags.model_checks()) {
    string count_type = model_check.count_type();
    if (count_type == "None") {
      continue;
    } else if (count_type == "Arrays") {
      CheckCountInRange(model_check, model.GetArrayMap().size(),
                        "count of arrays");
    } else if (count_type == "Total") {
      CheckCountInRange(model_check, model.operators.size(),
                        "count of all operator instances");
    } else {
      // The check type is not itself checked against the set of valid
      // operators, mainly because the enum set cannot be iterated in C++.
      const int found_count =
          op_type_by_name.count(count_type) > 0
              ? ops_by_type.count(op_type_by_name[count_type])
              : 0;
      CheckCountInRange(model_check, found_count,
                        "count of instances of " + count_type + " operator");
    }
  }
}

void FixEdgeArrays(Model* model) {
  for (const string& output_array_name : model->flags.output_arrays()) {
    if (!GetOpWithOutput(*model, output_array_name)) {
      // Output has no operator producing it. Change that by inserting a copy.
      LOG(WARNING) << "Fixing constant output array " << output_array_name
                   << " by inserting a copy. This is not optimal.";
      string intermediate_array_name =
          AvailableArrayName(*model, output_array_name + "_copy");
      CloneArray(model, output_array_name, intermediate_array_name);
      InsertCopyOperator(model, intermediate_array_name, output_array_name);
    }
  }
}
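
// Illustrative usage sketch (the threshold is hypothetical): callers can skip
// deduplicating tiny constants by passing a minimum byte size, e.g.
//   DedupeConstantArrays(&model, /*min_size=*/64);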
void DedupeConstantArrays(Model* model, size_t min_size) {
  // Walk the arrays and compare each one against the arrays that follow it.
  // This lets us avoid comparing each pair twice and erase duplicate arrays
  // while iterating.
  const auto& array_map = model->GetArrayMap();
  for (auto lhs_array_it = array_map.begin(); lhs_array_it != array_map.end();
       ++lhs_array_it) {
    const auto& lhs_array_name = lhs_array_it->first;
    const auto& lhs_array = *lhs_array_it->second;
    if (!IsConstantParameterArray(*model, lhs_array_name)) {
      // Not a constant array; skip.
      continue;
    }
    ArrayDataType final_data_type =
        lhs_array.final_data_type != ArrayDataType::kNone
            ? lhs_array.final_data_type
            : lhs_array.data_type;
    // Ignore small arrays. String arrays are not checked because their size
    // cannot be estimated.
    if (final_data_type != ArrayDataType::kString) {
      size_t array_byte_size =
          lhs_array.buffer->Length() * ElementSize(final_data_type);
      if (array_byte_size < min_size) {
        // Too small; skip.
        continue;
      }
    }

    auto next_lhs_array_it = lhs_array_it;
    ++next_lhs_array_it;
    for (auto rhs_array_it = next_lhs_array_it;
         rhs_array_it != array_map.end();) {
      const auto& rhs_array_name = rhs_array_it->first;
      const auto& rhs_array = *rhs_array_it->second;
      ++rhs_array_it;
      if (!IsConstantParameterArray(*model, rhs_array_name)) {
        // Not a constant array; skip.
        continue;
      }
      if (!IsDiscardableArray(*model, rhs_array_name)) {
        // Can't remove the array as it's not discardable (such as an IO edge).
        continue;
      }
      if (!CompareConstantArrays(lhs_array, rhs_array)) {
        // Arrays aren't equal; skip.
        continue;
      }

      // Arrays can be deduped!
      VLOG(1) << "Deduplicating arrays; using " << lhs_array_name
              << " in place of " << rhs_array_name;
      ReplaceArrayUsage(model, rhs_array_name, lhs_array_name);
      // Note: rhs_array_it above is already incremented so this is safe.
      model->EraseArray(rhs_array_name);
    }
  }
}

namespace {
void CopyArrayAttribs(const Array& source_array, Array* target_array) {
  target_array->data_type = source_array.data_type;
  target_array->final_data_type = source_array.final_data_type;
  target_array->copy_shape(source_array.shape());

  if (source_array.minmax) {
    target_array->GetOrCreateMinMax() = source_array.GetMinMax();
  } else {
    target_array->minmax.reset();
  }

  if (source_array.quantization_params) {
    target_array->GetOrCreateQuantizationParams() =
        source_array.GetQuantizationParams();
  } else {
    target_array->quantization_params.reset();
  }
}
}  // namespace

void InsertCopyOperator(Model* model, const string& source_array_name,
                        const string& target_array_name) {
  // Reshape to the same size. This should be a no-op.
  const Array& source_array = model->GetArray(source_array_name);
  std::vector<int> shape = source_array.shape().dims();

  // Drop constant data from the target array as the copy will be done at
  // runtime.
  Array& target_array = model->GetOrCreateArray(target_array_name);
  target_array.buffer.reset();
  CopyArrayAttribs(source_array, &target_array);

  // Insert copy operator.
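  // The copy is expressed as a TensorFlow Reshape whose target shape is the
  // source array's own shape, materialized below as a new int32 constant
  // array, so at runtime the reshape should amount to a plain copy of the
  // buffer.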
  auto* copy_op = new TensorFlowReshapeOperator;
  copy_op->inputs = {
      source_array_name,
      CreateInt32Array(
          model, AvailableArrayName(*model, target_array_name + "_copy_shape"),
          shape)};
  copy_op->outputs = {target_array_name};
  if (target_array.has_shape()) {
    copy_op->shape = target_array.shape().dims();
  }
  model->operators.emplace_back(copy_op);
}

void CloneArray(Model* model, const string& source_array_name,
                const string& target_array_name) {
  CHECK(!model->HasArray(target_array_name));
  const Array& source_array = model->GetArray(source_array_name);
  Array& target_array = model->GetOrCreateArray(target_array_name);
  CopyArrayAttribs(source_array, &target_array);

  if (source_array.minmax) {
    const auto& smm = source_array.GetMinMax();
    auto& tmm = target_array.GetOrCreateMinMax();
    tmm.min = smm.min;
    tmm.max = smm.max;
  }

  if (source_array.quantization_params) {
    const auto& sqp = source_array.GetQuantizationParams();
    auto& tqp = target_array.GetOrCreateQuantizationParams();
    tqp.zero_point = sqp.zero_point;
    tqp.scale = sqp.scale;
  }

  target_array.data_type = source_array.data_type;
  target_array.final_data_type = source_array.final_data_type;
  target_array.copy_shape(source_array.shape());

  switch (source_array.data_type) {
    case ArrayDataType::kBool:
      CopyArrayBuffer<ArrayDataType::kBool>(source_array, &target_array);
      break;
    case ArrayDataType::kFloat:
      CopyArrayBuffer<ArrayDataType::kFloat>(source_array, &target_array);
      break;
    case ArrayDataType::kInt8:
      CopyArrayBuffer<ArrayDataType::kInt8>(source_array, &target_array);
      break;
    case ArrayDataType::kUint8:
      CopyArrayBuffer<ArrayDataType::kUint8>(source_array, &target_array);
      break;
    case ArrayDataType::kInt16:
      CopyArrayBuffer<ArrayDataType::kInt16>(source_array, &target_array);
      break;
    case ArrayDataType::kUint16:
      CopyArrayBuffer<ArrayDataType::kUint16>(source_array, &target_array);
      break;
    case ArrayDataType::kInt32:
      CopyArrayBuffer<ArrayDataType::kInt32>(source_array, &target_array);
      break;
    case ArrayDataType::kUint32:
      CopyArrayBuffer<ArrayDataType::kUint32>(source_array, &target_array);
      break;
    case ArrayDataType::kInt64:
      CopyArrayBuffer<ArrayDataType::kInt64>(source_array, &target_array);
      break;
    case ArrayDataType::kUint64:
      CopyArrayBuffer<ArrayDataType::kUint64>(source_array, &target_array);
      break;
    case ArrayDataType::kString:
      CopyArrayBuffer<ArrayDataType::kString>(source_array, &target_array);
      break;
    case ArrayDataType::kComplex64:
      CopyArrayBuffer<ArrayDataType::kComplex64>(source_array, &target_array);
      break;
    default:
      LOG(FATAL) << "Unsupported data type: "
                 << ArrayDataTypeName(source_array.data_type);
      return;
  }
}

void MakeArrayDims(int num_dims, int batch, int height, int width, int depth,
                   std::vector<int>* out_dims) {
  CHECK(out_dims->empty());
  if (num_dims == 0) {
    return;
  } else if (num_dims == 1) {
    CHECK_EQ(batch, 1);
    *out_dims = {depth};
  } else if (num_dims == 2) {
    *out_dims = {batch, depth};
  } else if (num_dims == 3) {
    CHECK_EQ(batch, 1);
    *out_dims = {height, width, depth};
  } else if (num_dims == 4) {
    *out_dims = {batch, height, width, depth};
  } else {
    LOG(FATAL) << "Should not get here: " << num_dims;
  }
}
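
// Illustrative examples of the mapping above (values are hypothetical, and
// each call assumes a fresh, empty dims vector):
//   MakeArrayDims(4, 2, 8, 8, 3, &dims);  // dims == {2, 8, 8, 3}
//   MakeArrayDims(2, 2, 8, 8, 3, &dims);  // dims == {2, 3}  (batch, depth)
//   MakeArrayDims(1, 1, 8, 8, 3, &dims);  // dims == {3}     (depth only)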
"Should not get here: " << num_dims; 1462 } 1463 } 1464 1465 void CreateOrCheckRnnStateArray(const string& name, int size, 1466 int state_num_dims, Model* model) { 1467 int batch = 1; 1468 int num_dims = -1; 1469 if (state_num_dims > 0) { 1470 num_dims = state_num_dims; 1471 } else { 1472 // state_num_dims is not given. We will infer it from an input tensor. 1473 for (const auto& input_array : model->flags.input_arrays()) { 1474 // Pick 'num_dims' and 'batch' from the first input_arrays, unless we find 1475 // a better match by name. 1476 if (input_array.name() == name || num_dims == -1) { 1477 num_dims = input_array.shape().dims_size(); 1478 if (num_dims > 0) { 1479 batch = input_array.shape().dims(0); 1480 } 1481 } 1482 } 1483 } 1484 Array& array = model->GetOrCreateArray(name); 1485 if (array.has_shape()) { 1486 num_dims = array.shape().dimensions_count(); 1487 } 1488 if (!array.has_shape() && num_dims >= 0) { 1489 Shape* shape = array.mutable_shape(); 1490 std::vector<int> dims; 1491 MakeArrayDims(num_dims, batch, 1, 1, size, &dims); 1492 *shape->mutable_dims() = dims; 1493 } 1494 } 1495 1496 void ResolveModelFlags(const ModelFlags& model_flags, Model* model) { 1497 // Merge info about input_arrays from model_flags into model->flags 1498 for (const auto& specified_input_array : model_flags.input_arrays()) { 1499 toco::InputArray* dst_input_array = nullptr; 1500 for (int i = 0; i < model->flags.input_arrays_size(); i++) { 1501 toco::InputArray* candidate_dst_input_array = 1502 model->flags.mutable_input_arrays(i); 1503 if (candidate_dst_input_array->name() == specified_input_array.name()) { 1504 // specified_input_array from model_flags maps to dst_input_array 1505 // in model->flags 1506 dst_input_array = candidate_dst_input_array; 1507 break; 1508 } 1509 } 1510 if (!dst_input_array) { 1511 // Specified_input_array from model_flags is not found in model->flags. 1512 // Match a name-less specified input array when there can be no ambiguity 1513 // as there is only 1 input array. 1514 if (model->flags.input_arrays_size() == 1 && 1515 model_flags.input_arrays_size() == 1 && 1516 !specified_input_array.has_name()) { 1517 dst_input_array = model->flags.mutable_input_arrays(0); 1518 } 1519 } 1520 if (!dst_input_array) { 1521 // Still no match, so create a new input array to copy 1522 // specified_input_array into. 
      dst_input_array = model->flags.add_input_arrays();
      dst_input_array->set_name(specified_input_array.name());
    }

#define RESOLVE_MODEL_FLAG(field_name)                                        \
  if (specified_input_array.has_##field_name()) {                            \
    if (dst_input_array->has_##field_name()) {                               \
      QCHECK_EQ(dst_input_array->field_name(),                               \
                specified_input_array.field_name())                          \
          << "For input array '" << dst_input_array->name() << "', "         \
          << "specified " #field_name " flag with value: "                   \
          << specified_input_array.field_name()                              \
          << " does not agree with already defined " #field_name             \
             " of this model, with value: "                                  \
          << dst_input_array->field_name();                                  \
    } else {                                                                  \
      dst_input_array->set_##field_name(specified_input_array.field_name()); \
    }                                                                         \
  }
    RESOLVE_MODEL_FLAG(std_value);
    RESOLVE_MODEL_FLAG(mean_value);
#undef RESOLVE_MODEL_FLAG

    if (specified_input_array.has_shape()) {
      if (dst_input_array->has_shape()) {
        QCHECK_EQ(specified_input_array.shape().dims_size(),
                  dst_input_array->shape().dims_size())
            << "For input array '" << specified_input_array.name() << "', "
            << "size of specified input shape flag with size: "
            << specified_input_array.shape().dims_size()
            << " does not agree with already defined input shape"
               " of this model, with size: "
            << dst_input_array->shape().dims_size();
        // We treat the first dimension as a special case, since it is often
        // a batch size and the input_shape flag is effectively overriding
        // the model.
        for (int i = 1; i < specified_input_array.shape().dims_size(); i++) {
          QCHECK_EQ(specified_input_array.shape().dims(i),
                    dst_input_array->shape().dims(i))
              << "At dimension number " << i << " of input array "
              << specified_input_array.name() << ", the specified shape's "
              << "dimension flag with dimension: "
              << specified_input_array.shape().dims(i)
              << " does not agree with already defined shape"
              << " of this model, with dimension: "
              << dst_input_array->shape().dims(i);
        }
      } else {
        *dst_input_array->mutable_shape() = specified_input_array.shape();
      }
    }

    if (specified_input_array.has_data_type()) {
      QCHECK(!dst_input_array->has_data_type());
      dst_input_array->set_data_type(specified_input_array.data_type());
    }
  }

  if (model_flags.output_arrays_size() > 0) {
    model->flags.mutable_output_arrays()->CopyFrom(
        model_flags.output_arrays());
  }

#define RESOLVE_MODEL_FLAG(name)                                            \
  if (model_flags.has_##name()) {                                          \
    if (model->flags.has_##name()) {                                       \
      QCHECK_EQ(model_flags.name(), model->flags.name())                   \
          << "Specified " #name " flag with value: " << model_flags.name() \
          << " does not agree with already defined " #name                 \
             " of this model, with value: " << model->flags.name();        \
    } else {                                                                \
      model->flags.set_##name(model_flags.name());                         \
    }                                                                       \
  }

  RESOLVE_MODEL_FLAG(variable_batch)

#undef RESOLVE_MODEL_FLAG

  if (!model_flags.rnn_states().empty()) {
    model->flags.mutable_rnn_states()->CopyFrom(model_flags.rnn_states());
  }

  if (model->flags.model_checks_size() == 0) {
    model->flags.mutable_model_checks()->CopyFrom(model_flags.model_checks());
  }

  QCHECK_GT(model->flags.output_arrays_size(), 0)
      << "This model does not define output arrays, so a "
         "--output_arrays flag must be given on the command-line.";
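
  // Resolve each input array declared in the flags: create the array if
  // needed, reconcile its data type and shape with the flags, and derive a
  // default MinMax from mean_value/std_value.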
  for (auto& input_array_proto : *model->flags.mutable_input_arrays()) {
    auto& input_array = model->GetOrCreateArray(input_array_proto.name());
    if (input_array_proto.has_data_type()) {
      const ArrayDataType specified_type =
          ConvertIODataTypeToArrayDataType(input_array_proto.data_type());
      QCHECK(specified_type != ArrayDataType::kNone);
      if (input_array.data_type != ArrayDataType::kNone) {
        QCHECK(specified_type == input_array.data_type)
            << "For input array " << input_array_proto.name()
            << " the specified input data type "
            << IODataType_Name(input_array_proto.data_type())
            << " conflicts with the existing type.";
      }
      input_array.data_type = specified_type;
    }

    if (input_array.data_type == ArrayDataType::kNone) {
      // Start out with a float input array; it may get replaced by a uint8
      // array later, by MakeInitialDequantizeOp.
      input_array.data_type = ArrayDataType::kFloat;
    }

    // Compare/merge the model->flags describing the input_shape with
    // the actual input array's shape.
    if (!input_array.has_shape()) {
      if (input_array_proto.has_shape()) {
        auto& input_array_dims = *input_array.mutable_shape()->mutable_dims();
        CheckValidShapeDimensions(input_array_proto.shape().dims());
        for (const auto& dim : input_array_proto.shape().dims()) {
          input_array_dims.push_back(dim);
        }
      }
    } else {
      if (input_array_proto.has_shape()) {
        // If an input shape was specified on the flags, ensure that it matches
        // the actual shape in the model.
        const auto& input_array_dims =
            *input_array.mutable_shape()->mutable_dims();
        CHECK_EQ(input_array_dims.size(),
                 input_array_proto.shape().dims_size());
        for (int i = 0; i < input_array_dims.size(); i++) {
          CHECK_EQ(input_array_dims[i], input_array_proto.shape().dims(i));
        }
      } else {
        for (int i = 0; i < input_array.shape().dimensions_count(); i++) {
          input_array_proto.mutable_shape()->add_dims(
              input_array.shape().dims(i));
        }
      }
    }

    const float mean_value = input_array_proto.mean_value();
    const float std_value = input_array_proto.std_value();
    MinMax input_minmax;
    float qmin = 0, qmax = 255;
    if (input_array.data_type == ArrayDataType::kInt16) {
      qmin = -32768;
      qmax = 32767;
    }
    input_minmax.min = (qmin - mean_value) / std_value;
    input_minmax.max = (qmax - mean_value) / std_value;
    if (!input_array.minmax) {
      input_array.GetOrCreateMinMax() = input_minmax;
    }
  }

  // Create the RNN state arrays.
  for (const auto& rnn_state : model->flags.rnn_states()) {
    CreateOrCheckRnnStateArray(rnn_state.state_array(), rnn_state.size(),
                               rnn_state.num_dims(), model);
  }

  model->flags.set_change_concat_input_ranges(
      model_flags.change_concat_input_ranges());
  model->flags.set_allow_nonascii_arrays(model_flags.allow_nonascii_arrays());
  model->flags.set_allow_nonexistent_arrays(
      model_flags.allow_nonexistent_arrays());

  CHECK(!model->flags.has_arrays_extra_info());
  *model->flags.mutable_arrays_extra_info() = model_flags.arrays_extra_info();
}
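
// A float activation array is considered ready for quantization below if it
// either carries MinMax information (e.g. MinMax{-6, 6} recorded during
// training) or has a constant buffer whose MinMax can be computed from its
// entries; anything else is a hard error.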
void CheckIsReadyForQuantization(const Model& model) {
  for (const auto& op : model.operators) {
    for (const auto& input : op->inputs) {
      const auto& input_array = model.GetArray(input);
      if (input_array.data_type != ArrayDataType::kFloat) {
        // The array is not float: no quantization needed.
        continue;
      }
      if (input_array.minmax) {
        // The array has minmax: we're good.
        continue;
      }
      if (input_array.buffer) {
        // The array has a constant buffer, so we can
        // fall back to computing the minmax from actual array entries
        // (with a WARNING about possible accuracy implications).
        continue;
      }
      LOG(FATAL)
          << "Array " << input << ", which is an input to the "
          << HelpfulOperatorTypeName(*op) << " operator producing the output "
          << "array " << op->outputs[0] << ", is lacking min/max data, "
          << "which is necessary for quantization. If accuracy matters, either "
          << "target a non-quantized output format, or run quantized training "
          << "with your model from a floating point checkpoint to change the "
          << "input graph to contain min/max information. If you don't care "
          << "about accuracy, you can pass --default_ranges_min= and "
          << "--default_ranges_max= for easy experimentation.";
    }
  }
}

int ElementSize(ArrayDataType data_type) {
  switch (data_type) {
    case ArrayDataType::kBool:
      return sizeof(bool);
    case ArrayDataType::kFloat:
      return 4;
    case ArrayDataType::kInt8:
      return 1;
    case ArrayDataType::kUint8:
      return 1;
    case ArrayDataType::kInt16:
      return 2;
    case ArrayDataType::kUint16:
      return 2;
    case ArrayDataType::kInt32:
      return 4;
    case ArrayDataType::kUint32:
      return 4;
    case ArrayDataType::kInt64:
      return 8;
    case ArrayDataType::kUint64:
      return 8;
    case ArrayDataType::kComplex64:
      return 8;

    // Usually not a critical limitation because strings are only used as
    // inputs and/or outputs.
    case ArrayDataType::kString:
      LOG(FATAL) << "Transient arrays with strings are not supported yet";
      return 0;
    default:
      LOG(FATAL) << "Unknown data_type = " << static_cast<int>(data_type);
      return 0;
  }
}

void DropMinMax(Model* model, const string& array_name) {
  auto& array = model->GetArray(array_name);
  if (!!array.minmax) {
    LOG(WARNING) << "Dropping MinMax information in array " << array_name
                 << ". Expect inaccuracy in quantized inference.";
    array.minmax = nullptr;
  }
}
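
// A transient array is an activation that lives only for the duration of
// inference and gets a slice of a shared workspace allocation. Its buffer
// size is essentially ElementSize(data_type) *
// RequiredBufferSizeForShape(shape); for example, a kFloat array of shape
// [1, 224, 224, 3] needs 4 * 150528 = 602112 bytes. The predicate below
// filters out arrays that cannot be planned this way.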
bool IsAllocatableTransientArray(const Model& model, const string& array_name) {
  // Optional array is not transient.
  if (model.IsOptionalArray(array_name)) return false;
  // The model's input and output arrays are externally allocated.
  // They are not transient arrays.
  if (IsInputArray(model, array_name) || IsOutputArray(model, array_name)) {
    return false;
  }
  const auto& array = &model.GetArray(array_name);
  // An array with a constant buffer isn't a transient array.
  if (!!array->buffer) {
    return false;
  }
  // An array without shape isn't allocatable.
  if (!array->has_shape()) {
    return false;
  }

  // The size of string tensors is rarely known ahead of time, so all transient
  // tensors of this type will need to be dynamically allocated.
  if (array->final_data_type == ArrayDataType::kString ||
      array->data_type == ArrayDataType::kString) {
    return false;
  }

  return true;
}

string AvailableArrayName(const Model& model, const string& name) {
  string sanitized_name = SanitizeNameForTFNode(name);
  if (!model.HasArray(sanitized_name) &&
      !model.IsOptionalArray(sanitized_name)) {
    return sanitized_name;
  }
  const int kNumSuffixesToTry = 1000;
  for (int i = 0; i < kNumSuffixesToTry; i++) {
    const string& name_with_suffix =
        toco::port::StringF("%s_%d", sanitized_name, i);
    if (!model.HasArray(name_with_suffix) &&
        !model.IsOptionalArray(name_with_suffix)) {
      return name_with_suffix;
    }
  }
  LOG(FATAL) << "Could not find an available array name starting with "
             << sanitized_name << ". Tried " << kNumSuffixesToTry
             << " suffixes, all were taken!";
  return "";
}

string ShapeToString(const Shape& shape) {
  if (shape.dimensions_count() == 0) {
    return "[]";
  }

  return absl::StrCat("[ ", absl::StrJoin(shape.dims(), ", "), " ]");
}

void PrintArrayShape(Model* model, const string& name) {
  if (!model->GetArray(name).has_shape()) {
    LOG(INFO) << name << " has no shape";
    return;
  }
  LOG(INFO) << name
            << " has shape: " << ShapeToString(model->GetArray(name).shape());
}

bool IsArrayFullyConnectedWeights(const Model& model, const string& name) {
  bool is_fc_weights = false;
  bool is_something_else = false;
  for (const auto& op : model.operators) {
    for (int input_index = 0; input_index < op->inputs.size(); input_index++) {
      if (op->inputs[input_index] == name) {
        if (op->type == OperatorType::kFullyConnected && input_index == 1) {
          is_fc_weights = true;
        } else {
          is_something_else = true;
        }
      }
    }
  }
  CHECK(!(is_fc_weights && is_something_else));
  return is_fc_weights;
}

string CreateInt32Array(Model* model, const string& param_name,
                        const std::vector<int>& value) {
  auto param_array_name = AvailableArrayName(*model, param_name);
  auto& param_array = model->GetOrCreateArray(param_array_name);
  param_array.mutable_shape()->ReplaceDims({static_cast<int>(value.size())});
  param_array.data_type = ArrayDataType::kInt32;
  auto& param_array_data =
      param_array.GetMutableBuffer<ArrayDataType::kInt32>().data;
  param_array_data.resize(RequiredBufferSizeForShape(param_array.shape()));
  for (int i = 0; i < value.size(); ++i) {
    param_array_data[i] = value[i];
  }
  return param_array_name;
}
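
// Rough arithmetic-op count estimates, counting a multiply-accumulate as
// 2 ops. For example, a FullyConnected op with weights of shape [1000, 1024]
// and output of shape [1, 1000] is counted as 2 * 1000 * 1024 = 2048000 ops,
// plus 1000 more if a bias input is present.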
bool EstimateArithmeticOpsCount(const Model& model, const Operator& op,
                                int64* result) {
  switch (op.type) {
    case OperatorType::kFullyConnected:
    case OperatorType::kConv:
    case OperatorType::kDepthwiseConv: {
      const auto& output_array = model.GetArray(op.outputs[0]);
      const auto& weights_array = model.GetArray(op.inputs[1]);
      if (!output_array.has_shape() || !weights_array.has_shape()) {
        return false;
      }
      int64 cols = 1;
      for (int i = 0; i < output_array.shape().dimensions_count() - 1; i++) {
        cols *= output_array.shape().dims(i);
      }
      const int64 cost_per_col =
          2 * RequiredBufferSizeForShape(weights_array.shape());
      *result = cost_per_col * cols;
      if (op.inputs.size() > 2) {
        // There is a bias vector. One more op per output value.
        *result += RequiredBufferSizeForShape(output_array.shape());
      }
      break;
    }
    case OperatorType::kAdd:
    case OperatorType::kSub:
    case OperatorType::kMul: {
      const auto& output_array = model.GetArray(op.outputs[0]);
      if (!output_array.has_shape()) {
        return false;
      }
      *result = RequiredBufferSizeForShape(output_array.shape());
      break;
    }
    case OperatorType::kAddN: {
      const auto& output_array = model.GetArray(op.outputs[0]);
      if (!output_array.has_shape()) {
        return false;
      }
      // AddN cost is roughly the same cost as N-1 Adds.
      const int64 num_adds = op.inputs.size() - 1;
      *result = num_adds * RequiredBufferSizeForShape(output_array.shape());
      break;
    }
    case OperatorType::kLogistic:
    case OperatorType::kSoftmax:
    case OperatorType::kLogSoftmax:
    case OperatorType::kTanh: {
      const auto& output_array = model.GetArray(op.outputs[0]);
      if (!output_array.has_shape()) {
        return false;
      }
      // As a very rough ballpark, the cost of evaluating a math function
      // such as tanh or logistic is about 32 multiplications, and about as
      // many additions/subtractions. (Just a power-of-two order-of-magnitude
      // estimate, from looking at actual implementations used in runtime/
      // code.)
      *result = 64 * RequiredBufferSizeForShape(output_array.shape());
      break;
    }
    case OperatorType::kMaxPool: {
      const auto& maxpool = *static_cast<const MaxPoolOperator*>(&op);
      const auto& output_array = model.GetArray(op.outputs[0]);
      if (!output_array.has_shape()) {
        return false;
      }
      *result = RequiredBufferSizeForShape(output_array.shape()) *
                maxpool.kheight * maxpool.kwidth;
      break;
    }
    case OperatorType::kAveragePool: {
      const auto& avgpool = *static_cast<const AveragePoolOperator*>(&op);
      const auto& output_array = model.GetArray(op.outputs[0]);
      if (!output_array.has_shape()) {
        return false;
      }
      *result = RequiredBufferSizeForShape(output_array.shape()) *
                avgpool.kheight * avgpool.kwidth;
      break;
    }
    case OperatorType::kL2Pool: {
      const auto* maxpool = static_cast<const MaxPoolOperator*>(&op);
      const auto& output_array = model.GetArray(op.outputs[0]);
      if (!output_array.has_shape()) {
        return false;
      }
      // The sum of squares requires (kheight*kwidth) multiply-adds,
      // and then there is the sqrt which we ballpark at 32 ops.
      const int64 cost_per_val = 2 * maxpool->kheight * maxpool->kwidth + 32;
      *result = RequiredBufferSizeForShape(output_array.shape()) * cost_per_val;
      break;
    }
    case OperatorType::kL2Normalization: {
      const auto& output_array = model.GetArray(op.outputs[0]);
      if (!output_array.has_shape()) {
        return false;
      }
      // Computing the squared L2 norm is N multiply-adds, so 2N ops, the
      // single inverse-sqrt is negligible, and multiplying each value by the
      // resulting multiplier is another N ops, for a total of 3N ops.
      *result = 3 * RequiredBufferSizeForShape(output_array.shape());
      break;
    }
    default:
      *result = 0;
      break;
  }
  return true;
}

bool EstimateArithmeticOpsCount(const Model& model, int64* result) {
  int64 total = 0;
  for (const auto& op : model.operators) {
    int64 num_ops;
    if (!EstimateArithmeticOpsCount(model, *op, &num_ops)) {
      return false;
    }
    total += num_ops;
  }
  *result = total;
  return true;
}
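
// For example: FormattedNumber(1234) == "1234 ",
// FormattedNumber(2500000) == "2.500 M", and
// FormattedNumber(3000000000) == "3.000 G".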
string FormattedNumber(int64 x) {
  const int64 million = 1000000;
  const int64 billion = 1000000000;
  if (x < 10000) {
    return toco::port::StringF("%d ", x);
  } else if (x < billion) {
    return toco::port::StringF("%.3f M", static_cast<double>(x) / million);
  } else {
    return toco::port::StringF("%.3f G", static_cast<double>(x) / billion);
  }
}

void GetShuffleShape(AxesOrder input_axes_order, AxesOrder output_axes_order,
                     std::vector<int>* shuffle) {
  CHECK_EQ(AxesCount(input_axes_order), AxesCount(output_axes_order));
  shuffle->resize(4);
  for (int i = 0; i < 4; i++) {
    (*shuffle)[i] = i;
  }
  if (input_axes_order == output_axes_order) {
    // nothing to do
  } else if (AxesCount(input_axes_order) == 2) {
    shuffle->resize(2);
    (*shuffle)[0] = 1;
    (*shuffle)[1] = 0;
  } else if (input_axes_order == AxesOrder::kOHWI &&
             output_axes_order == AxesOrder::kHWIO) {
    // 3210 <- 3210
    // HWIO <- OHWI
    *shuffle = {1, 2, 3, 0};
  } else if (input_axes_order == AxesOrder::kHWIO &&
             output_axes_order == AxesOrder::kOHWI) {
    // 3210 <- 3210
    // OHWI <- HWIO
    *shuffle = {3, 0, 1, 2};
  } else if (input_axes_order == AxesOrder::kOHWI &&
             output_axes_order == AxesOrder::kHWOI) {
    *shuffle = {1, 2, 0, 3};
  } else {
    LOG(FATAL) << "Bad shuffle";
  }
}

void ExtendShuffle(const std::vector<int>& input_shuffle, int newdim,
                   std::vector<int>* extended_shuffle) {
  *extended_shuffle = input_shuffle;
  CHECK(newdim >= input_shuffle.size());
  const int pad_size = newdim - input_shuffle.size();
  extended_shuffle->resize(newdim);
  for (int i = 0; i < pad_size; i++) {
    (*extended_shuffle)[i] = i;
  }
  for (int i = pad_size; i < newdim; i++) {
    (*extended_shuffle)[i] = input_shuffle[i - pad_size] + pad_size;
  }
}
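
// In the shuffle vectors used below, entry i gives the input axis that maps
// to output axis i: for example, OHWI -> HWIO is {1, 2, 3, 0}, so output
// dim 0 (H) is taken from input dim 1. ExtendShuffle pads on the left, e.g.
// extending {1, 0} to 4 dims yields {0, 1, 3, 2}.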
void ShuffleDims(const Shape& input_shape, AxesOrder input_axes_order,
                 AxesOrder output_axes_order, Shape* output_shape) {
  if (input_axes_order == AxesOrder::kHWIM &&
      output_axes_order == AxesOrder::k1HWO) {
    // This special case isn't just a permutation: the I and M dims get
    // merged into the O dim (dim 3), so we have to special-case it.
    *output_shape = Shape({1, input_shape.dims(0), input_shape.dims(1),
                           input_shape.dims(3) * input_shape.dims(2)});
  } else {
    std::vector<int> shuffle;
    GetShuffleShape(input_axes_order, output_axes_order, &shuffle);
    std::vector<int>* output_dims = output_shape->mutable_dims();
    output_dims->resize(input_shape.dimensions_count());
    for (int i = 0; i < input_shape.dimensions_count(); i++) {
      (*output_dims)[i] = input_shape.dims(shuffle[i]);
    }
  }
}

template <typename T>
void ShuffleArrayTemplate(const Shape& input_shape, AxesOrder input_axes_order,
                          AxesOrder output_axes_order,
                          const Shape& output_shape, const T* input_data,
                          T* output_data) {
  if (input_axes_order == AxesOrder::kHWIM &&
      output_axes_order == AxesOrder::k1HWO) {
    // This special case isn't just a permutation: the I and M dims get
    // merged into the O dim. Fortunately, as far as array shuffling is
    // concerned, it's just the identity transformation.
    memcpy(output_data, input_data,
           RequiredBufferSizeForShape(input_shape) * sizeof(output_data[0]));
    return;
  }
  CHECK(input_shape.dimensions_count() == output_shape.dimensions_count());
  const int dim = input_shape.dimensions_count();
  CHECK_LE(dim, 4);
  std::vector<int> shuffle;
  GetShuffleShape(input_axes_order, output_axes_order, &shuffle);
  CHECK(shuffle.size() >= dim);
  for (int i = 0; i < dim; i++) {
    CHECK(shuffle[i] >= 0 && shuffle[i] < dim);
    CHECK(input_shape.dims(shuffle[i]) == output_shape.dims(i));
  }
  Shape extended_input_shape = input_shape;
  ExtendShape(&extended_input_shape, 4);
  Shape extended_output_shape = output_shape;
  ExtendShape(&extended_output_shape, 4);
  std::vector<int> extended_shuffle;
  ExtendShuffle(shuffle, 4, &extended_shuffle);

  const std::vector<int>& extended_input_dims = extended_input_shape.dims();
  const std::vector<int>& extended_output_dims = extended_output_shape.dims();

  // TODO(starka): Rework to handle different numbers of dimensions.
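  // The code below walks the output in row-major order while advancing a
  // source pointer by the stride of whichever input axis feeds each output
  // axis. For example, for an input of extended dims [N, H, W, C],
  // input_strides is {H*W*C, W*C, C, 1}.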
  int input_strides[4];
  input_strides[3] = 1;
  input_strides[2] = extended_input_dims[3];
  input_strides[1] = input_strides[2] * extended_input_dims[2];
  input_strides[0] = input_strides[1] * extended_input_dims[1];
  const int input_stride_0 = input_strides[extended_shuffle[3]];
  const int input_stride_1 = input_strides[extended_shuffle[2]];
  const int input_stride_2 = input_strides[extended_shuffle[1]];
  const int input_stride_3 = input_strides[extended_shuffle[0]];

  const int output_size_0 = extended_output_dims[3];
  const int output_size_1 = extended_output_dims[2];
  const int output_size_2 = extended_output_dims[1];
  const int output_size_3 = extended_output_dims[0];
  const int output_stride_0 = 1;
  const int output_stride_1 = output_size_0;
  const int output_stride_2 = output_stride_1 * output_size_1;
  const int output_stride_3 = output_stride_2 * output_size_2;

  for (int i3 = 0; i3 < output_size_3; i3++) {
    const T* const input_ptr_3 = input_data + i3 * input_stride_3;
    T* const output_ptr_3 = output_data + i3 * output_stride_3;
    for (int i2 = 0; i2 < output_size_2; i2++) {
      const T* const input_ptr_2 = input_ptr_3 + i2 * input_stride_2;
      T* const output_ptr_2 = output_ptr_3 + i2 * output_stride_2;
      for (int i1 = 0; i1 < output_size_1; i1++) {
        const T* input_ptr = input_ptr_2 + i1 * input_stride_1;
        T* output_ptr = output_ptr_2 + i1 * output_stride_1;
        T* const output_ptr_end = output_ptr + output_size_0 * output_stride_0;
        while (output_ptr != output_ptr_end) {
          *output_ptr = *input_ptr;
          input_ptr += input_stride_0;
          output_ptr += output_stride_0;
        }
      }
    }
  }
}

void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order,
                  AxesOrder output_axes_order, const Shape& output_shape,
                  const uint8* input_data, uint8* output_data) {
  ShuffleArrayTemplate<uint8>(input_shape, input_axes_order, output_axes_order,
                              output_shape, input_data, output_data);
}

void ShuffleArray(const Shape& input_shape, AxesOrder input_axes_order,
                  AxesOrder output_axes_order, const Shape& output_shape,
                  const float* input_data, float* output_data) {
  ShuffleArrayTemplate<float>(input_shape, input_axes_order, output_axes_order,
                              output_shape, input_data, output_data);
}

int AxesCount(AxesOrder axes_order) {
  switch (axes_order) {
    case AxesOrder::kOneAxis:
      return 1;
    case AxesOrder::kRC:
      return 2;
    case AxesOrder::kCR:
      return 2;
    case AxesOrder::kHWIO:
      return 4;
    case AxesOrder::kOHWI:
      return 4;
    case AxesOrder::kHWIM:
      return 4;
    case AxesOrder::k1HWO:
      return 4;
    case AxesOrder::kNHWC:
      return 4;
    case AxesOrder::kHWOI:
      return 4;
    default:
      LOG(FATAL) << "Bad AxesOrder";
      return 0;
  }
}

bool IsDiscardableArray(const Model& model, const string& array_name) {
  if (IsInputArray(model, array_name) || IsOutputArray(model, array_name)) {
    return false;
  }
  for (const auto& rnn_state : model.flags.rnn_states()) {
    if (!rnn_state.discardable()) {
      if (array_name == rnn_state.state_array()) {
        return false;
      }
      if (array_name == rnn_state.back_edge_source_array()) {
        return false;
      }
    }
  }
  return true;
}
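
// A Reshape is equivalent to a Transpose when it only moves, inserts or
// removes unit dims, leaving the non-unit dims in the same order. For
// example, [1, 4, 1, 8] -> [4, 8, 1] qualifies when extra unary dims are
// allowed (both reduce to [4, 8]), but [4, 8] -> [8, 4] does not.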
bool ReshapeIsEquivalentToTranspose(const Model& model,
                                    const TensorFlowReshapeOperator* op,
                                    bool allow_extra_unary_dims) {
  CHECK(!op->shape.empty());
  CHECK(model.HasArray(op->inputs[0]));
  CHECK(model.HasArray(op->outputs[0]));

  const auto& input_array = model.GetArray(op->inputs[0]);
  const auto& output_array = model.GetArray(op->outputs[0]);

  CHECK(input_array.has_shape());
  CHECK(output_array.has_shape());

  std::vector<int> in_shape = input_array.shape().dims();
  std::vector<int> out_shape = output_array.shape().dims();

  // If the reshape changes the number of dimensions, it cannot be interpreted
  // as a transpose.
  if (!allow_extra_unary_dims && in_shape.size() != out_shape.size()) {
    return false;
  }

  in_shape.erase(std::remove(in_shape.begin(), in_shape.end(), 1),
                 in_shape.end());
  out_shape.erase(std::remove(out_shape.begin(), out_shape.end(), 1),
                  out_shape.end());
  return in_shape == out_shape;
}

void CheckFinalDataTypesSatisfied(const Model& model) {
  for (const auto& array_entry : model.GetArrayMap()) {
    const auto& array = *array_entry.second;
    if (array.data_type == ArrayDataType::kBool) {
      // Boolean values are never quantized.
      continue;
    }

    // If the final data type is int16, the data type may be float, for example
    // after dequantization.
    if (array.final_data_type != ArrayDataType::kNone &&
        array.final_data_type != ArrayDataType::kInt16) {
      CHECK(array.data_type == array.final_data_type)
          << "Array \"" << array_entry.first
          << "\" has mis-matching actual and final data types (data_type="
          << ArrayDataTypeName(array.data_type)
          << ", final_data_type=" << ArrayDataTypeName(array.final_data_type)
          << ").";
    }
  }
}

ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type) {
  switch (type) {
    case FLOAT:
      return ArrayDataType::kFloat;
    case QUANTIZED_UINT8:
      return ArrayDataType::kUint8;
    case INT8:
      return ArrayDataType::kInt8;
    case QUANTIZED_INT16:
      return ArrayDataType::kInt16;
    case INT32:
      return ArrayDataType::kInt32;
    case INT64:
      return ArrayDataType::kInt64;
    case BOOL:
      return ArrayDataType::kBool;
    case STRING:
      return ArrayDataType::kString;
    case COMPLEX64:
      return ArrayDataType::kComplex64;
    default:
      return ArrayDataType::kNone;
  }
}

void FinishBuildingRNNStates(Model* model) {
  for (const auto& rnn_state : model->flags.rnn_states()) {
    if (!model->HasArray(rnn_state.back_edge_source_array()) ||
        !model->HasArray(rnn_state.state_array())) {
      CHECK(model->HasArray(rnn_state.back_edge_source_array()));
      CHECK(model->HasArray(rnn_state.state_array()));
      continue;
    }
    const auto& src_array = model->GetArray(rnn_state.back_edge_source_array());
    auto& dst_array = model->GetArray(rnn_state.state_array());
    if (src_array.data_type == ArrayDataType::kNone &&
        dst_array.data_type == ArrayDataType::kNone) {
      dst_array.data_type = ArrayDataType::kFloat;
    }
  }
}

// Returns the array names that match the ArraysExtraInfo's name and
// name_regexp. The regexp match is for a full match.
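// For example (illustrative), an entry with name_regexp "conv_2d_.*/weights"
// selects every array whose full name matches that pattern, while an entry
// with a plain name selects just that one array if it exists.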
std::unordered_set<string> ScanArrayNames(
    const Model& model, const toco::ArraysExtraInfo_Entry& entry) {
  std::unordered_set<string> matches;
  if (model.HasArray(entry.name())) {
    matches.insert(entry.name());
  }
  if (!entry.name_regexp().empty()) {
    const auto& arrays = model.GetArrayMap();
    const RE2 name_regexp = {entry.name_regexp()};
    for (auto it = arrays.begin(); it != arrays.end(); ++it) {
      if (RE2::FullMatch(it->first, name_regexp)) {
        matches.insert(it->first);
      }
    }
  }
  return matches;
}

void UseArraysExtraInfo(Model* model, bool quantize_output) {
  for (const auto& entry : model->flags.arrays_extra_info().entries()) {
    const auto matches = ScanArrayNames(*model, entry);
    for (const auto& matched_name : matches) {
      auto& array = model->GetArray(matched_name);
      if (entry.has_min() || entry.has_max()) {
        CHECK_EQ(entry.has_min(), entry.has_max());
        auto& minmax = array.GetOrCreateMinMax();
        minmax.min = entry.min();
        minmax.max = entry.max();
      }
      if (entry.has_data_type() && quantize_output) {
        array.final_data_type =
            ConvertIODataTypeToArrayDataType(entry.data_type());
      }
      if (entry.has_shape()) {
        array.clear_shape();
        // Make sure to create the shape even if there are no dims, to
        // correctly record 0-D shapes.
        array.mutable_shape();
        for (const auto& dim : entry.shape().dims()) {
          array.mutable_shape()->mutable_dims()->push_back(dim);
        }
      }
      if (entry.has_constant_float_value()) {
        CHECK(array.has_shape());
        if (array.data_type == ArrayDataType::kFloat) {
          auto& data = array.GetMutableBuffer<ArrayDataType::kFloat>().data;
          data.resize(RequiredBufferSizeForShape(array.shape()));
          for (float& f : data) {
            f = entry.constant_float_value();
          }
        }
      }
    }
  }
}
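
// The shuffled FullyConnected weights format stores the weights in 4x16
// blocks (4 consecutive rows, 16 consecutive columns per block) with the
// sign bit of each uint8 value flipped. For example, a shuffled byte 0x05
// deshuffles to 0x05 ^ 0x80 == 0x85. The function below restores the plain
// row-major layout and the original values.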
void UndoWeightsShuffling(Model* model) {
  for (const auto& op : model->operators) {
    if (op->type != toco::OperatorType::kFullyConnected) {
      continue;
    }
    const auto& fc_op = static_cast<toco::FullyConnectedOperator&>(*op);
    if (fc_op.weights_format == FullyConnectedWeightsFormat::kDefault) {
      continue;
    }
    const string& weights_name = fc_op.inputs[1];
    QCHECK_EQ(CountOpsWithInput(*model, weights_name), 1);
    auto& weights_array = model->GetArray(weights_name);
    QCHECK(weights_array.data_type == ArrayDataType::kUint8);
    auto& weights_data =
        weights_array.GetMutableBuffer<toco::ArrayDataType::kUint8>().data;
    const auto& weights_shape = weights_array.shape();
    QCHECK_EQ(weights_shape.dimensions_count(), 2);
    const int rows = weights_shape.dims(0);
    const int cols = weights_shape.dims(1);
    QCHECK_EQ(rows % 4, 0);
    QCHECK_EQ(cols % 16, 0);
    CHECK_EQ(rows * cols, weights_data.size());
    // Compute the de-shuffled weights.
    std::vector<uint8> deshuffled_data(weights_data.size());
    uint8* shuffled_data_ptr = weights_data.data();
    for (int r = 0; r < rows; r += 4) {
      for (int c = 0; c < cols; c += 16) {
        for (int i = 0; i < 4; i++) {
          uint8* deshuffled_data_ptr =
              deshuffled_data.data() + (r + i) * cols + c;
          for (int j = 0; j < 16; j++) {
            uint8 shuffled_val = *shuffled_data_ptr++;
            // Deshuffling isn't only about restoring the storage layout, it's
            // also about undoing the flipping of the sign bit, which is
            // performed on the shuffled weights.
            uint8 deshuffled_val = shuffled_val ^ 0x80;
            *deshuffled_data_ptr++ = deshuffled_val;
          }
        }
      }
    }
    CHECK_EQ(shuffled_data_ptr, weights_data.data() + rows * cols);
    // Switch this FC op to using the deshuffled weights.
    weights_data = std::move(deshuffled_data);
  }
}

void CopyMinMaxAndQuantizationRelatedFields(const Array& src, Array* dst) {
  if (src.minmax) {
    dst->GetOrCreateMinMax() = src.GetMinMax();
  }
  if (src.quantization_params) {
    dst->GetOrCreateQuantizationParams() = src.GetQuantizationParams();
  }
  dst->narrow_range = src.narrow_range;
}

}  // namespace toco