/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#undef NDEBUG

#include "Bridge.h"
#include "CompilationBuilder.h"
#include "Manager.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "NeuralNetworksWrapper.h"
#include "SampleDriver.h"
#include "Utils.h"
#include "ValidateHal.h"

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstring>
#include <map>
#include <memory>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include <unistd.h>

#include <android-base/logging.h>
#include <android/sharedmem.h>
#include <gtest/gtest.h>

// Uncomment the following line to generate some debugging output that
// may be useful when analyzing failures:
//
// #define VERBOSE VERBOSE

// Uncomment the following line to generate graphs from models:
//
// #define GRAPH GRAPH

// We randomly generate tests (model + input data) at runtime, and verify
// that we get the same results whether we do partitioned compilation/execution
// or non partitioned compilation/execution.  We perform a test as follows:
//
// (1) Randomly generate a model (graph and weights), randomly generate input
//     data, randomly assign inputs and outputs to CPU memory or to shared
//     memory.
//
//     Randomly leaves dimensions unset for intermediate operands.
//
// (2) Randomly generate drivers based on the sample driver, each of which
//     executes models on the CPU.  They differ according to which operations
//     they support.
//
// (3) Compile and execute without partitioning, saving off the results.
//
// (4) Compile and execute with partitioning.
//
// (5) Verify that the saved results from (3) match the results from (4).
//
// For simplicity, all data (model inputs, model outputs, weights,
// temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
// dimensions are fixed throughout a particular test case (and
// randomly determined).  This prevents us from having to find a
// mechanism to "resize" data (e.g., if ADD#a operates on data of size
// 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
// and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
// from size 2x2 to size 3x3 in order to match ADD#b).  In the few
// cases where an operand cannot be of this type, it is a constant
// (e.g., activation functions and RNN bias).
//
// Each operation we generate has a signature (described in more
// detail later).  The randomly generated drivers decide which
// operations they can execute by checking operation signatures.  Once
// we have built the model and know the set of signatures, we randomly
// assign each signature to a driver.  No signature is supported by
// multiple drivers -- we're not testing the logic that the
// partitioning algorithm uses to select the best driver for an
// operation.
93 94 namespace android { 95 96 using CompilationBuilder = nn::CompilationBuilder; 97 using Device = nn::Device; 98 using DeviceManager = nn::DeviceManager; 99 using ExecutionPlan = nn::ExecutionPlan; 100 using HidlModel = hardware::neuralnetworks::V1_1::Model; 101 using MemoryBuilder = nn::Memory; 102 using ModelBuilder = nn::ModelBuilder; 103 using Result = nn::wrapper::Result; 104 using SampleDriver = nn::sample_driver::SampleDriver; 105 using WrapperCompilation = nn::wrapper::Compilation; 106 using WrapperExecution = nn::wrapper::Execution; 107 using WrapperMemory = nn::wrapper::Memory; 108 using WrapperModel = nn::wrapper::Model; 109 using WrapperOperandType = nn::wrapper::OperandType; 110 using WrapperType = nn::wrapper::Type; 111 112 namespace { 113 114 /// Configure test size ////////////////////////////////////////////////////////// 115 116 // We may exceed this in order to connect otherwise disjoint subgraphs. 117 static const unsigned kMaxNumOperations = 100; 118 119 // We build models to process 2-D square tensors up to this size in each dimension; 120 // note that the API promotes by-value weights larger than 128 to by-reference, 121 // so we want to ensure that we can pick both types that exceed and types that do 122 // not exceed this size. 123 static const unsigned kMaxProblemSize = 8; 124 125 // First seed for pseudorandom test generation. 126 static const unsigned kFirstSeed = 0; 127 128 // Number of test cases. 129 static const unsigned kNumTestCases = 225; 130 131 // Force all graph weights into a single pool (as we recommend to users) 132 // or allow them to be distributed across multiple pools (more stress 133 // on the partitioning algorithm and the rest of the runtime)? 134 // Forcing all graph weights into a single pool may be necessary to 135 // prevent large graphs from running up against http://b/70302693 136 // "NNAPI overuses (?) fds". 
137 static const bool kAllWeightsInOnePool = false; 138 139 ////////////////////////////////////////////////////////////////////////////////// 140 141 // The signature of an operation consists of the operation type (e.g., 142 // ADD) and the activation function (use -1 in the case of an 143 // operation type for which the activation function is inapplicable). 144 typedef std::pair<ANeuralNetworksOperationType, int> Signature; 145 146 // This class adds some simple utilities on top of 147 // ::android::nn::wrapper::Model. For example, it provides access to 148 // certain features from ModelBuilder that are not exposed by the base 149 // class (such as inputCount() and operation index). 150 class TestModel : public WrapperModel { 151 public: 152 153 uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs, 154 const std::vector<uint32_t>& outputs) { 155 const uint32_t operationIndex = operationCount(); 156 mOperations.push_back(outputs); 157 WrapperModel::addOperation(type, inputs, outputs); 158 return operationIndex; 159 } 160 161 uint32_t operationCount() const { 162 return mOperations.size(); 163 } 164 165 uint32_t inputCount() const { 166 return builder()->inputCount(); 167 } 168 uint32_t outputCount() const { 169 return builder()->outputCount(); 170 } 171 172 const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const { 173 assert(index < mOperations.size()); 174 return mOperations[index]; 175 } 176 177 // All values are immediately copied into the model (we need to do 178 // this ourselves in cases where the underlying NNAPI does not). 
179 void setOperandValue(uint32_t index, const std::vector<float>& value) { 180 const size_t length = value.size() * sizeof(float); 181 182 if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) { 183 WrapperModel::setOperandValue(index, value.data(), length); 184 } else { 185 mOperandValues.push_back(value); 186 WrapperModel::setOperandValue(index, mOperandValues.back().data(), length); 187 } 188 } 189 190 void setOperandValue(uint32_t index, int32_t value) { 191 assert(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES); 192 WrapperModel::setOperandValue(index, &value, sizeof(value)); 193 } 194 195 private: 196 197 const ModelBuilder* builder() const { 198 return reinterpret_cast<const ModelBuilder*>(getHandle()); 199 } 200 201 // Representation of operations: vector index is operation number, 202 // vector value is operation's output operands. 203 std::vector<std::vector<uint32_t>> mOperations; 204 205 // Large operand values -- not immediately copied into the 206 // WrapperModel, so remembered here instead. 207 std::vector<std::vector<float>> mOperandValues; 208 }; 209 210 // This class adds some simple utilities on top of 211 // ::android::nn::wrapper::Compilation in order to provide access to 212 // certain features from CompilationBuilder that are not exposed by 213 // the base class. 
214 class TestCompilation : public WrapperCompilation { 215 public: 216 TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {} 217 218 Result setPartitioning(uint32_t partitioning) { 219 return static_cast<Result>(builder()->setPartitioning(partitioning)); 220 } 221 222 using WrapperCompilation::finish; 223 Result finish(const std::vector<std::shared_ptr<Device>>& devices) { 224 return static_cast<Result>(builder()->finish(devices)); 225 } 226 227 const ExecutionPlan& getExecutionPlan() const { 228 return builder()->forTest_getExecutionPlan(); 229 } 230 231 private: 232 const CompilationBuilder* builder() const { 233 return reinterpret_cast<const CompilationBuilder*>(getHandle()); 234 } 235 CompilationBuilder* builder() { 236 return reinterpret_cast<CompilationBuilder*>(getHandle()); 237 } 238 }; 239 240 // This class is used to manage a collection of memory regions, 241 // disjoint windows onto a set of Memory instances, each of which is 242 // associated with a single shared memory region. Each region and 243 // Memory instance is assigned a number. The usage pattern is as 244 // follows: 245 // - Call addMemory() and addRegion() as many times as needed to 246 // declare (but not define) Memory instances and declare region 247 // instances. 248 // - Call layout() to define the Memory instances. 249 // - Call getRegion() as many times as needed to get the details 250 // of memory regions (such as address, or Memory/offset/length). 251 // The Memory instances created by layout() are owned by the 252 // TestMemories instance, and are destroyed when the TestMemories 253 // instance is destroyed. 
254 class TestMemories { 255 public: 256 TestMemories() = default; 257 ~TestMemories(); 258 259 TestMemories(const TestMemories&) = delete; 260 TestMemories& operator=(const TestMemories&) = delete; 261 262 unsigned addMemory() { 263 assert(!mLayoutDone); 264 mMemorySizes.push_back(0); 265 return memoryCount() - 1; 266 } 267 unsigned memoryCount() const { 268 return mMemorySizes.size(); 269 } 270 271 unsigned addRegion(unsigned memoryIndex, uint32_t length) { 272 assert(!mLayoutDone); 273 assert(memoryIndex < memoryCount()); 274 uint32_t& memorySize = mMemorySizes[memoryIndex]; 275 auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length); 276 mRegions.push_back(desc); 277 memorySize += length; 278 return regionCount() - 1; 279 } 280 unsigned regionCount() const { 281 return mRegions.size(); 282 } 283 284 void layout(); 285 286 void* getRegion(unsigned regionIndex, 287 const WrapperMemory** pMemory, uint32_t* pOffset, uint32_t* pLength) { 288 assert(mLayoutDone); 289 assert(regionIndex < regionCount()); 290 const auto& regionDescriptor = mRegions[regionIndex]; 291 const WrapperMemory* memory = &mMemorys[std::get<0>(regionDescriptor)]; 292 uint32_t offset = std::get<1>(regionDescriptor); 293 uint32_t length = std::get<2>(regionDescriptor); 294 295 uint8_t* buffer; 296 if (reinterpret_cast<MemoryBuilder*>(memory->get())->getPointer(&buffer) != 297 ANEURALNETWORKS_NO_ERROR) { 298 assert(0); 299 } 300 301 if (pMemory) *pMemory = memory; 302 if (pOffset) *pOffset = offset; 303 if (pLength) *pLength = length; 304 305 return buffer + offset; 306 } 307 308 void* getRegion(unsigned regionIndex) { 309 return getRegion(regionIndex, nullptr, nullptr, nullptr); 310 } 311 312 private: 313 // Index is the memory index; value is the size of the memory 314 // (aggregate size of all regions in the memory). 315 std::vector<uint32_t> mMemorySizes; 316 317 // Index is the memory index. 
318 std::vector<WrapperMemory> mMemorys; 319 std::vector<int> mFDs; 320 321 // Index is the region index; tuple represents memory index, 322 // region offset within memory, region length. 323 std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions; 324 325 // For sanity checking. 326 bool mLayoutDone = false; 327 }; 328 329 void TestMemories::layout() { 330 assert(!mLayoutDone); 331 for (uint32_t memorySize : mMemorySizes) { 332 const int fd = ASharedMemory_create(nullptr, memorySize); 333 assert(fd >= 0); 334 mMemorys.emplace_back(memorySize, PROT_READ | PROT_WRITE, fd, 0); 335 mFDs.push_back(fd); 336 } 337 mLayoutDone = true; 338 } 339 340 TestMemories::~TestMemories() { 341 for (int fd : mFDs) { 342 close(fd); 343 } 344 } 345 346 class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> { 347 public: 348 RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {} 349 350 static Signature getSignature(const HidlModel& model, const Operation& operation); 351 352 protected: 353 void graphDump(const WrapperModel& model); 354 355 bool randBool() { 356 return randUInt(2) == 1; 357 } 358 359 double randFrac() { // [0.0, 1.0) 360 return mRandNumUnitDist(mRandNumEng); 361 } 362 363 unsigned randUInt(unsigned limit) { // [0, limit) 364 return unsigned(randFrac() * limit); 365 } 366 367 // Represents an operation in which every input and output operand 368 // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except: 369 // - One input operand may be an activation function. 370 // - Any number of input operands may be "special" in some other way 371 // (and in this implementation, not produced by any other operation). 372 // We require that: 373 // - There be at least one input operand that is neither an 374 // activation function nor "special". 
375 struct OperationPattern { 376 int mOperationType; 377 unsigned mNumInputs; 378 unsigned mNumOutputs; 379 int mActivationFunctionInputIndex; // <0 if none 380 381 // Returns operand index, or <0 if input is normal (must not 382 // be called for an activation function operand). Function 383 // should have the following prototype: 384 // 385 // int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex); 386 // 387 int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned); 388 }; 389 390 static const OperationPattern kOperationPatterns[]; 391 392 int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) { 393 if (inputIndex != 3) { 394 return -1; 395 } 396 397 // input operand 3 is bias, a 1-D tensor 398 const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, { problemSize }); 399 const uint32_t operandIndex = model->addOperand(&biasType); 400 std::vector<float> biasValue(problemSize); 401 std::generate(biasValue.begin(), biasValue.end(), 402 [this]{ return randFrac(); }); 403 model->setOperandValue(operandIndex, biasValue); 404 return int(operandIndex); 405 } 406 407 #ifdef VERBOSE 408 class ModelStats { 409 public: 410 ModelStats(const ModelBuilder* model) : 411 mBuilder(model) { } 412 ModelStats(const WrapperModel* model) : 413 mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) { } 414 friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) { 415 const uint32_t operandCount = stats.mBuilder->operandCount(); 416 const uint32_t inputCount = stats.mBuilder->inputCount(); 417 const uint32_t outputCount = stats.mBuilder->outputCount(); 418 out << "operationCount = " << stats.mBuilder->operationCount() 419 << ", operandCount = " << operandCount 420 << ", inputCount = " << inputCount 421 << " (" << (double(inputCount) / operandCount) << ")" 422 << ", outputCount = " << outputCount 423 << " (" << (double(outputCount) / operandCount) << ")"; 424 return 
out; 425 } 426 private: 427 const ModelBuilder* mBuilder; 428 }; 429 #endif 430 431 private: 432 std::mt19937 mRandNumEng; 433 std::uniform_real_distribution<double> mRandNumUnitDist; 434 }; 435 436 const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = { 437 { ANEURALNETWORKS_ADD, 3, 1, 2, nullptr }, 438 { ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr }, 439 { ANEURALNETWORKS_MUL, 3, 1, 2, nullptr }, 440 { ANEURALNETWORKS_RNN, 6, 2, 5, &RandomPartitioningTest::makeRnnSpecialInput }, 441 { ANEURALNETWORKS_TANH, 1, 1, -1, nullptr }, 442 }; 443 444 Signature RandomPartitioningTest::getSignature(const HidlModel& model, const Operation& operation) { 445 static const std::map<ANeuralNetworksOperationType, int> kOperationToActivation = []() { 446 std::map<ANeuralNetworksOperationType, int> result; 447 for (const auto& pattern : kOperationPatterns) { 448 result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex; 449 } 450 return result; 451 }(); 452 453 const ANeuralNetworksOperationType operationType = 454 static_cast<ANeuralNetworksOperationType>(operation.type); 455 const int activationFunctionInputIndex = kOperationToActivation.at(operationType); 456 if (activationFunctionInputIndex < 0) { 457 return Signature(operationType, -1); 458 } 459 460 const Operand& operand = model.operands[operation.inputs[activationFunctionInputIndex]]; 461 assert(operand.lifetime == OperandLifeTime::CONSTANT_COPY); 462 assert(operand.type == OperandType::INT32); 463 int32_t value; 464 memcpy(&value, 465 &model.operandValues[operand.location.offset], 466 operand.location.length); 467 return Signature(operationType, value); 468 } 469 470 void RandomPartitioningTest::graphDump([[maybe_unused]] const WrapperModel& model) { 471 #ifdef GRAPH 472 const std::string name = "Test-" + std::to_string(GetParam()); 473 nn::bridge_tests::graphDump(name.c_str(), 474 reinterpret_cast<const ModelBuilder*>(model.getHandle())); 475 #endif 476 } 477 478 class 
TestDriver : public SampleDriver { 479 public: 480 // Behaves like SampleDriver, except that it only supports 481 // operations with the specified signatures. 482 TestDriver(const char* name, std::set<Signature> signatures) : 483 SampleDriver(name), mSignatures(std::move(signatures)) { } 484 485 Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override { 486 android::nn::initVLogMask(); 487 Capabilities capabilities = 488 {.float32Performance = {.execTime = 0.75f, .powerUsage = 0.75f}, 489 .quantized8Performance = {.execTime = 0.75f, .powerUsage = 0.75f}, 490 .relaxedFloat32toFloat16Performance = {.execTime = 0.75f, .powerUsage = 0.75f}}; 491 _hidl_cb(ErrorStatus::NONE, capabilities); 492 return Void(); 493 } 494 495 Return<void> getSupportedOperations_1_1(const HidlModel& model, 496 getSupportedOperations_cb cb) override { 497 if (nn::validateModel(model)) { 498 const size_t count = model.operations.size(); 499 std::vector<bool> supported(count); 500 for (size_t i = 0; i < count; i++) { 501 supported[i] = 502 (mSignatures.count( 503 RandomPartitioningTest::getSignature( 504 model, 505 model.operations[i])) != 0); 506 } 507 cb(ErrorStatus::NONE, supported); 508 } else { 509 std::vector<bool> supported; 510 cb(ErrorStatus::INVALID_ARGUMENT, supported); 511 } 512 return Void(); 513 } 514 515 Return<ErrorStatus> prepareModel_1_1(const HidlModel& model, ExecutionPreference preference, 516 const sp<IPreparedModelCallback>& callback) override { 517 // NOTE: We verify that all operations in the model are supported. 
518 ErrorStatus outStatus = ErrorStatus::INVALID_ARGUMENT; 519 auto ret = getSupportedOperations_1_1( 520 model, 521 [&outStatus](ErrorStatus inStatus, const hidl_vec<bool>& supportedOperations) { 522 if (inStatus == ErrorStatus::NONE) { 523 if (std::all_of(supportedOperations.begin(), supportedOperations.end(), 524 [](bool v){ return v; })) { 525 outStatus = ErrorStatus::NONE; 526 } 527 } 528 }); 529 if (ret.isOk() && (outStatus == ErrorStatus::NONE)) { 530 return SampleDriver::prepareModel_1_1(model, preference, callback); 531 } else { 532 callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr); 533 return ErrorStatus::INVALID_ARGUMENT; 534 } 535 } 536 537 private: 538 const std::set<Signature> mSignatures; 539 }; 540 541 INSTANTIATE_TEST_CASE_P(Seed, RandomPartitioningTest, 542 ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases)); 543 544 TEST_P(RandomPartitioningTest, Test) { 545 LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam(); 546 547 #ifdef VERBOSE 548 std::cout << std::setprecision(2) << std::fixed << std::setw(4); 549 #endif 550 551 const unsigned problemSize = 1+randUInt(kMaxProblemSize); 552 const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, { problemSize, problemSize }); 553 const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, { 0, 0 }); 554 555 static const WrapperOperandType activationFunctionType(WrapperType::INT32, { }); 556 557 const unsigned numOperations = 2+randUInt(kMaxNumOperations-1); 558 const bool allowDeadOperations = (randFrac() < 0.2); 559 const bool allowUnknownDimensions = (randFrac() < 0.25); 560 561 // TODO: The current algorithm builds the graph in a forward 562 // direction (i.e., later-generated operations consume outputs 563 // from earlier-generated operations). 
In order to get more 564 // variation in graph topology, perhaps we should also create an 565 // algorithm to build the graph in a backward direction (i.e., 566 // later-generated operations produce outputs to be consumed by 567 // earlier-generated operations). 568 [[maybe_unused]] const bool buildForward = randBool(); 569 570 // TODO: Add a form of forced connectivity that operates by 571 // joining disjoint subgraphs rather than by forcing a root. 572 const bool forceCommonRoot = (randFrac() < 0.75); 573 574 TestModel model; 575 std::vector<uint32_t> modelInputs; 576 std::vector<uint32_t> modelOutputs; 577 578 // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32. 579 TestMemories weights; 580 581 // Keep track of all normal (i.e., not activation function and not 582 // "special") operands that are values (from setOperandValue*()). 583 // .first: operand index 584 // .second: if the operand is already defined (via setOperandValue*()) then ~0U; 585 // otherwise, the operand has yet to be defined, and this is the corresponding 586 // region index in "weights" 587 std::vector<std::pair<uint32_t, unsigned>> valueOperands; 588 589 // An operand is "dead" if it is not consumed by another operation 590 // and is not a model output. Key is operand index; value is 591 // operation index. 592 std::map<uint32_t, uint32_t> deadOperands; 593 594 // An operation is "dead" if all of its outputs are dead. 595 std::set<uint32_t> deadOperations; 596 597 // Collect the signatures of operations in this model. 598 std::set<Signature> signatures; 599 600 // For reporting purposes, keep track of the number of root 601 // operations (those that do not consume results produced by other 602 // operations). 603 unsigned rootOperationCount = 0; 604 605 // Track if we added operands with unknown dimensions. In this case, 606 // partitioned compilation will fail if such an operand is read in a 607 // different partition than it is written. 
608 bool hasUnknownDimensions = false; 609 610 // Generate operations. 611 for (unsigned i = 0; i < numOperations; i++) { 612 const unsigned operationPatternIndex = 613 randUInt(sizeof(kOperationPatterns)/sizeof(kOperationPatterns[0])); 614 const auto& operationPattern = kOperationPatterns[operationPatternIndex]; 615 616 // INPUTS ////////////////////////////////////////////////////////////////////////////////// 617 618 std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U); 619 620 // First, process activation function and special inputs, and 621 // keep track of which inputs remain. 622 std::vector<uint32_t> normalOperationInputIndexes; 623 int32_t activationFunction = -1; 624 for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs; 625 operationInputIndex++) { 626 if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) { 627 const uint32_t operandIndex = model.addOperand(&activationFunctionType); 628 activationFunction = randUInt(4); 629 if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) { 630 // workaround for http://b/69011131 631 activationFunction = ANEURALNETWORKS_FUSED_NONE; 632 } 633 model.setOperandValue(operandIndex, activationFunction); 634 operationInputs[operationInputIndex] = operandIndex; 635 continue; 636 } 637 if (operationPattern.mMakeSpecialInput != nullptr) { 638 const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))( 639 problemSize, &model, operationInputIndex); 640 if (operandIndex >= 0) { 641 operationInputs[operationInputIndex] = operandIndex; 642 continue; 643 } 644 } 645 normalOperationInputIndexes.push_back(operationInputIndex); 646 } 647 assert(!normalOperationInputIndexes.empty()); 648 signatures.insert(Signature(operationPattern.mOperationType, activationFunction)); 649 650 // A (normal) operation input can be one of: 651 // - a new or existing model input 652 // - an output of an existing operation 653 // - an OperandValue 654 // - an 
OperandValueFromMemory 655 // Some guidelines: 656 // - We generally don't want all of an operation's inputs to be values (constants) 657 const unsigned normalOperationInputCount = normalOperationInputIndexes.size(); 658 // How many of this operation's inputs are constants? 659 unsigned normalOperationInputConstantCount = 0; 660 // How many of this operation's inputs are model inputs? 661 unsigned normalOperationInputModelInputCount = 0; 662 // We begin by deciding what kind of input each (normal) operation will be; we don't 663 // actually pick input operand indexes at this time, because we might override this 664 // decision later. 665 enum InputKind { IK_MODEL_INPUT, IK_OPERATION_OUTPUT, IK_VALUE }; 666 std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount); 667 std::generate(normalOperationInputKinds.begin(), normalOperationInputKinds.end(), 668 [this, &model, 669 numOperations, 670 normalOperationInputCount, 671 &normalOperationInputConstantCount, 672 &normalOperationInputModelInputCount]() -> InputKind { 673 // Constant? Becomes less likely the more 674 // constants we already have as inputs to 675 // this operation. 676 if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) / 677 normalOperationInputCount)) { 678 normalOperationInputConstantCount++; 679 return IK_VALUE; 680 } 681 682 // Model input? Becomes less likely the 683 // more model inputs we already have as 684 // inputs to this operation, and the further 685 // along we are in generating this model 686 // (i.e., the more operations we have 687 // generated). 688 if ((model.operationCount() == 0) || 689 (randFrac() < 0.5 * 690 (1 - double(normalOperationInputModelInputCount) / 691 normalOperationInputCount) * 692 std::min(0.3, (1 - double(model.operationCount()) / 693 numOperations)))) { 694 normalOperationInputModelInputCount++; 695 return IK_MODEL_INPUT; 696 } 697 698 // Else output of an existing operation. 
699 return IK_OPERATION_OUTPUT; 700 }); 701 702 // Now force common root or model input, if necessary. (A 703 // model must have at least one input.) 704 auto force = 705 [this, &normalOperationInputKinds, normalOperationInputCount](InputKind forceKind){ 706 if (std::none_of(normalOperationInputKinds.begin(), 707 normalOperationInputKinds.end(), 708 [forceKind](InputKind kind){ return kind == forceKind; })) { 709 normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind; 710 } 711 }; 712 if (forceCommonRoot && (model.operationCount() != 0)) { 713 force(IK_OPERATION_OUTPUT); 714 } 715 if (modelInputs.empty()) { 716 assert(model.operationCount() == 0); 717 force(IK_MODEL_INPUT); 718 } 719 720 // Finally create the normal inputs. 721 bool isRootOperation = true; 722 for (unsigned i = 0; i < normalOperationInputCount; i++) { 723 uint32_t operandIndex = ~0U; 724 switch (normalOperationInputKinds[i]) { 725 case IK_MODEL_INPUT: { 726 if (!modelInputs.empty() && (randFrac() < 0.5)) { 727 operandIndex = modelInputs[randUInt(modelInputs.size())]; 728 } else { 729 operandIndex = model.addOperand(&problemType); 730 modelInputs.push_back(operandIndex); 731 } 732 break; 733 } 734 case IK_OPERATION_OUTPUT: { 735 decltype(deadOperands.begin()) deadOperandI; 736 if (!deadOperands.empty() && (randFrac() < 0.5)) { 737 deadOperandI = deadOperands.begin(); 738 std::advance(deadOperandI, randUInt(deadOperands.size())); 739 operandIndex = deadOperandI->first; 740 } else { 741 const uint32_t existingOperationIndex = randUInt(model.operationCount()); 742 const auto& existingOperationOutputs = 743 model.getOperationOutputs(existingOperationIndex); 744 operandIndex = 745 existingOperationOutputs[randUInt(existingOperationOutputs.size())]; 746 deadOperandI = deadOperands.find(operandIndex); 747 assert(deadOperandI == deadOperands.end() || 748 deadOperandI->second == existingOperationIndex); 749 } 750 if (deadOperandI != deadOperands.end()) { 751 const uint32_t 
correspondingOperation = deadOperandI->second; 752 deadOperands.erase(deadOperandI); 753 754 auto deadOperationI = deadOperations.find(correspondingOperation); 755 if (deadOperationI != deadOperations.end()) { 756 deadOperations.erase(deadOperationI); 757 } 758 } 759 isRootOperation = false; 760 break; 761 } 762 case IK_VALUE: { 763 if (!valueOperands.empty() && (randFrac() < 0.25)) { 764 operandIndex = valueOperands[randUInt(valueOperands.size())].first; 765 } else { 766 operandIndex = model.addOperand(&problemType); 767 if (randFrac() < 0.5) { 768 std::vector<float> value(problemSize * problemSize); 769 std::generate(value.begin(), value.end(), [this]{ return randFrac(); }); 770 model.setOperandValue(operandIndex, value); 771 valueOperands.push_back(std::make_pair(operandIndex, ~0U)); 772 } else { 773 unsigned memoryIndex = ~0U; 774 if ((weights.memoryCount() != 0) && 775 (kAllWeightsInOnePool || (randFrac() < 0.5))) { 776 memoryIndex = randUInt(weights.memoryCount()); 777 } else { 778 memoryIndex = weights.addMemory(); 779 } 780 const size_t length = problemSize * problemSize * sizeof(float); 781 const unsigned regionIndex = weights.addRegion(memoryIndex, length); 782 valueOperands.push_back(std::make_pair(operandIndex, regionIndex)); 783 } 784 } 785 break; 786 } 787 default: 788 FAIL(); 789 } 790 operationInputs[normalOperationInputIndexes[i]] = operandIndex; 791 } 792 if (isRootOperation) { 793 rootOperationCount++; 794 } 795 796 // OUTPUTS ///////////////////////////////////////////////////////////////////////////////// 797 798 std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs); 799 std::generate(operationOutputs.begin(), operationOutputs.end(), 800 [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions, 801 allowUnknownDimensions, this]{ 802 // 3% unknowns causes ~35% of partitionings to fail 803 // (determined by commenting out the fallback code, 804 // running tests and noting number of failures). 
805 if (allowUnknownDimensions && randFrac() < 0.03) { 806 hasUnknownDimensions = true; 807 return model.addOperand(&unknownDimensionsType); 808 } else { 809 return model.addOperand(&problemType); 810 } 811 }); 812 813 // OPERATION /////////////////////////////////////////////////////////////////////////////// 814 815 const uint32_t operationIndex = 816 model.addOperation(operationPattern.mOperationType, 817 operationInputs, operationOutputs); 818 deadOperations.insert(operationIndex); 819 std::for_each(operationOutputs.begin(), operationOutputs.end(), 820 [&deadOperands, operationIndex](uint32_t operandIndex) { 821 deadOperands.insert(std::make_pair(operandIndex, operationIndex)); 822 }); 823 } 824 825 // Now finalize the weights. 826 weights.layout(); 827 for (const auto& valueOperand : valueOperands) { 828 const uint32_t operandIndex = valueOperand.first; 829 const unsigned regionIndex = valueOperand.second; 830 831 if (regionIndex == ~0U) { 832 continue; 833 } 834 835 const WrapperMemory* memory; 836 uint32_t offset, length; 837 float* region = 838 static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length)); 839 assert(length == problemSize * problemSize * sizeof(float)); 840 std::generate(region, region + problemSize * problemSize, [this]{ return randFrac(); }); 841 model.setOperandValueFromMemory(operandIndex, memory, offset, length); 842 } 843 844 // Now select model outputs. 
845 for (uint32_t operationIdx = 0, operationCount = model.operationCount(); 846 operationIdx < operationCount; operationIdx++) { 847 const auto& outputs = model.getOperationOutputs(operationIdx); 848 for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount; 849 outputIdx++) { 850 bool modelOutput = false; 851 const uint32_t operandIndex = outputs[outputIdx]; 852 const auto deadOperandI = deadOperands.find(operandIndex); 853 if (deadOperandI != deadOperands.end()) { 854 // This is not consumed within the model, so unless we 855 // make it an output of the model, it's dead. The 856 // further along we are in generating this model 857 // (i.e., the more operations we have generated), the 858 // more likely we are to classify this operation 859 // output as a model output. 860 const double probabilityOfModelOutput = 861 0.50 * [](double x){ return x*x; }((operationIdx + 1) / operationCount); 862 modelOutput = (randFrac() < probabilityOfModelOutput); 863 } else { 864 // This is consumed within the model, so we'll rarely 865 // make it an output of the model. 866 modelOutput = (randFrac() < 0.05); 867 } 868 if (!modelOutput) { 869 continue; 870 } 871 modelOutputs.push_back(operandIndex); 872 if (deadOperandI != deadOperands.end()) { 873 deadOperands.erase(deadOperandI); 874 const auto deadOperationI = deadOperations.find(operationIdx); 875 if (deadOperationI != deadOperations.end()) { 876 deadOperations.erase(deadOperationI); 877 } 878 } 879 } 880 } 881 if (!allowDeadOperations) { 882 // For each dead operation, pick a random output to become a model output. 883 for (uint32_t deadOperationIndex : deadOperations) { 884 const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex); 885 const uint32_t deadOperandIndex = 886 deadOperationOutputs[randUInt(deadOperationOutputs.size())]; 887 modelOutputs.push_back(deadOperandIndex); 888 } 889 } 890 // A model must have at least one output. 
891 if (modelOutputs.empty()) { 892 const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount())); 893 modelOutputs.push_back(outputs[randUInt(outputs.size())]); 894 } 895 896 model.identifyInputsAndOutputs(modelInputs, modelOutputs); 897 #ifdef VERBOSE 898 { 899 std::cout << "Original model: " << ModelStats(&model) << std::endl; 900 std::cout << "rootOperationCount = " << rootOperationCount 901 << ", deadOperations = "; 902 if (allowDeadOperations) { 903 std::cout << deadOperations.size(); 904 } else { 905 std::cout << "forbidden (converted " << deadOperations.size() << ")"; 906 } 907 std::cout << std::endl; 908 } 909 #endif 910 ASSERT_EQ(model.finish(), Result::NO_ERROR); 911 graphDump(model); 912 913 // Non-partitioned compilation. 914 TestCompilation c(&model); 915 ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR); 916 ASSERT_EQ(c.finish(), Result::NO_ERROR); 917 918 // Create some drivers for partitioned compilation. 919 assert(!signatures.empty()); 920 std::vector<std::set<Signature>> signaturesForDriver(signatures.size()); 921 // First assign each signature to a random driver (a driver is 922 // just represented as an entry in the signaturesForDriver 923 // vector). 924 for (Signature signature : signatures) { 925 signaturesForDriver[randUInt(signatures.size())].insert(signature); 926 } 927 // Now remove each entry that has no signatures. 928 auto firstExtra = 929 std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(), 930 [](const std::set<Signature>& sigSet) { return sigSet.empty(); }); 931 if (firstExtra != signaturesForDriver.end()) { 932 signaturesForDriver.erase(firstExtra, signaturesForDriver.end()); 933 } 934 // Now actually create the drivers. 
935 std::vector<std::shared_ptr<Device>> devices; 936 for (unsigned i = 0; i < signaturesForDriver.size(); i++) { 937 const std::string name = "TestDriver(" + std::to_string(i) + ")"; 938 devices.push_back(std::make_shared<Device>( 939 name, new TestDriver(name.c_str(), signaturesForDriver[i]))); 940 ASSERT_TRUE(devices.back()->initialize()); 941 } 942 943 // Partitioned compilation. 944 // For test cases without unknown intermediate operand sizes we require the 945 // partitioning to succeed without CPU fallback. With unknown sizes we 946 // retry with a fallback if the non-fallback partitioning fails and require 947 // the fallback to succeed. 948 TestCompilation cNoFallback(&model); 949 TestCompilation cWithFallback(&model); 950 TestCompilation *c2 = nullptr; 951 ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback), 952 Result::NO_ERROR); 953 auto compilationResult = cNoFallback.finish(devices); 954 if (hasUnknownDimensions && compilationResult == Result::OP_FAILED && 955 cNoFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()) { 956 ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback), 957 Result::NO_ERROR); 958 ASSERT_EQ(cWithFallback.finish(devices), Result::NO_ERROR); 959 c2 = &cWithFallback; 960 } else { 961 ASSERT_EQ(compilationResult, Result::NO_ERROR); 962 c2 = &cNoFallback; 963 } 964 965 #ifdef VERBOSE 966 { 967 std::cout << "signatures = " << signatures.size() 968 << ", devices = " << devices.size() << std::endl; 969 const ExecutionPlan& plan = c2->getExecutionPlan(); 970 switch (plan.forTest_getKind()) { 971 case ExecutionPlan::Kind::SIMPLE: 972 std::cout << "plan: simple" << std::endl; 973 break; 974 case ExecutionPlan::Kind::COMPOUND: { 975 const auto& steps = plan.forTest_compoundGetSteps(); 976 std::set<const Device*> devicesInPlan; 977 for (const auto& step : steps) { 978 devicesInPlan.insert(step->getDevice().get()); 979 } 980 std::cout << "plan: compound, " << 
steps.size() << " steps over " 981 << devicesInPlan.size() << " devices" << std::endl; 982 for (unsigned i = 0; i < steps.size(); i++) { 983 std::cout << "Step " << i << ": " 984 << ModelStats(steps[i]->getSubModel()) << std::endl; 985 } 986 break; 987 } 988 default: 989 std::cout << "Unexpected plan kind: " 990 << static_cast<unsigned>(plan.forTest_getKind()); 991 break; 992 } 993 } 994 #endif 995 996 // For execution: 997 // - create master inputs (one long vector) and master output value 998 // - master inputs will be copied to actual inputs before each 999 // of the two executions 1000 // - master output will be used to fill actual outputs before each 1001 // of the two executions 1002 // - create actual inputs and outputs 1003 // - first execution (non-partitioned) 1004 // - initialize inputs and (to avoid unrelated oddities) outputs 1005 // - execute 1006 // - copy outputs to a save area (one long vector) 1007 // - second execution (partitioned) 1008 // - (to avoid unrelated oddities) initialize inputs and outputs 1009 // - execute 1010 // - compare outputs to save area 1011 1012 // If the runtime and drivers are working properly, execution 1013 // should not change the inputs. Nonetheless, we reinitialize the 1014 // inputs for each execution, so as to avoid unrelated problems 1015 // appearing to be problems related to unpartitioned execution 1016 // versus partitioned execution. Similarly, execution behavior 1017 // should not be dependent on the outputs; but we'll initialize the 1018 // outputs anyway. 1019 std::vector<float> masterInputs(problemSize * problemSize * model.inputCount()); 1020 std::generate(masterInputs.begin(), masterInputs.end(), [this]{ return randFrac(); }); 1021 const float masterOutput = randFrac(); 1022 1023 // Create the memory for the actual inputs and outputs. 
    // Describes one model input or output and where its backing storage
    // lives for this test run.
    struct InputOutputDescriptor {
        enum Kind { INPUT, OUTPUT };
        Kind mKind;

        // The input or output either resides in a local buffer
        // (mVector, in which case mMemoryRegion is ignored); or in a
        // shared memory region within a TestMemories instance
        // (mMemoryRegion, in which case mVector is ignored).
        enum Location { VECTOR, REGION };
        Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }

        std::vector<float> mVector;
        unsigned mMemoryRegion;
    };
    // The first inputCount() descriptors are inputs; the rest are outputs
    // (matching the index spaces used by setInput*/setOutput* below).
    std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
    for (unsigned i = 0; i < ioDescriptors.size(); i++) {
        ioDescriptors[i].mKind = (i < model.inputCount()
                                  ? InputOutputDescriptor::INPUT
                                  : InputOutputDescriptor::OUTPUT);
    }
    // We randomly interleave inputs and outputs in creation
    // order, because when we create memory regions in a
    // TestMemories instance, the order in which regions are
    // created within a single Memory is the order they'll be laid
    // out in that memory; and when we have inputs and outputs
    // within the same Memory, we want the possibility that
    // they'll be interleaved.
    std::random_shuffle(ioDescriptors.begin(), ioDescriptors.end(),
                        [this](unsigned n) { return randUInt(n); });
    TestMemories ioMemories;
    for (auto &desc : ioDescriptors) {
        if (randFrac() < 0.5) {
            // Back this input/output with a plain local buffer.
            desc.mVector.resize(problemSize * problemSize);
        } else {
            // Back this input/output with a region in shared memory,
            // either reusing an existing Memory or adding a new one.
            // TODO: common this with the way we create IK_VALUE inputs?
1059 unsigned memoryIndex = ~0U; 1060 if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) { 1061 memoryIndex = randUInt(ioMemories.memoryCount()); 1062 } else { 1063 memoryIndex = ioMemories.addMemory(); 1064 } 1065 const size_t length = problemSize * problemSize * sizeof(float); 1066 desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length); 1067 } 1068 } 1069 ioMemories.layout(); 1070 1071 // Function to set up actual inputs and outputs (initializing them 1072 // and telling the WrapperExecution about them). 1073 auto prepareForExecution = 1074 [&model, &ioDescriptors, &ioMemories, 1075 &masterInputs, &masterOutput, problemSize, &problemType](WrapperExecution *e) { 1076 uint32_t inputIndex = 0, outputIndex = 0; 1077 for (auto &desc : ioDescriptors) { 1078 if (desc.getLocation() == InputOutputDescriptor::VECTOR) { 1079 if (desc.mKind == InputOutputDescriptor::INPUT) { 1080 const size_t inputOffset = inputIndex * problemSize * problemSize; 1081 std::copy(masterInputs.begin() + inputOffset, 1082 masterInputs.begin() + inputOffset + problemSize * problemSize, 1083 desc.mVector.begin()); 1084 e->setInput(inputIndex++, desc.mVector.data(), 1085 desc.mVector.size() * sizeof(float)); 1086 } else { 1087 std::fill(desc.mVector.begin(), 1088 desc.mVector.begin() + problemSize * problemSize, 1089 masterOutput); 1090 e->setOutput(outputIndex++, desc.mVector.data(), 1091 desc.mVector.size() * sizeof(float), 1092 &problemType.operandType); 1093 } 1094 } else { 1095 const WrapperMemory* memory; 1096 uint32_t offset, length; 1097 float* region = 1098 static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion, 1099 &memory, &offset, &length)); 1100 assert(length == problemSize * problemSize * sizeof(float)); 1101 if (desc.mKind == InputOutputDescriptor::INPUT) { 1102 const size_t inputOffset = inputIndex * problemSize * problemSize; 1103 std::copy(masterInputs.begin() + inputOffset, 1104 masterInputs.begin() + inputOffset + problemSize * problemSize, 1105 region); 
1106 e->setInputFromMemory(inputIndex++, memory, offset, length); 1107 } else { 1108 std::fill(region, 1109 region + problemSize * problemSize, 1110 masterOutput); 1111 e->setOutputFromMemory(outputIndex++, memory, offset, length, 1112 &problemType.operandType); 1113 } 1114 } 1115 }; 1116 assert(inputIndex == model.inputCount()); 1117 assert(outputIndex == model.outputCount()); 1118 }; 1119 1120 // Non-partitioned execution. 1121 WrapperExecution e(&c); 1122 ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e)); 1123 ASSERT_EQ(e.compute(), Result::NO_ERROR); 1124 1125 // Copy the outputs of the non-partitioned execution to a save area. 1126 std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount()); 1127 { 1128 uint32_t outputIndex = 0; 1129 for (const auto& desc : ioDescriptors) { 1130 if (desc.mKind != InputOutputDescriptor::OUTPUT) { 1131 continue; 1132 } 1133 const size_t outputOffset = outputIndex * problemSize * problemSize; 1134 if (desc.getLocation() == InputOutputDescriptor::VECTOR) { 1135 std::copy(desc.mVector.begin(), 1136 desc.mVector.end(), 1137 nonPartitionedOutputs.begin() + outputOffset); 1138 } else { 1139 float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion)); 1140 std::copy(region, 1141 region + problemSize * problemSize, 1142 nonPartitionedOutputs.begin() + outputOffset); 1143 } 1144 #ifdef VERBOSE 1145 { 1146 std::cout << "output[" << outputIndex << "] = {"; 1147 for (auto I = nonPartitionedOutputs.begin() + outputOffset, 1148 E = nonPartitionedOutputs.begin() + 1149 outputOffset + problemSize * problemSize; 1150 I != E; I++) { 1151 std::cout << " " << *I; 1152 } 1153 std::cout << " }" << std::endl; 1154 } 1155 #endif 1156 outputIndex++; 1157 } 1158 } 1159 1160 // Partitioned execution. 
    // Partitioned execution: run the same inputs through the compilation
    // that used the randomly generated drivers.
    WrapperExecution e2(c2);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
    ASSERT_EQ(e2.compute(), Result::NO_ERROR);

    // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
    // Exact (bitwise) equality is expected because both executions
    // ultimately run on the same CPU implementation.
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            SCOPED_TRACE(outputIndex);
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                ASSERT_TRUE(std::equal(desc.mVector.begin(),
                                       desc.mVector.end(),
                                       nonPartitionedOutputs.begin() + outputOffset));
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
                ASSERT_TRUE(std::equal(region,
                                       region + problemSize * problemSize,
                                       nonPartitionedOutputs.begin() + outputOffset));
            }
            outputIndex++;
        }
    }
}

} // namespace
} // namespace android