// Home | History | Annotate | Download | only in test
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #undef NDEBUG
     18 
     19 #include "Bridge.h"
     20 #include "CompilationBuilder.h"
     21 #include "Manager.h"
     22 #include "ModelBuilder.h"
     23 #include "NeuralNetworks.h"
     24 #include "NeuralNetworksWrapper.h"
     25 #include "SampleDriver.h"
     26 #include "Utils.h"
     27 #include "ValidateHal.h"
     28 
     29 #include <algorithm>
     30 #include <cassert>
     31 #include <cstdio>
     32 #include <random>
     33 #include <set>
     34 #include <tuple>
     35 #include <utility>
     36 #include <vector>
     37 
     38 #include <unistd.h>
     39 
     40 #include <android-base/logging.h>
     41 #include <android/sharedmem.h>
     42 #include <gtest/gtest.h>
     43 
     44 // Uncomment the following line to generate some debugging output that
     45 // may be useful when analyzing failures:
     46 //
     47 // #define VERBOSE VERBOSE
     48 
     49 // Uncomment the following line to generate graphs from models:
     50 //
     51 // #define GRAPH GRAPH
     52 
     53 // We randomly generate tests (model + input data) at runtime, and verify
     54 // that we get the same results whether we do partitioned compilation/execution
     55 // or non partitioned compilation/execution.  We perform a test as follows:
     56 //
     57 // (1) Randomly generate a model (graph and weights), randomly generate input
     58 //     data, randomly assign inputs and outputs to CPU memory or to shared
     59 //     memory.
     60 //
     61 //     Randomly leaves dimensions unset for intermediate operands.
     62 //
     63 // (2) Randomly generate drivers based on the sample driver, each of which
     64 //     executes models on the CPU.  They differ according to which operations
     65 //     they support.
     66 //
     67 // (3) Compile and execute without partitioning, saving off the results.
     68 //
     69 // (4) Compile and execute with partitioning.
     70 //
     71 // (5) Verify that the saved results from (3) match the results from (4).
     72 //
     73 // For simplicity, all data (model inputs, model outputs, weights,
     74 // temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
     75 // dimensions are fixed throughout a particular test case (and
     76 // randomly determined).  This prevents us from having to find a
     77 // mechanism to "resize" data (e.g., if ADD#a operates on data of size
     78 // 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
     79 // and ADD#b become inputs of ADD#c, do we need to insert one or more
     80 // operations between (say) ADD#a and ADD#c to convert ADD#2's data
     81 // from size 2x2 to size 3x3 in order to match ADD#b).  In the few
     82 // cases where an operand cannot be of this type, it is a constant
     83 // (e.g., activation functions and RNN bias).
     84 //
     85 // Each operation we generate has a signature (described in more
     86 // detail later).  The randomly generated drivers decide which
     87 // operations they can execute by checking operation signatures.  Once
     88 // we have built the model and know the set of signatures, we randomly
     89 // assign each signature to a driver.  No signature is supported by
     90 // multiple drivers -- we're not testing the logic that the
     91 // partitioning algorithm uses to select the best driver for an
     92 // operation.
     93 
     94 namespace android {
     95 
     96 using CompilationBuilder = nn::CompilationBuilder;
     97 using Device = nn::Device;
     98 using DeviceManager = nn::DeviceManager;
     99 using ExecutionPlan = nn::ExecutionPlan;
    100 using HidlModel = hardware::neuralnetworks::V1_1::Model;
    101 using MemoryBuilder = nn::Memory;
    102 using ModelBuilder = nn::ModelBuilder;
    103 using Result = nn::wrapper::Result;
    104 using SampleDriver = nn::sample_driver::SampleDriver;
    105 using WrapperCompilation = nn::wrapper::Compilation;
    106 using WrapperExecution = nn::wrapper::Execution;
    107 using WrapperMemory = nn::wrapper::Memory;
    108 using WrapperModel = nn::wrapper::Model;
    109 using WrapperOperandType = nn::wrapper::OperandType;
    110 using WrapperType = nn::wrapper::Type;
    111 
    112 namespace {
    113 
    114 /// Configure test size //////////////////////////////////////////////////////////
    115 
// Upper bound on the number of operations per generated model.
// We may exceed this in order to connect otherwise disjoint subgraphs.
static const unsigned kMaxNumOperations = 100;

// We build models to process 2-D square tensors up to this size in each dimension;
// note that the API promotes by-value weights larger than 128 to by-reference,
// so we want to ensure that we can pick both types that exceed and types that do
// not exceed this size.
static const unsigned kMaxProblemSize = 8;

// First seed for pseudorandom test generation.
static const unsigned kFirstSeed = 0;

// Number of test cases.  Each test case gets its own seed
// (kFirstSeed, kFirstSeed + 1, ...).
static const unsigned kNumTestCases = 225;

// Force all graph weights into a single pool (as we recommend to users)
// or allow them to be distributed across multiple pools (more stress
// on the partitioning algorithm and the rest of the runtime)?
// Forcing all graph weights into a single pool may be necessary to
// prevent large graphs from running up against http://b/70302693
// "NNAPI overuses (?) fds".
static const bool kAllWeightsInOnePool = false;

//////////////////////////////////////////////////////////////////////////////////

// The signature of an operation consists of the operation type (e.g.,
// ADD) and the activation function (use -1 in the case of an
// operation type for which the activation function is inapplicable).
typedef std::pair<ANeuralNetworksOperationType, int> Signature;
    145 
    146 // This class adds some simple utilities on top of
    147 // ::android::nn::wrapper::Model.  For example, it provides access to
    148 // certain features from ModelBuilder that are not exposed by the base
    149 // class (such as inputCount() and operation index).
    150 class TestModel : public WrapperModel {
    151 public:
    152 
    153     uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
    154                           const std::vector<uint32_t>& outputs) {
    155         const uint32_t operationIndex = operationCount();
    156         mOperations.push_back(outputs);
    157         WrapperModel::addOperation(type, inputs, outputs);
    158         return operationIndex;
    159     }
    160 
    161     uint32_t operationCount() const {
    162         return mOperations.size();
    163     }
    164 
    165     uint32_t inputCount() const {
    166         return builder()->inputCount();
    167     }
    168     uint32_t outputCount() const {
    169         return builder()->outputCount();
    170     }
    171 
    172     const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
    173         assert(index < mOperations.size());
    174         return mOperations[index];
    175     }
    176 
    177     // All values are immediately copied into the model (we need to do
    178     // this ourselves in cases where the underlying NNAPI does not).
    179     void setOperandValue(uint32_t index, const std::vector<float>& value) {
    180         const size_t length = value.size() * sizeof(float);
    181 
    182         if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
    183             WrapperModel::setOperandValue(index, value.data(), length);
    184         } else {
    185             mOperandValues.push_back(value);
    186             WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
    187         }
    188     }
    189 
    190     void setOperandValue(uint32_t index, int32_t value) {
    191         assert(sizeof(value) <=  ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
    192         WrapperModel::setOperandValue(index, &value, sizeof(value));
    193     }
    194 
    195 private:
    196 
    197     const ModelBuilder* builder() const {
    198         return reinterpret_cast<const ModelBuilder*>(getHandle());
    199     }
    200 
    201     // Representation of operations: vector index is operation number,
    202     // vector value is operation's output operands.
    203     std::vector<std::vector<uint32_t>> mOperations;
    204 
    205     // Large operand values -- not immediately copied into the
    206     // WrapperModel, so remembered here instead.
    207     std::vector<std::vector<float>> mOperandValues;
    208 };
    209 
    210 // This class adds some simple utilities on top of
    211 // ::android::nn::wrapper::Compilation in order to provide access to
    212 // certain features from CompilationBuilder that are not exposed by
    213 // the base class.
    214 class TestCompilation : public WrapperCompilation {
    215 public:
    216     TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}
    217 
    218     Result setPartitioning(uint32_t partitioning) {
    219         return static_cast<Result>(builder()->setPartitioning(partitioning));
    220     }
    221 
    222     using WrapperCompilation::finish;
    223     Result finish(const std::vector<std::shared_ptr<Device>>& devices) {
    224         return static_cast<Result>(builder()->finish(devices));
    225     }
    226 
    227     const ExecutionPlan& getExecutionPlan() const {
    228         return builder()->forTest_getExecutionPlan();
    229     }
    230 
    231 private:
    232     const CompilationBuilder* builder() const {
    233         return reinterpret_cast<const CompilationBuilder*>(getHandle());
    234     }
    235     CompilationBuilder* builder() {
    236         return reinterpret_cast<CompilationBuilder*>(getHandle());
    237     }
    238 };
    239 
    240 // This class is used to manage a collection of memory regions,
    241 // disjoint windows onto a set of Memory instances, each of which is
    242 // associated with a single shared memory region.  Each region and
    243 // Memory instance is assigned a number.  The usage pattern is as
    244 // follows:
    245 // - Call addMemory() and addRegion() as many times as needed to
    246 //   declare (but not define) Memory instances and declare region
    247 //   instances.
    248 // - Call layout() to define the Memory instances.
    249 // - Call getRegion() as many times as needed to get the details
    250 //   of memory regions (such as address, or Memory/offset/length).
    251 // The Memory instances created by layout() are owned by the
    252 // TestMemories instance, and are destroyed when the TestMemories
    253 // instance is destroyed.
    254 class TestMemories {
    255 public:
    256     TestMemories() = default;
    257     ~TestMemories();
    258 
    259     TestMemories(const TestMemories&) = delete;
    260     TestMemories& operator=(const TestMemories&) = delete;
    261 
    262     unsigned addMemory() {
    263         assert(!mLayoutDone);
    264         mMemorySizes.push_back(0);
    265         return memoryCount() - 1;
    266     }
    267     unsigned memoryCount() const {
    268         return mMemorySizes.size();
    269     }
    270 
    271     unsigned addRegion(unsigned memoryIndex, uint32_t length) {
    272         assert(!mLayoutDone);
    273         assert(memoryIndex < memoryCount());
    274         uint32_t& memorySize = mMemorySizes[memoryIndex];
    275         auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
    276         mRegions.push_back(desc);
    277         memorySize += length;
    278         return regionCount() - 1;
    279     }
    280     unsigned regionCount() const {
    281         return mRegions.size();
    282     }
    283 
    284     void layout();
    285 
    286     void* getRegion(unsigned regionIndex,
    287                     const WrapperMemory** pMemory, uint32_t* pOffset, uint32_t* pLength) {
    288         assert(mLayoutDone);
    289         assert(regionIndex < regionCount());
    290         const auto& regionDescriptor = mRegions[regionIndex];
    291         const WrapperMemory* memory = &mMemorys[std::get<0>(regionDescriptor)];
    292         uint32_t offset = std::get<1>(regionDescriptor);
    293         uint32_t length = std::get<2>(regionDescriptor);
    294 
    295         uint8_t* buffer;
    296         if (reinterpret_cast<MemoryBuilder*>(memory->get())->getPointer(&buffer) !=
    297             ANEURALNETWORKS_NO_ERROR) {
    298             assert(0);
    299         }
    300 
    301         if (pMemory) *pMemory = memory;
    302         if (pOffset) *pOffset = offset;
    303         if (pLength) *pLength = length;
    304 
    305         return buffer + offset;
    306     }
    307 
    308     void* getRegion(unsigned regionIndex) {
    309         return getRegion(regionIndex, nullptr, nullptr, nullptr);
    310     }
    311 
    312 private:
    313     // Index is the memory index; value is the size of the memory
    314     // (aggregate size of all regions in the memory).
    315     std::vector<uint32_t> mMemorySizes;
    316 
    317     // Index is the memory index.
    318     std::vector<WrapperMemory> mMemorys;
    319     std::vector<int> mFDs;
    320 
    321     // Index is the region index; tuple represents memory index,
    322     // region offset within memory, region length.
    323     std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;
    324 
    325     // For sanity checking.
    326     bool mLayoutDone = false;
    327 };
    328 
    329 void TestMemories::layout() {
    330     assert(!mLayoutDone);
    331     for (uint32_t memorySize : mMemorySizes) {
    332         const int fd = ASharedMemory_create(nullptr, memorySize);
    333         assert(fd >= 0);
    334         mMemorys.emplace_back(memorySize, PROT_READ | PROT_WRITE, fd, 0);
    335         mFDs.push_back(fd);
    336     }
    337     mLayoutDone = true;
    338 }
    339 
    340 TestMemories::~TestMemories() {
    341     for (int fd : mFDs) {
    342         close(fd);
    343     }
    344 }
    345 
class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
public:
    // The test parameter seeds the pseudorandom engine, making every test
    // case reproducible from its seed alone.
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    // Computes the (operation type, activation function) signature of an
    // operation; see the comment on Signature above.
    static Signature getSignature(const HidlModel& model, const Operation& operation);

protected:
    // Dumps the model as a graph, but only when GRAPH is defined.
    void graphDump(const WrapperModel& model);

    bool randBool() {
        return randUInt(2) == 1;
    }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //    activation function nor "special".
    struct OperationPattern {
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand).  Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };

    // Table of the operations the generator can emit; defined below.
    static const OperationPattern kOperationPatterns[];

    // mMakeSpecialInput implementation for ANEURALNETWORKS_RNN: produces
    // the bias operand (input 3) as a randomly-valued 1-D constant tensor;
    // all other inputs are "normal" (returns -1).
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, { problemSize });
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(),
                      [this]{ return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    // Debug helper: streams operation/operand/input/output counts of a
    // model, with input and output counts also shown as fractions of the
    // operand count.
    class ModelStats {
    public:
        ModelStats(const ModelBuilder* model) :
                mBuilder(model) { }
        ModelStats(const WrapperModel* model) :
                mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) { }
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount
                << ", inputCount = " << inputCount
                << " (" << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount
                << " (" << (double(outputCount) / operandCount) << ")";
            return out;
        }
    private:
        const ModelBuilder* mBuilder;
    };
#endif

private:
    // Pseudorandom engine (seeded from the test parameter) and the unit
    // distribution underlying randBool()/randFrac()/randUInt().
    std::mt19937 mRandNumEng;
    std::uniform_real_distribution<double> mRandNumUnitDist;
};
    435 
// Table of operations the generator can emit.  Fields are, in order:
// operation type, number of inputs, number of outputs, activation-function
// input index (<0 if none), and special-input maker (nullptr if none) --
// see OperationPattern.
const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
    { ANEURALNETWORKS_ADD, 3, 1, 2, nullptr },
    { ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr },
    { ANEURALNETWORKS_MUL, 3, 1, 2, nullptr },
    { ANEURALNETWORKS_RNN, 6, 2, 5, &RandomPartitioningTest::makeRnnSpecialInput },
    { ANEURALNETWORKS_TANH, 1, 1, -1, nullptr },
};
    443 
    444 Signature RandomPartitioningTest::getSignature(const HidlModel& model, const Operation& operation) {
    445     static const std::map<ANeuralNetworksOperationType, int> kOperationToActivation = []() {
    446         std::map<ANeuralNetworksOperationType, int> result;
    447         for (const auto& pattern : kOperationPatterns) {
    448             result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
    449         }
    450         return result;
    451     }();
    452 
    453     const ANeuralNetworksOperationType operationType =
    454             static_cast<ANeuralNetworksOperationType>(operation.type);
    455     const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
    456     if (activationFunctionInputIndex < 0) {
    457         return Signature(operationType, -1);
    458     }
    459 
    460     const Operand& operand = model.operands[operation.inputs[activationFunctionInputIndex]];
    461     assert(operand.lifetime == OperandLifeTime::CONSTANT_COPY);
    462     assert(operand.type == OperandType::INT32);
    463     int32_t value;
    464     memcpy(&value,
    465            &model.operandValues[operand.location.offset],
    466            operand.location.length);
    467     return Signature(operationType, value);
    468 }
    469 
// Dumps the model as a graph named after the test's seed -- but only when
// GRAPH is defined; otherwise this is a no-op.
void RandomPartitioningTest::graphDump([[maybe_unused]] const WrapperModel& model) {
#ifdef GRAPH
    const std::string name = "Test-" + std::to_string(GetParam());
    nn::bridge_tests::graphDump(name.c_str(),
                                reinterpret_cast<const ModelBuilder*>(model.getHandle()));
#endif
}
    477 
class TestDriver : public SampleDriver {
public:
    // Behaves like SampleDriver, except that it only supports
    // operations with the specified signatures.
    TestDriver(const char* name, std::set<Signature> signatures) :
            SampleDriver(name), mSignatures(std::move(signatures)) { }

    // Reports the same middling performance numbers for every driver, so
    // that partitioning decisions are driven by operation support rather
    // than by performance differences.
    Return<void> getCapabilities_1_1(getCapabilities_1_1_cb _hidl_cb) override {
        android::nn::initVLogMask();
        Capabilities capabilities =
                {.float32Performance = {.execTime = 0.75f, .powerUsage = 0.75f},
                 .quantized8Performance = {.execTime = 0.75f, .powerUsage = 0.75f},
                 .relaxedFloat32toFloat16Performance = {.execTime = 0.75f, .powerUsage = 0.75f}};
        _hidl_cb(ErrorStatus::NONE, capabilities);
        return Void();
    }

    // An operation is supported iff its signature is in mSignatures.
    Return<void> getSupportedOperations_1_1(const HidlModel& model,
                                            getSupportedOperations_cb cb) override {
        if (nn::validateModel(model)) {
            const size_t count = model.operations.size();
            std::vector<bool> supported(count);
            for (size_t i = 0; i < count; i++) {
                supported[i] =
                    (mSignatures.count(
                        RandomPartitioningTest::getSignature(
                            model,
                            model.operations[i])) != 0);
            }
            cb(ErrorStatus::NONE, supported);
        } else {
            // Invalid model: report an empty support vector.
            std::vector<bool> supported;
            cb(ErrorStatus::INVALID_ARGUMENT, supported);
        }
        return Void();
    }

    Return<ErrorStatus> prepareModel_1_1(const HidlModel& model, ExecutionPreference preference,
                                         const sp<IPreparedModelCallback>& callback) override {
        // NOTE: We verify that all operations in the model are supported.
        ErrorStatus outStatus = ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_1(
            model,
            [&outStatus](ErrorStatus inStatus, const hidl_vec<bool>& supportedOperations) {
                if (inStatus == ErrorStatus::NONE) {
                    if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                    [](bool v){ return v; })) {
                        outStatus = ErrorStatus::NONE;
                    }
                }
            });
        if (ret.isOk() && (outStatus == ErrorStatus::NONE)) {
            // Every operation is supported; delegate the actual preparation.
            return SampleDriver::prepareModel_1_1(model, preference, callback);
        } else {
            // HIDL protocol: failure must be reported both through the
            // callback and through the return value.
            callback->notify(ErrorStatus::INVALID_ARGUMENT, nullptr);
            return ErrorStatus::INVALID_ARGUMENT;
        }
    }

private:
    // The set of operation signatures this driver claims to support.
    const std::set<Signature> mSignatures;
};
    540 
// Instantiate the test once per seed in [kFirstSeed, kFirstSeed + kNumTestCases).
INSTANTIATE_TEST_CASE_P(Seed, RandomPartitioningTest,
                        ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));
    543 
    544 TEST_P(RandomPartitioningTest, Test) {
    545     LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();
    546 
    547 #ifdef VERBOSE
    548     std::cout << std::setprecision(2) << std::fixed << std::setw(4);
    549 #endif
    550 
    551     const unsigned problemSize = 1+randUInt(kMaxProblemSize);
    552     const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, { problemSize, problemSize });
    553     const WrapperOperandType unknownDimensionsType(WrapperType::TENSOR_FLOAT32, { 0, 0 });
    554 
    555     static const WrapperOperandType activationFunctionType(WrapperType::INT32, { });
    556 
    557     const unsigned numOperations = 2+randUInt(kMaxNumOperations-1);
    558     const bool allowDeadOperations = (randFrac() < 0.2);
    559     const bool allowUnknownDimensions = (randFrac() < 0.25);
    560 
    561     // TODO: The current algorithm builds the graph in a forward
    562     // direction (i.e., later-generated operations consume outputs
    563     // from earlier-generated operations).  In order to get more
    564     // variation in graph topology, perhaps we should also create an
    565     // algorithm to build the graph in a backward direction (i.e.,
    566     // later-generated operations produce outputs to be consumed by
    567     // earlier-generated operations).
    568     [[maybe_unused]] const bool buildForward = randBool();
    569 
    570     // TODO: Add a form of forced connectivity that operates by
    571     // joining disjoint subgraphs rather than by forcing a root.
    572     const bool forceCommonRoot = (randFrac() < 0.75);
    573 
    574     TestModel model;
    575     std::vector<uint32_t> modelInputs;
    576     std::vector<uint32_t> modelOutputs;
    577 
    578     // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
    579     TestMemories weights;
    580 
    581     // Keep track of all normal (i.e., not activation function and not
    582     // "special") operands that are values (from setOperandValue*()).
    583     // .first: operand index
    584     // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
    585     //          otherwise, the operand has yet to be defined, and this is the corresponding
    586     //          region index in "weights"
    587     std::vector<std::pair<uint32_t, unsigned>> valueOperands;
    588 
    589     // An operand is "dead" if it is not consumed by another operation
    590     // and is not a model output.  Key is operand index; value is
    591     // operation index.
    592     std::map<uint32_t, uint32_t> deadOperands;
    593 
    594     // An operation is "dead" if all of its outputs are dead.
    595     std::set<uint32_t> deadOperations;
    596 
    597     // Collect the signatures of operations in this model.
    598     std::set<Signature> signatures;
    599 
    600     // For reporting purposes, keep track of the number of root
    601     // operations (those that do not consume results produced by other
    602     // operations).
    603     unsigned rootOperationCount = 0;
    604 
    605     // Track if we added operands with unknown dimensions. In this case,
    606     // partitioned compilation will fail if such an operand is read in a
    607     // different partition than it is written.
    608     bool hasUnknownDimensions = false;
    609 
    610     // Generate operations.
    611     for (unsigned i = 0; i < numOperations; i++) {
    612         const unsigned operationPatternIndex =
    613                 randUInt(sizeof(kOperationPatterns)/sizeof(kOperationPatterns[0]));
    614         const auto& operationPattern = kOperationPatterns[operationPatternIndex];
    615 
    616         // INPUTS //////////////////////////////////////////////////////////////////////////////////
    617 
    618         std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);
    619 
    620         // First, process activation function and special inputs, and
    621         // keep track of which inputs remain.
    622         std::vector<uint32_t> normalOperationInputIndexes;
    623         int32_t activationFunction = -1;
    624         for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
    625              operationInputIndex++) {
    626             if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
    627                 const uint32_t operandIndex = model.addOperand(&activationFunctionType);
    628                 activationFunction = randUInt(4);
    629                 if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
    630                     // workaround for http://b/69011131
    631                     activationFunction = ANEURALNETWORKS_FUSED_NONE;
    632                 }
    633                 model.setOperandValue(operandIndex, activationFunction);
    634                 operationInputs[operationInputIndex] = operandIndex;
    635                 continue;
    636             }
    637             if (operationPattern.mMakeSpecialInput != nullptr) {
    638                 const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
    639                     problemSize, &model, operationInputIndex);
    640                 if (operandIndex >= 0) {
    641                     operationInputs[operationInputIndex] = operandIndex;
    642                     continue;
    643                 }
    644             }
    645             normalOperationInputIndexes.push_back(operationInputIndex);
    646         }
    647         assert(!normalOperationInputIndexes.empty());
    648         signatures.insert(Signature(operationPattern.mOperationType, activationFunction));
    649 
    650         // A (normal) operation input can be one of:
    651         // - a new or existing model input
    652         // - an output of an existing operation
    653         // - an OperandValue
    654         // - an OperandValueFromMemory
    655         // Some guidelines:
    656         // - We generally don't want all of an operation's inputs to be values (constants)
    657         const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
    658         //     How many of this operation's inputs are constants?
    659         unsigned normalOperationInputConstantCount = 0;
    660         //     How many of this operation's inputs are model inputs?
    661         unsigned normalOperationInputModelInputCount = 0;
    662         // We begin by deciding what kind of input each (normal) operation will be; we don't
    663         // actually pick input operand indexes at this time, because we might override this
    664         // decision later.
    665         enum InputKind { IK_MODEL_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
    666         std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
    667         std::generate(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
    668                       [this, &model,
    669                        numOperations,
    670                        normalOperationInputCount,
    671                        &normalOperationInputConstantCount,
    672                        &normalOperationInputModelInputCount]() -> InputKind {
    673                           // Constant?  Becomes less likely the more
    674                           // constants we already have as inputs to
    675                           // this operation.
    676                           if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
    677                                                    normalOperationInputCount)) {
    678                               normalOperationInputConstantCount++;
    679                               return IK_VALUE;
    680                           }
    681 
    682                           // Model input?  Becomes less likely the
    683                           // more model inputs we already have as
    684                           // inputs to this operation, and the further
    685                           // along we are in generating this model
    686                           // (i.e., the more operations we have
    687                           // generated).
    688                           if ((model.operationCount() == 0) ||
    689                               (randFrac() < 0.5 *
    690                                (1 - double(normalOperationInputModelInputCount) /
    691                                 normalOperationInputCount) *
    692                                std::min(0.3, (1 - double(model.operationCount()) /
    693                                               numOperations)))) {
    694                               normalOperationInputModelInputCount++;
    695                               return IK_MODEL_INPUT;
    696                           }
    697 
    698                           // Else output of an existing operation.
    699                           return IK_OPERATION_OUTPUT;
    700                       });
    701 
    702         // Now force common root or model input, if necessary.  (A
    703         // model must have at least one input.)
    704         auto force =
    705                 [this, &normalOperationInputKinds, normalOperationInputCount](InputKind forceKind){
    706             if (std::none_of(normalOperationInputKinds.begin(),
    707                              normalOperationInputKinds.end(),
    708                              [forceKind](InputKind kind){ return kind == forceKind; })) {
    709                 normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
    710             }
    711         };
    712         if (forceCommonRoot && (model.operationCount() != 0)) {
    713             force(IK_OPERATION_OUTPUT);
    714         }
    715         if (modelInputs.empty()) {
    716             assert(model.operationCount() == 0);
    717             force(IK_MODEL_INPUT);
    718         }
    719 
    720         // Finally create the normal inputs.
    721         bool isRootOperation = true;
    722         for (unsigned i = 0; i < normalOperationInputCount; i++) {
    723             uint32_t operandIndex = ~0U;
    724             switch (normalOperationInputKinds[i]) {
    725                 case IK_MODEL_INPUT: {
    726                     if (!modelInputs.empty() && (randFrac() < 0.5)) {
    727                         operandIndex = modelInputs[randUInt(modelInputs.size())];
    728                     } else {
    729                         operandIndex = model.addOperand(&problemType);
    730                         modelInputs.push_back(operandIndex);
    731                     }
    732                     break;
    733                 }
    734                 case IK_OPERATION_OUTPUT: {
    735                     decltype(deadOperands.begin()) deadOperandI;
    736                     if (!deadOperands.empty() && (randFrac() < 0.5)) {
    737                         deadOperandI = deadOperands.begin();
    738                         std::advance(deadOperandI, randUInt(deadOperands.size()));
    739                         operandIndex = deadOperandI->first;
    740                     } else {
    741                         const uint32_t existingOperationIndex = randUInt(model.operationCount());
    742                         const auto& existingOperationOutputs =
    743                                 model.getOperationOutputs(existingOperationIndex);
    744                         operandIndex =
    745                             existingOperationOutputs[randUInt(existingOperationOutputs.size())];
    746                         deadOperandI = deadOperands.find(operandIndex);
    747                         assert(deadOperandI == deadOperands.end() ||
    748                                deadOperandI->second == existingOperationIndex);
    749                     }
    750                     if (deadOperandI != deadOperands.end()) {
    751                         const uint32_t correspondingOperation = deadOperandI->second;
    752                         deadOperands.erase(deadOperandI);
    753 
    754                         auto deadOperationI = deadOperations.find(correspondingOperation);
    755                         if (deadOperationI != deadOperations.end()) {
    756                             deadOperations.erase(deadOperationI);
    757                         }
    758                     }
    759                     isRootOperation = false;
    760                     break;
    761                 }
    762                 case IK_VALUE: {
    763                     if (!valueOperands.empty() && (randFrac() < 0.25)) {
    764                         operandIndex = valueOperands[randUInt(valueOperands.size())].first;
    765                     } else {
    766                         operandIndex = model.addOperand(&problemType);
    767                         if (randFrac() < 0.5) {
    768                             std::vector<float> value(problemSize * problemSize);
    769                             std::generate(value.begin(), value.end(), [this]{ return randFrac(); });
    770                             model.setOperandValue(operandIndex, value);
    771                             valueOperands.push_back(std::make_pair(operandIndex, ~0U));
    772                         } else {
    773                             unsigned memoryIndex = ~0U;
    774                             if ((weights.memoryCount() != 0) &&
    775                                 (kAllWeightsInOnePool || (randFrac() < 0.5))) {
    776                                 memoryIndex = randUInt(weights.memoryCount());
    777                             } else {
    778                                 memoryIndex = weights.addMemory();
    779                             }
    780                             const size_t length = problemSize * problemSize * sizeof(float);
    781                             const unsigned regionIndex = weights.addRegion(memoryIndex, length);
    782                             valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
    783                         }
    784                     }
    785                     break;
    786                 }
    787                 default:
    788                     FAIL();
    789             }
    790             operationInputs[normalOperationInputIndexes[i]] = operandIndex;
    791         }
    792         if (isRootOperation) {
    793             rootOperationCount++;
    794         }
    795 
    796         // OUTPUTS /////////////////////////////////////////////////////////////////////////////////
    797 
    798         std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
    799         std::generate(operationOutputs.begin(), operationOutputs.end(),
    800                       [&model, &problemType, &unknownDimensionsType, &hasUnknownDimensions,
    801                        allowUnknownDimensions, this]{
    802                           // 3% unknowns causes ~35% of partitionings to fail
    803                           // (determined by commenting out the fallback code,
    804                           // running tests and noting number of failures).
    805                           if (allowUnknownDimensions && randFrac() < 0.03) {
    806                               hasUnknownDimensions = true;
    807                               return model.addOperand(&unknownDimensionsType);
    808                           } else {
    809                               return model.addOperand(&problemType);
    810                           }
    811                       });
    812 
    813         // OPERATION ///////////////////////////////////////////////////////////////////////////////
    814 
    815         const uint32_t operationIndex =
    816                 model.addOperation(operationPattern.mOperationType,
    817                                    operationInputs, operationOutputs);
    818         deadOperations.insert(operationIndex);
    819         std::for_each(operationOutputs.begin(), operationOutputs.end(),
    820                       [&deadOperands, operationIndex](uint32_t operandIndex) {
    821                           deadOperands.insert(std::make_pair(operandIndex, operationIndex));
    822                       });
    823     }
    824 
    825     // Now finalize the weights.
    826     weights.layout();
    827     for (const auto& valueOperand : valueOperands) {
    828         const uint32_t operandIndex = valueOperand.first;
    829         const unsigned regionIndex = valueOperand.second;
    830 
    831         if (regionIndex == ~0U) {
    832             continue;
    833         }
    834 
    835         const WrapperMemory* memory;
    836         uint32_t offset, length;
    837         float* region =
    838                 static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
    839         assert(length == problemSize * problemSize * sizeof(float));
    840         std::generate(region, region + problemSize * problemSize, [this]{ return randFrac(); });
    841         model.setOperandValueFromMemory(operandIndex, memory, offset, length);
    842     }
    843 
    844     // Now select model outputs.
    845     for (uint32_t operationIdx = 0, operationCount = model.operationCount();
    846          operationIdx < operationCount; operationIdx++) {
    847         const auto& outputs = model.getOperationOutputs(operationIdx);
    848         for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
    849              outputIdx++) {
    850             bool modelOutput = false;
    851             const uint32_t operandIndex = outputs[outputIdx];
    852             const auto deadOperandI = deadOperands.find(operandIndex);
    853             if (deadOperandI != deadOperands.end()) {
    854                 // This is not consumed within the model, so unless we
    855                 // make it an output of the model, it's dead.  The
    856                 // further along we are in generating this model
    857                 // (i.e., the more operations we have generated), the
    858                 // more likely we are to classify this operation
    859                 // output as a model output.
    860                 const double probabilityOfModelOutput =
    861                         0.50 * [](double x){ return x*x; }((operationIdx + 1) / operationCount);
    862                 modelOutput = (randFrac() < probabilityOfModelOutput);
    863             } else {
    864                 // This is consumed within the model, so we'll rarely
    865                 // make it an output of the model.
    866                 modelOutput = (randFrac() < 0.05);
    867             }
    868             if (!modelOutput) {
    869                 continue;
    870             }
    871             modelOutputs.push_back(operandIndex);
    872             if (deadOperandI != deadOperands.end()) {
    873                 deadOperands.erase(deadOperandI);
    874                 const auto deadOperationI = deadOperations.find(operationIdx);
    875                 if (deadOperationI != deadOperations.end()) {
    876                     deadOperations.erase(deadOperationI);
    877                 }
    878             }
    879         }
    880     }
    881     if (!allowDeadOperations) {
    882         // For each dead operation, pick a random output to become a model output.
    883         for (uint32_t deadOperationIndex : deadOperations) {
    884             const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
    885             const uint32_t deadOperandIndex =
    886                     deadOperationOutputs[randUInt(deadOperationOutputs.size())];
    887             modelOutputs.push_back(deadOperandIndex);
    888         }
    889     }
    890     // A model must have at least one output.
    891     if (modelOutputs.empty()) {
    892         const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
    893         modelOutputs.push_back(outputs[randUInt(outputs.size())]);
    894     }
    895 
    896     model.identifyInputsAndOutputs(modelInputs, modelOutputs);
    897 #ifdef VERBOSE
    898     {
    899         std::cout << "Original model: " << ModelStats(&model) << std::endl;
    900         std::cout << "rootOperationCount = " << rootOperationCount
    901                   << ", deadOperations = ";
    902         if (allowDeadOperations) {
    903             std::cout << deadOperations.size();
    904         } else {
    905             std::cout << "forbidden (converted " << deadOperations.size() << ")";
    906         }
    907         std::cout << std::endl;
    908     }
    909 #endif
    910     ASSERT_EQ(model.finish(), Result::NO_ERROR);
    911     graphDump(model);
    912 
    913     // Non-partitioned compilation.
    914     TestCompilation c(&model);
    915     ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    916     ASSERT_EQ(c.finish(), Result::NO_ERROR);
    917 
    918     // Create some drivers for partitioned compilation.
    919     assert(!signatures.empty());
    920     std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
    921     //     First assign each signature to a random driver (a driver is
    922     //     just represented as an entry in the signaturesForDriver
    923     //     vector).
    924     for (Signature signature : signatures) {
    925         signaturesForDriver[randUInt(signatures.size())].insert(signature);
    926     }
    927     //     Now remove each entry that has no signatures.
    928     auto firstExtra =
    929         std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
    930                        [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
    931     if (firstExtra != signaturesForDriver.end()) {
    932         signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
    933     }
    934     //     Now actually create the drivers.
    935     std::vector<std::shared_ptr<Device>> devices;
    936     for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
    937         const std::string name = "TestDriver(" + std::to_string(i) + ")";
    938         devices.push_back(std::make_shared<Device>(
    939             name, new TestDriver(name.c_str(), signaturesForDriver[i])));
    940         ASSERT_TRUE(devices.back()->initialize());
    941     }
    942 
    943     // Partitioned compilation.
    944     // For test cases without unknown intermediate operand sizes we require the
    945     // partitioning to succeed without CPU fallback. With unknown sizes we
    946     // retry with a fallback if the non-fallback partitioning fails and require
    947     // the fallback to succeed.
    948     TestCompilation cNoFallback(&model);
    949     TestCompilation cWithFallback(&model);
    950     TestCompilation *c2 = nullptr;
    951     ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
    952               Result::NO_ERROR);
    953     auto compilationResult = cNoFallback.finish(devices);
    954     if (hasUnknownDimensions && compilationResult == Result::OP_FAILED &&
    955         cNoFallback.getExecutionPlan().forTest_hasSubModelOutputsOfUnknownSize()) {
    956         ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
    957                   Result::NO_ERROR);
    958         ASSERT_EQ(cWithFallback.finish(devices), Result::NO_ERROR);
    959         c2 = &cWithFallback;
    960     } else {
    961         ASSERT_EQ(compilationResult, Result::NO_ERROR);
    962         c2 = &cNoFallback;
    963     }
    964 
    965 #ifdef VERBOSE
    966     {
    967         std::cout << "signatures = " << signatures.size()
    968                   << ", devices = " << devices.size() << std::endl;
    969         const ExecutionPlan& plan = c2->getExecutionPlan();
    970         switch (plan.forTest_getKind()) {
    971             case ExecutionPlan::Kind::SIMPLE:
    972                 std::cout << "plan: simple" << std::endl;
    973                 break;
    974             case ExecutionPlan::Kind::COMPOUND: {
    975                 const auto& steps = plan.forTest_compoundGetSteps();
    976                 std::set<const Device*> devicesInPlan;
    977                 for (const auto& step : steps) {
    978                     devicesInPlan.insert(step->getDevice().get());
    979                 }
    980                 std::cout << "plan: compound, " << steps.size() << " steps over "
    981                           << devicesInPlan.size() << " devices" << std::endl;
    982                 for (unsigned i = 0; i < steps.size(); i++) {
    983                     std::cout << "Step " << i << ": "
    984                               << ModelStats(steps[i]->getSubModel()) << std::endl;
    985                 }
    986                 break;
    987             }
    988             default:
    989                 std::cout << "Unexpected plan kind: "
    990                     << static_cast<unsigned>(plan.forTest_getKind());
    991                 break;
    992         }
    993     }
    994 #endif
    995 
    996     // For execution:
    997     // - create master inputs (one long vector) and master output value
    998     //   - master inputs will be copied to actual inputs before each
    999     //     of the two executions
   1000     //   - master output will be used to fill actual outputs before each
   1001     //     of the two executions
   1002     // - create actual inputs and outputs
   1003     // - first execution (non-partitioned)
   1004     //   - initialize inputs and (to avoid unrelated oddities) outputs
   1005     //   - execute
   1006     //   - copy outputs to a save area (one long vector)
   1007     // - second execution (partitioned)
   1008     //   - (to avoid unrelated oddities) initialize inputs and outputs
   1009     //   - execute
   1010     //   - compare outputs to save area
   1011 
   1012     // If the runtime and drivers are working properly, execution
   1013     // should not change the inputs.  Nonetheless, we reinitialize the
   1014     // inputs for each execution, so as to avoid unrelated problems
   1015     // appearing to be problems related to unpartitioned execution
   1016     // versus partitioned execution.  Similarly, execution behavior
   1017     // should not be dependent on the outputs; but we'll initialize the
   1018     // outputs anyway.
   1019     std::vector<float> masterInputs(problemSize * problemSize * model.inputCount());
   1020     std::generate(masterInputs.begin(), masterInputs.end(), [this]{ return randFrac(); });
   1021     const float masterOutput = randFrac();
   1022 
   1023     // Create the memory for the actual inputs and outputs.
   1024     struct InputOutputDescriptor {
   1025         enum Kind { INPUT, OUTPUT };
   1026         Kind mKind;
   1027 
   1028         // The input or output either resides in a local buffer
   1029         // (mVector, in which case mMemoryRegion is ignored); or in a
   1030         // shared memory region within a TestMemories instance
   1031         // (mMemoryRegion, in which case mVector is ignored).
   1032         enum Location { VECTOR, REGION };
   1033         Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }
   1034 
   1035         std::vector<float> mVector;
   1036         unsigned mMemoryRegion;
   1037     };
   1038     std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
   1039     for (unsigned i = 0; i < ioDescriptors.size(); i++) {
   1040         ioDescriptors[i].mKind = (i < model.inputCount()
   1041                                   ? InputOutputDescriptor::INPUT
   1042                                   : InputOutputDescriptor::OUTPUT);
   1043     }
   1044     //     We randomly interleave inputs and outputs in creation
    //     order, because when we create memory regions in a
   1046     //     TestMemories instance, the order in which regions are
   1047     //     created within a single Memory is the order they'll be laid
   1048     //     out in that memory; and when we have inputs and outputs
   1049     //     within the same Memory, we want the possibility that
   1050     //     they'll be interleaved.
   1051     std::random_shuffle(ioDescriptors.begin(), ioDescriptors.end(),
   1052                         [this](unsigned n) { return randUInt(n); });
   1053     TestMemories ioMemories;
   1054     for (auto &desc : ioDescriptors) {
   1055         if (randFrac() < 0.5) {
   1056             desc.mVector.resize(problemSize * problemSize);
   1057         } else {
   1058             // TODO: common this with the way we create IK_VALUE inputs?
   1059             unsigned memoryIndex = ~0U;
   1060             if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
   1061                 memoryIndex = randUInt(ioMemories.memoryCount());
   1062             } else {
   1063                 memoryIndex = ioMemories.addMemory();
   1064             }
   1065             const size_t length = problemSize * problemSize * sizeof(float);
   1066             desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
   1067         }
   1068     }
   1069     ioMemories.layout();
   1070 
   1071     // Function to set up actual inputs and outputs (initializing them
   1072     // and telling the WrapperExecution about them).
   1073     auto prepareForExecution =
   1074             [&model, &ioDescriptors, &ioMemories,
   1075              &masterInputs, &masterOutput, problemSize, &problemType](WrapperExecution *e) {
   1076         uint32_t inputIndex = 0, outputIndex = 0;
   1077         for (auto &desc : ioDescriptors) {
   1078             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
   1079                 if (desc.mKind == InputOutputDescriptor::INPUT) {
   1080                     const size_t inputOffset = inputIndex * problemSize * problemSize;
   1081                     std::copy(masterInputs.begin() + inputOffset,
   1082                               masterInputs.begin() + inputOffset + problemSize * problemSize,
   1083                               desc.mVector.begin());
   1084                     e->setInput(inputIndex++, desc.mVector.data(),
   1085                                 desc.mVector.size() * sizeof(float));
   1086                 } else {
   1087                     std::fill(desc.mVector.begin(),
   1088                               desc.mVector.begin() + problemSize * problemSize,
   1089                               masterOutput);
   1090                     e->setOutput(outputIndex++, desc.mVector.data(),
   1091                                  desc.mVector.size() * sizeof(float),
   1092                                  &problemType.operandType);
   1093                 }
   1094             } else {
   1095                 const WrapperMemory* memory;
   1096                 uint32_t offset, length;
   1097                 float* region =
   1098                         static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion,
   1099                                                                  &memory, &offset, &length));
   1100                 assert(length == problemSize * problemSize * sizeof(float));
   1101                 if (desc.mKind == InputOutputDescriptor::INPUT) {
   1102                     const size_t inputOffset = inputIndex * problemSize * problemSize;
   1103                     std::copy(masterInputs.begin() + inputOffset,
   1104                               masterInputs.begin() + inputOffset + problemSize * problemSize,
   1105                               region);
   1106                     e->setInputFromMemory(inputIndex++, memory, offset, length);
   1107                 } else {
   1108                     std::fill(region,
   1109                               region + problemSize * problemSize,
   1110                               masterOutput);
   1111                     e->setOutputFromMemory(outputIndex++, memory, offset, length,
   1112                                            &problemType.operandType);
   1113                 }
   1114             }
   1115         };
   1116         assert(inputIndex == model.inputCount());
   1117         assert(outputIndex == model.outputCount());
   1118     };
   1119 
   1120     // Non-partitioned execution.
   1121     WrapperExecution e(&c);
   1122     ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
   1123     ASSERT_EQ(e.compute(), Result::NO_ERROR);
   1124 
   1125     // Copy the outputs of the non-partitioned execution to a save area.
   1126     std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
   1127     {
   1128         uint32_t outputIndex = 0;
   1129         for (const auto& desc : ioDescriptors) {
   1130             if (desc.mKind != InputOutputDescriptor::OUTPUT) {
   1131                 continue;
   1132             }
   1133             const size_t outputOffset = outputIndex * problemSize * problemSize;
   1134             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
   1135                 std::copy(desc.mVector.begin(),
   1136                           desc.mVector.end(),
   1137                           nonPartitionedOutputs.begin() + outputOffset);
   1138             } else {
   1139                 float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
   1140                 std::copy(region,
   1141                           region + problemSize * problemSize,
   1142                           nonPartitionedOutputs.begin() + outputOffset);
   1143             }
   1144 #ifdef VERBOSE
   1145             {
   1146                 std::cout << "output[" << outputIndex << "] = {";
   1147                 for (auto I = nonPartitionedOutputs.begin() + outputOffset,
   1148                              E = nonPartitionedOutputs.begin() +
   1149                                      outputOffset + problemSize * problemSize;
   1150                      I != E; I++) {
   1151                     std::cout << " " << *I;
   1152                 }
   1153                 std::cout << " }" << std::endl;
   1154             }
   1155 #endif
   1156             outputIndex++;
   1157         }
   1158     }
   1159 
   1160     // Partitioned execution.
   1161     WrapperExecution e2(c2);
   1162     ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
   1163     ASSERT_EQ(e2.compute(), Result::NO_ERROR);
   1164 
   1165     // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
   1167     {
   1168         uint32_t outputIndex = 0;
   1169         for (const auto& desc : ioDescriptors) {
   1170             if (desc.mKind != InputOutputDescriptor::OUTPUT) {
   1171                 continue;
   1172             }
   1173             SCOPED_TRACE(outputIndex);
   1174             const size_t outputOffset = outputIndex * problemSize * problemSize;
   1175             if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
   1176                 ASSERT_TRUE(std::equal(desc.mVector.begin(),
   1177                                        desc.mVector.end(),
   1178                                        nonPartitionedOutputs.begin() + outputOffset));
   1179             } else {
   1180                 float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
   1181                 ASSERT_TRUE(std::equal(region,
   1182                                        region + problemSize * problemSize,
   1183                                        nonPartitionedOutputs.begin() + outputOffset));
   1184             }
   1185             outputIndex++;
   1186         }
   1187     }
   1188 }
   1189 
   1190 }  // namespace
   1191 }  // namespace android
   1192