Home | History | Annotate | Download | only in driver
      1 /*
      2  * Copyright (C) 2016 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsovScript.h"
     18 
     19 #include "bcinfo/MetadataExtractor.h"
     20 #include "module.h"
     21 #include "rsContext.h"
     22 #include "rsDefines.h"
     23 #include "rsType.h"
     24 #include "rsUtils.h"
     25 #include "rsovAllocation.h"
     26 #include "rsovContext.h"
     27 #include "rsovCore.h"
     28 #include "spirit/file_utils.h"
     29 #include "spirit/instructions.h"
     30 #include "spirit/module.h"
     31 
     32 #include <fstream>
     33 #include <functional>
     34 #include <iostream>
     35 #include <sstream>
     36 #include <string>
     37 
// Host-visible base address of the script's global-variable buffer. Within
// this file it is assigned only by the RSoVScript constructor, and only when
// the SPIR-V module declares a non-zero global size; otherwise it stays
// nullptr. Declared with C linkage so the symbol name is not mangled.
extern "C" {
char*  __GPUBlock = nullptr;
}
     41 
     42 namespace android {
     43 namespace renderscript {
     44 namespace rsov {
     45 
     46 namespace {
// Per-allocation type record that MarshalTypeInfo() writes into the global
// allocation metadata buffer, one record per global allocation ID.
// Layout of this struct has to be the same as the struct in generated SPIR-V
// TODO: generate this file from some spec that is shared with the compiler
struct rsovTypeInfo {
  uint32_t element_size;  // TODO: not implemented
  uint32_t x_size;        // filled from Type::getDimX()
  uint32_t y_size;        // filled from Type::getDimY()
  uint32_t z_size;        // filled from Type::getDimZ()
};
     55 
     56 const char *COMPILER_EXE_PATH = "/system/bin/rs2spirv";
     57 
     58 std::vector<const char *> setCompilerArgs(const char *bcFileName,
     59                                           const char *cacheDir) {
     60   rsAssert(bcFileName && cacheDir);
     61 
     62   std::vector<const char *> args;
     63 
     64   args.push_back(COMPILER_EXE_PATH);
     65   args.push_back(bcFileName);
     66 
     67   args.push_back(nullptr);
     68   return args;
     69 }
     70 
     71 void writeBytes(const char *filename, const char *bytes, size_t size) {
     72   std::ofstream ofs(filename, std::ios::binary);
     73   ofs.write(bytes, size);
     74   ofs.close();
     75 }
     76 
     77 std::vector<uint32_t> readWords(const char *filename) {
     78   std::ifstream ifs(filename, std::ios::binary);
     79 
     80   ifs.seekg(0, ifs.end);
     81   int length = ifs.tellg();
     82   ifs.seekg(0, ifs.beg);
     83 
     84   rsAssert(((length & 3) == 0) && "File size expected to be multiples of 4");
     85 
     86   std::vector<uint32_t> spvWords(length / sizeof(uint32_t));
     87 
     88   ifs.read((char *)(spvWords.data()), length);
     89 
     90   ifs.close();
     91 
     92   return spvWords;
     93 }
     94 
     95 std::vector<uint32_t> compileBitcode(const char *resName, const char *cacheDir,
     96                                      const char *bitcode, size_t bitcodeSize,
     97                                      std::vector<uint8_t> &modifiedBitcode) {
     98   rsAssert(bitcode && bitcodeSize);
     99 
    100   // TODO: Cache the generated code
    101 
    102   std::string bcFileName(cacheDir);
    103   bcFileName.append("/");
    104   bcFileName.append(resName);
    105   bcFileName.append(".bc");
    106 
    107   writeBytes(bcFileName.c_str(), bitcode, bitcodeSize);
    108 
    109   auto args = setCompilerArgs(bcFileName.c_str(), cacheDir);
    110 
    111   if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) {
    112     ALOGE("compiler command line failed");
    113     return std::vector<uint32_t>();
    114   }
    115 
    116   ALOGV("compiler command line succeeded");
    117 
    118   std::string spvFileName(cacheDir);
    119   spvFileName.append("/");
    120   spvFileName.append(resName);
    121   spvFileName.append(".spv");
    122 
    123   std::string modifiedBCFileName(cacheDir);
    124   modifiedBCFileName.append("/").append(resName).append("_modified.bc");
    125 
    126   args.pop_back();
    127   args.push_back("-bc");
    128   args.push_back(modifiedBCFileName.c_str());
    129   args.push_back(nullptr);
    130 
    131   if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) {
    132     ALOGE("compiler command line to create modified bitcode failed");
    133     return std::vector<uint32_t>();
    134   }
    135 
    136   modifiedBitcode = android::spirit::readFile<uint8_t>(modifiedBCFileName);
    137 
    138   return readWords(spvFileName.c_str());
    139 }
    140 
    141 void splitOffsets(const std::string &str, char delimiter,
    142                   std::vector<uint32_t> *offsets) {
    143   std::stringstream ss(str);
    144   std::string tok;
    145 
    146   while (std::getline(ss, tok, delimiter)) {
    147     const uint32_t offset = static_cast<uint32_t>(std::stoi(tok));
    148     offsets->push_back(offset);
    149   }
    150 }
    151 
    152 }  // anonymous namespace
    153 
    154 bool RSoVScript::isScriptCpuBacked(const Script *s) {
    155   return s->mHal.info.mVersionMinor == CPU_SCRIPT_MAGIC_NUMBER;
    156 }
    157 
    158 void RSoVScript::initScriptOnCpu(Script *s, RsdCpuReference::CpuScript *cs) {
    159   s->mHal.drv = cs;
    160   s->mHal.info.mVersionMajor = 0;  // Unused. Don't care.
    161   s->mHal.info.mVersionMinor = CPU_SCRIPT_MAGIC_NUMBER;
    162 }
    163 
    164 void RSoVScript::initScriptOnRSoV(Script *s, RSoVScript *rsovScript) {
    165   s->mHal.drv = rsovScript;
    166   s->mHal.info.mVersionMajor = 0;  // Unused. Don't care.
    167   s->mHal.info.mVersionMinor = 0;
    168 }
    169 
    170 using android::spirit::Module;
    171 using android::spirit::Deserialize;
    172 
    173 RSoVScript::RSoVScript(RSoVContext *context, std::vector<uint32_t> &&spvWords,
    174                        bcinfo::MetadataExtractor *ME,
    175                        std::map<std::string, int> *GA2ID)
    176     : mRSoV(context),
    177       mDevice(context->getDevice()),
    178       mSPIRVWords(std::move(spvWords)),
    179       mME(ME),
    180       mGlobalAllocationMetadata(nullptr),
    181       mGAMapping(GA2ID) {
    182   std::unique_ptr<Module> module(Deserialize<Module>(mSPIRVWords));
    183 
    184   const std::string &strGlobalSize =
    185       module->findStringOfPrefix(".rsov.GlobalSize:");
    186   if (strGlobalSize.empty()) {
    187     mGlobals.reset(new RSoVBuffer(context, 4));
    188     return;
    189   }
    190   const size_t colonPosSize = strGlobalSize.find(':');
    191   const std::string &strVal = strGlobalSize.substr(colonPosSize + 1);
    192   const uint64_t globalSize = static_cast<uint64_t>(std::stol(strVal));
    193   if (globalSize > 0) {
    194     mGlobals.reset(new RSoVBuffer(context, globalSize));
    195     __GPUBlock = mGlobals->getHostPtr();
    196     const std::string &offsetStr =
    197       module->findStringOfPrefix(".rsov.ExportedVars:");
    198     const size_t colonPos = offsetStr.find(':');
    199     splitOffsets(offsetStr.substr(colonPos + 1), ';', &mExportedVarOffsets);
    200   }
    201 }
    202 
// Deletes the CPU script held in mCpuScript and the metadata extractor that
// was passed to the constructor.
RSoVScript::~RSoVScript() {
  delete mCpuScript;
  delete mME;
}
    207 
// Intentionally empty. rsovScriptInit() calls populateScript() on the CPU
// script instead.
void RSoVScript::populateScript(Script *) {
}
    210 
    211 void RSoVScript::invokeFunction(uint32_t slot, const void *params,
    212                                 size_t paramLength) {
    213   getCpuScript()->invokeFunction(slot, params, paramLength);
    214 }
    215 
    216 int RSoVScript::invokeRoot() { return getCpuScript()->invokeRoot(); }
    217 
    218 void RSoVScript::invokeForEach(uint32_t slot, const Allocation **ains,
    219                                uint32_t inLen, Allocation *aout,
    220                                const void *usr, uint32_t usrLen,
    221                                const RsScriptCall *sc) {
    222   // TODO: Handle kernel without input Allocation
    223   rsAssert(ains);
    224   std::vector<RSoVAllocation *> inputAllocations(inLen);
    225   for (uint32_t i = 0; i < inLen; ++i) {
    226     inputAllocations[i] = static_cast<RSoVAllocation *>(ains[i]->mHal.drv);
    227   }
    228   RSoVAllocation *outputAllocation =
    229       static_cast<RSoVAllocation *>(aout->mHal.drv);
    230   runForEach(slot, inLen, inputAllocations, outputAllocation);
    231 }
    232 
    233 void RSoVScript::invokeReduce(uint32_t slot, const Allocation **ains,
    234                               uint32_t inLen, Allocation *aout,
    235                               const RsScriptCall *sc) {
    236   getCpuScript()->invokeReduce(slot, ains, inLen, aout, sc);
    237 }
    238 
    239 void RSoVScript::invokeInit() {
    240   getCpuScript()->invokeInit();
    241 }
    242 
// Not implemented yet; currently a no-op.
void RSoVScript::invokeFreeChildren() {
  // TODO: implement this
}
    246 
    247 void RSoVScript::setGlobalVar(uint32_t slot, const void *data,
    248                               size_t dataLength) {
    249   char *basePtr = mGlobals->getHostPtr();
    250   rsAssert(basePtr != nullptr);
    251   const uint32_t offset = GetExportedVarOffset(slot);
    252   memcpy(basePtr + offset, data, dataLength);
    253 }
    254 
    255 void RSoVScript::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
    256   const char *basePtr = mGlobals->getHostPtr();
    257   rsAssert(basePtr != nullptr);
    258   const uint32_t offset = GetExportedVarOffset(slot);
    259   memcpy(data, basePtr + offset, dataLength);
    260 }
    261 
    262 void RSoVScript::setGlobalVarWithElemDims(uint32_t slot, const void *data,
    263                                           size_t dataLength, const Element *elem,
    264                                           const uint32_t *dims,
    265                                           size_t dimLength) {
    266   char *basePtr = mGlobals->getHostPtr();
    267   rsAssert(basePtr != nullptr);
    268   const uint32_t offset = GetExportedVarOffset(slot);
    269   char *destPtr = basePtr + offset;
    270 
    271   // We want to look at dimension in terms of integer components,
    272   // but dimLength is given in terms of bytes.
    273   dimLength /= sizeof(int);
    274 
    275   // Only a single dimension is currently supported.
    276   rsAssert(dimLength == 1);
    277   if (dimLength != 1) {
    278     return;
    279   }
    280 
    281   // First do the increment loop.
    282   size_t stride = elem->getSizeBytes();
    283   const char *cVal = reinterpret_cast<const char *>(data);
    284   for (uint32_t i = 0; i < dims[0]; i++) {
    285     elem->incRefs(cVal);
    286     cVal += stride;
    287   }
    288 
    289   // Decrement loop comes after (to prevent race conditions).
    290   char *oldVal = destPtr;
    291   for (uint32_t i = 0; i < dims[0]; i++) {
    292     elem->decRefs(oldVal);
    293     oldVal += stride;
    294   }
    295 
    296   memcpy(destPtr, data, dataLength);
    297 }
    298 
// Not implemented yet: nothing is bound. The call only emits a verbose log
// line (which says "succeeded" even though no work is done).
void RSoVScript::setGlobalBind(uint32_t slot, Allocation *data) {
  ALOGV("%s succeeded.", __FUNCTION__);
  // TODO: implement this
}
    303 
// Forwards the object assignment for |slot| to the CPU script, then logs.
// NOTE(review): this reads mCpuScript directly while sibling methods go
// through getCpuScript(); confirm the two are equivalent.
void RSoVScript::setGlobalObj(uint32_t slot, ObjectBase *obj) {
  mCpuScript->setGlobalObj(slot, obj);
  ALOGV("%s succeeded.", __FUNCTION__);
}
    308 
// Not implemented yet; always returns nullptr regardless of |ptr|.
Allocation *RSoVScript::getAllocationForPointer(const void *ptr) const {
  // TODO: implement this
  return nullptr;
}
    313 
// Not implemented yet; always reports zero global entries.
int RSoVScript::getGlobalEntries() const {
  // TODO: implement this
  return 0;
}
    318 
// Not implemented yet; always returns nullptr regardless of |i|.
const char *RSoVScript::getGlobalName(int i) const {
  // TODO: implement this
  return nullptr;
}
    323 
// Not implemented yet; always returns nullptr regardless of |i|.
const void *RSoVScript::getGlobalAddress(int i) const {
  // TODO: implement this
  return nullptr;
}
    328 
// Not implemented yet; always returns 0 regardless of |i|.
size_t RSoVScript::getGlobalSize(int i) const {
  // TODO: implement this
  return 0;
}
    333 
// Not implemented yet; always returns 0 regardless of |i|.
uint32_t RSoVScript::getGlobalProperties(int i) const {
  // TODO: implement this
  return 0;
}
    338 
    339 void RSoVScript::InitDescriptorAndPipelineLayouts(uint32_t inLen) {
    340   // TODO: kernels with zero output allocations
    341   std::vector<VkDescriptorSetLayoutBinding> bindings(
    342       inLen + 3, {
    343                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    344                      .descriptorCount = 1,
    345                      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
    346                  });
    347   for (uint32_t i = 0; i < inLen + 3; i++) {
    348     bindings[i].binding = i;
    349   }
    350 
    351   VkDescriptorSetLayoutCreateInfo descriptor_layout = {
    352       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
    353       .pNext = nullptr,
    354       .flags = 0,
    355       .bindingCount = inLen + 3,
    356       .pBindings = bindings.data(),
    357   };
    358 
    359   VkResult res;
    360 
    361   mDescLayout.resize(NUM_DESCRIPTOR_SETS);
    362   res = vkCreateDescriptorSetLayout(mDevice, &descriptor_layout, NULL,
    363                                     mDescLayout.data());
    364   rsAssert(res == VK_SUCCESS);
    365 
    366   /* Now use the descriptor layout to create a pipeline layout */
    367   VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = {
    368       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    369       .pNext = nullptr,
    370       .pushConstantRangeCount = 0,
    371       .pPushConstantRanges = nullptr,
    372       .setLayoutCount = NUM_DESCRIPTOR_SETS,
    373       .pSetLayouts = mDescLayout.data(),
    374   };
    375 
    376   res = vkCreatePipelineLayout(mDevice, &pPipelineLayoutCreateInfo, NULL,
    377                                &mPipelineLayout);
    378   rsAssert(res == VK_SUCCESS);
    379 }
    380 
    381 void RSoVScript::InitShader(uint32_t slot) {
    382   VkResult res;
    383 
    384   mShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    385   mShaderStage.pNext = nullptr;
    386   mShaderStage.pSpecializationInfo = nullptr;
    387   mShaderStage.flags = 0;
    388   mShaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT;
    389 
    390   const char **RSKernelNames = mME->getExportForEachNameList();
    391   size_t RSKernelNum = mME->getExportForEachSignatureCount();
    392   rsAssert(slot < RSKernelNum);
    393   rsAssert(RSKernelNames);
    394   rsAssert(RSKernelNames[slot]);
    395   // ALOGV("slot = %d kernel name = %s", slot, RSKernelNames[slot]);
    396   std::string entryName("entry_");
    397   entryName.append(RSKernelNames[slot]);
    398 
    399   mShaderStage.pName = strndup(entryName.c_str(), entryName.size());
    400 
    401   VkShaderModuleCreateInfo moduleCreateInfo = {
    402       .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
    403       .pNext = nullptr,
    404       .flags = 0,
    405       .codeSize = mSPIRVWords.size() * sizeof(unsigned int),
    406       .pCode = mSPIRVWords.data(),
    407   };
    408   res = vkCreateShaderModule(mDevice, &moduleCreateInfo, NULL,
    409                              &mShaderStage.module);
    410   rsAssert(res == VK_SUCCESS);
    411 }
    412 
    413 void RSoVScript::InitDescriptorPool(uint32_t inLen) {
    414   VkResult res;
    415   // 1 global buffer, 1 global allocation metadata buffer, 1 output allocation,
    416   // and inLen input allocations
    417   VkDescriptorPoolSize type_count[] = {{
    418       .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = inLen + 3,
    419   }};
    420 
    421   VkDescriptorPoolCreateInfo descriptor_pool = {
    422       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
    423       .pNext = nullptr,
    424       .maxSets = 1,
    425       .poolSizeCount = NELEM(type_count),
    426       .pPoolSizes = type_count,
    427   };
    428 
    429   res = vkCreateDescriptorPool(mDevice, &descriptor_pool, NULL, &mDescPool);
    430   rsAssert(res == VK_SUCCESS);
    431 }
    432 
    433 // Iterate through a list of global allocations that are used inside the module
    434 // and marshal their type information to a dedicated Vulkan Buffer
    435 void RSoVScript::MarshalTypeInfo(void) {
    436   // Marshal global allocation metadata to the device
    437   auto *cs = getCpuScript();
    438   int nr_globals = mGAMapping->size();
    439   if (mGlobalAllocationMetadata == nullptr) {
    440     mGlobalAllocationMetadata.reset(
    441         new RSoVBuffer(mRSoV, sizeof(struct rsovTypeInfo) * nr_globals));
    442   }
    443   struct rsovTypeInfo *mappedMetadata =
    444       (struct rsovTypeInfo *)mGlobalAllocationMetadata->getHostPtr();
    445   for (int i = 0; i < nr_globals; ++i) {
    446     if (getGlobalRsType(cs->getGlobalProperties(i)) ==
    447         RsDataType::RS_TYPE_ALLOCATION) {
    448       ALOGV("global variable %d is an allocation!", i);
    449       const void *host_buf;
    450       cs->getGlobalVar(i, (void *)&host_buf, sizeof(host_buf));
    451       if (!host_buf) continue;
    452       const android::renderscript::Allocation *GA =
    453           static_cast<const android::renderscript::Allocation *>(host_buf);
    454       const android::renderscript::Type *T = GA->getType();
    455       rsAssert(T);
    456 
    457       auto global_it = mGAMapping->find(cs->getGlobalName(i));
    458       rsAssert(global_it != (*mGAMapping).end());
    459       int id = global_it->second;
    460       ALOGV("global allocation %s is mapped to ID %d", cs->getGlobalName(i),
    461             id);
    462       // TODO: marshal other properties
    463       mappedMetadata[id].x_size = T->getDimX();
    464       mappedMetadata[id].y_size = T->getDimY();
    465       mappedMetadata[id].z_size = T->getDimZ();
    466     }
    467   }
    468 }
    469 
    470 void RSoVScript::InitDescriptorSet(
    471     const std::vector<RSoVAllocation *> &inputAllocations,
    472     RSoVAllocation *outputAllocation) {
    473   VkResult res;
    474 
    475   VkDescriptorSetAllocateInfo alloc_info = {
    476       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
    477       .pNext = NULL,
    478       .descriptorPool = mDescPool,
    479       .descriptorSetCount = NUM_DESCRIPTOR_SETS,
    480       .pSetLayouts = mDescLayout.data(),
    481   };
    482 
    483   mDescSet.resize(NUM_DESCRIPTOR_SETS);
    484   res = vkAllocateDescriptorSets(mDevice, &alloc_info, mDescSet.data());
    485   rsAssert(res == VK_SUCCESS);
    486 
    487   std::vector<VkWriteDescriptorSet> writes{
    488       // Global variables
    489       {
    490           .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
    491           .dstSet = mDescSet[0],
    492           .dstBinding = 0,
    493           .dstArrayElement = 0,
    494           .descriptorCount = 1,
    495           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    496           .pBufferInfo = mGlobals->getBufferInfo(),
    497       },
    498       // Metadata for global Allocations
    499       {
    500           .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
    501           .dstSet = mDescSet[0],
    502           .dstBinding = 1,
    503           .dstArrayElement = 0,
    504           .descriptorCount = 1,
    505           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    506           .pBufferInfo = mGlobalAllocationMetadata->getBufferInfo(),
    507       },
    508       // Output Allocation
    509       {
    510           .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
    511           .dstSet = mDescSet[0],
    512           .dstBinding = 2,
    513           .dstArrayElement = 0,
    514           .descriptorCount = 1,
    515           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    516           .pBufferInfo = outputAllocation->getBuffer()->getBufferInfo(),
    517       },
    518   };
    519 
    520   // Input Allocations
    521   for (uint32_t i = 0; i < inputAllocations.size(); ++i) {
    522     writes.push_back({
    523         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
    524         .dstSet = mDescSet[0],
    525         .dstBinding = 3 + i,  // input allocations start from binding #3
    526         .dstArrayElement = 0,
    527         .descriptorCount = 1,
    528         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    529         .pBufferInfo = inputAllocations[i]->getBuffer()->getBufferInfo(),
    530     });
    531   }
    532 
    533   vkUpdateDescriptorSets(mDevice, writes.size(), writes.data(), 0, NULL);
    534 }
    535 
    536 void RSoVScript::InitPipeline() {
    537   // DEPENDS on mShaderStage, i.e., InitShader()
    538 
    539   VkResult res;
    540 
    541   VkComputePipelineCreateInfo pipeline_info = {
    542       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
    543       .pNext = nullptr,
    544       .layout = mPipelineLayout,
    545       .basePipelineHandle = VK_NULL_HANDLE,
    546       .basePipelineIndex = 0,
    547       .flags = 0,
    548       .stage = mShaderStage,
    549   };
    550   res = vkCreateComputePipelines(mDevice, VK_NULL_HANDLE, 1, &pipeline_info,
    551                                  NULL, &mComputePipeline);
    552   rsAssert(res == VK_SUCCESS);
    553 }
    554 
    555 void RSoVScript::runForEach(
    556     uint32_t slot, uint32_t inLen,
    557     const std::vector<RSoVAllocation *> &inputAllocations,
    558     RSoVAllocation *outputAllocation) {
    559   VkResult res;
    560 
    561   InitShader(slot);
    562   InitDescriptorPool(inLen);
    563   InitDescriptorAndPipelineLayouts(inLen);
    564   MarshalTypeInfo();
    565   InitDescriptorSet(inputAllocations, outputAllocation);
    566   // InitPipelineCache();
    567   InitPipeline();
    568 
    569   VkCommandBuffer cmd;
    570 
    571   VkCommandBufferAllocateInfo cmd_info = {
    572       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
    573       .pNext = nullptr,
    574       .commandPool = mRSoV->getCmdPool(),
    575       .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
    576       .commandBufferCount = 1,
    577   };
    578 
    579   res = vkAllocateCommandBuffers(mDevice, &cmd_info, &cmd);
    580   rsAssert(res == VK_SUCCESS);
    581 
    582   VkCommandBufferBeginInfo cmd_buf_info = {
    583       .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
    584       .pNext = nullptr,
    585       .flags = 0,
    586       .pInheritanceInfo = nullptr,
    587   };
    588 
    589   res = vkBeginCommandBuffer(cmd, &cmd_buf_info);
    590   rsAssert(res == VK_SUCCESS);
    591 
    592   vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mComputePipeline);
    593 
    594   vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mPipelineLayout,
    595                           0, mDescSet.size(), mDescSet.data(), 0, nullptr);
    596   // Assuming all input allocations are of the same dimensionality
    597   const uint32_t width = inputAllocations[0]->getWidth();
    598   const uint32_t height = rsMax(inputAllocations[0]->getHeight(), 1U);
    599   const uint32_t depth = rsMax(inputAllocations[0]->getDepth(), 1U);
    600   vkCmdDispatch(cmd, width, height, depth);
    601 
    602   res = vkEndCommandBuffer(cmd);
    603   assert(res == VK_SUCCESS);
    604 
    605   VkSubmitInfo submit_info = {
    606       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
    607       .commandBufferCount = 1,
    608       .pCommandBuffers = &cmd,
    609   };
    610 
    611   VkFence fence;
    612 
    613   VkFenceCreateInfo fenceInfo = {
    614       .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
    615       .pNext = nullptr,
    616       .flags = 0,
    617   };
    618 
    619   vkCreateFence(mDevice, &fenceInfo, NULL, &fence);
    620 
    621   vkQueueSubmit(mRSoV->getQueue(), 1, &submit_info, fence);
    622 
    623   // Make sure command buffer is finished
    624   do {
    625     res = vkWaitForFences(mDevice, 1, &fence, VK_TRUE, 100000);
    626   } while (res == VK_TIMEOUT);
    627 
    628   rsAssert(res == VK_SUCCESS);
    629 
    630   vkDestroyFence(mDevice, fence, NULL);
    631 
    632   // TODO: shall we reuse command buffers?
    633   VkCommandBuffer cmd_bufs[] = {cmd};
    634   vkFreeCommandBuffers(mDevice, mRSoV->getCmdPool(), 1, cmd_bufs);
    635 
    636   vkDestroyPipeline(mDevice, mComputePipeline, nullptr);
    637   for (int i = 0; i < NUM_DESCRIPTOR_SETS; i++)
    638     vkDestroyDescriptorSetLayout(mDevice, mDescLayout[i], nullptr);
    639   vkDestroyPipelineLayout(mDevice, mPipelineLayout, nullptr);
    640   vkFreeDescriptorSets(mDevice, mDescPool, NUM_DESCRIPTOR_SETS,
    641                        mDescSet.data());
    642   vkDestroyDescriptorPool(mDevice, mDescPool, nullptr);
    643   free((void *)mShaderStage.pName);
    644   vkDestroyShaderModule(mDevice, mShaderStage.module, nullptr);
    645 }
    646 
    647 }  // namespace rsov
    648 }  // namespace renderscript
    649 }  // namespace android
    650 
    651 using android::renderscript::Allocation;
    652 using android::renderscript::Context;
    653 using android::renderscript::Element;
    654 using android::renderscript::ObjectBase;
    655 using android::renderscript::RsdCpuReference;
    656 using android::renderscript::Script;
    657 using android::renderscript::ScriptC;
    658 using android::renderscript::rs_script;
    659 using android::renderscript::rsov::RSoVContext;
    660 using android::renderscript::rsov::RSoVScript;
    661 using android::renderscript::rsov::compileBitcode;
    662 
    663 namespace {
    664 // A class to parse global allocation metadata; essentially a subset of JSON
    665 // it would look like {"__RSoV_GA": {"g":42}}
    666 // The result is stored in a refence to a map<string, int>
    667 class ParseMD {
    668  public:
    669   ParseMD(std::string s, std::map<std::string, int> &map)
    670       : mString(s), mMapping(map) {}
    671 
    672   bool parse(void) {
    673     // remove outermose two pairs of braces
    674     mString = removeBraces(mString);
    675     if (mString.empty()) {
    676       return false;
    677     }
    678 
    679     mString = removeBraces(mString);
    680     if (mString.empty()) {
    681       return false;
    682     }
    683 
    684     // Now we are supposed to have a comma-separated list that looks like:
    685     // "foo":42, "bar":56
    686     split<','>(mString, [&](auto s) {
    687       split<':'>(s, nullptr, [&](auto pair) {
    688         rsAssert(pair.size() == 2);
    689         std::string ga_name = removeQuotes(pair[0]);
    690         int id = atoi(pair[1].c_str());
    691         ALOGV("ParseMD: global allocation %s has ID %d", ga_name.c_str(), id);
    692         mMapping[ga_name] = id;
    693       });
    694     });
    695     return true;
    696   }
    697 
    698  private:
    699   template <char L, char R>
    700   static std::string removeMatching(const std::string &s) {
    701     auto leftCBrace = s.find(L);
    702     if (leftCBrace == std::string::npos) {
    703       return "";
    704     }
    705     leftCBrace++;
    706     return s.substr(leftCBrace, s.rfind(R) - leftCBrace);
    707   }
    708 
    709   static std::string removeBraces(const std::string &s) {
    710     return removeMatching<'{', '}'>(s);
    711   }
    712 
    713   static std::string removeQuotes(const std::string &s) {
    714     return removeMatching<'"', '"'>(s);
    715   }
    716 
    717   // Splitting a string, and call "each" and/or "all" with individal elements
    718   // and a vector of all tokenized elements
    719   template <char D>
    720   static void split(const std::string &s,
    721                     std::function<void(const std::string &)> each,
    722                     std::function<void(const std::vector<const std::string> &)>
    723                         all = nullptr) {
    724     std::vector<const std::string> result;
    725     for (std::string::size_type pos = 0; pos < s.size(); pos++) {
    726       std::string::size_type begin = pos;
    727 
    728       while (pos <= s.size() && s[pos] != D) pos++;
    729       std::string found = s.substr(begin, pos - begin);
    730       if (each) each(found);
    731       if (all) result.push_back(found);
    732     }
    733     if (all) all(result);
    734   }
    735 
    736   std::string mString;
    737   std::map<std::string, int> &mMapping;
    738 };
    739 
    740 }  // namespace
    741 
    742 class ExtractRSoVMD : public android::spirit::DoNothingVisitor {
    743  public:
    744   ExtractRSoVMD() : mGAMapping(new std::map<std::string, int>) {}
    745 
    746   void visit(android::spirit::StringInst *s) {
    747     ALOGV("ExtractRSoVMD: string = %s", s->mOperand1.c_str());
    748     std::map<std::string, int> mapping;
    749     ParseMD p(s->mOperand1, mapping);
    750     if (p.parse()) {
    751       *mGAMapping = std::move(mapping);
    752     }
    753   }
    754 
    755   std::map<std::string, int> *takeMapping(void) { return mGAMapping.release(); }
    756 
    757  private:
    758   std::unique_ptr<std::map<std::string, int> > mGAMapping;
    759 };
    760 
    761 bool rsovScriptInit(const Context *rsc, ScriptC *script, char const *resName,
    762                     char const *cacheDir, uint8_t const *bitcode,
    763                     size_t bitcodeSize, uint32_t flags) {
    764   RSoVHal *hal = static_cast<RSoVHal *>(rsc->mHal.drv);
    765 
    766   std::unique_ptr<bcinfo::MetadataExtractor> bitcodeMetadata(
    767       new bcinfo::MetadataExtractor((const char *)bitcode, bitcodeSize));
    768   if (!bitcodeMetadata || !bitcodeMetadata->extract()) {
    769     ALOGE("Could not extract metadata from bitcode from %s", resName);
    770     return false;
    771   }
    772 
    773   std::vector<uint8_t> modifiedBitcode;
    774   auto spvWords =
    775     compileBitcode(resName, cacheDir, (const char *)bitcode, bitcodeSize, modifiedBitcode);
    776   if (!spvWords.empty() && !modifiedBitcode.empty()) {
    777     // Extract compiler metadata on allocation->binding mapping
    778     android::spirit::Module *module =
    779         android::spirit::Deserialize<android::spirit::Module>(spvWords);
    780     rsAssert(module);
    781     ExtractRSoVMD ga_md;
    782     module->accept(&ga_md);
    783 
    784     RSoVScript *rsovScript =
    785         new RSoVScript(hal->mRSoV, std::move(spvWords),
    786                        bitcodeMetadata.release(), ga_md.takeMapping());
    787     if (rsovScript) {
    788       std::string modifiedResName(resName);
    789       modifiedResName.append("_modified");
    790       RsdCpuReference::CpuScript *cs = hal->mCpuRef->createScript(
    791           script, modifiedResName.c_str(), cacheDir, modifiedBitcode.data(),
    792           modifiedBitcode.size(), flags);
    793       if (cs != nullptr) {
    794         cs->populateScript(script);
    795         rsovScript->setCpuScript(cs);
    796         RSoVScript::initScriptOnRSoV(script, rsovScript);
    797         return true;
    798       }
    799     }
    800   }
    801 
    802   ALOGD("Failed creating an RSoV script for %s", resName);
    803   // Fall back to CPU driver instead
    804 
    805   std::unique_ptr<RsdCpuReference::CpuScript> cs(hal->mCpuRef->createScript(
    806       script, resName, cacheDir, bitcode, bitcodeSize, flags));
    807   if (cs == nullptr) {
    808     ALOGE("Failed creating a CPU script %p for %s (%p)", cs.get(), resName,
    809           script);
    810     return false;
    811   }
    812   cs->populateScript(script);
    813 
    814   RSoVScript::initScriptOnCpu(script, cs.release());
    815 
    816   return true;
    817 }
    818 
    819 bool rsovInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid,
    820                        Element *e) {
    821   RSoVHal *dc = (RSoVHal *)rsc->mHal.drv;
    822   RsdCpuReference::CpuScript *cs = dc->mCpuRef->createIntrinsic(s, iid, e);
    823   if (cs == nullptr) {
    824     return false;
    825   }
    826   s->mHal.drv = cs;
    827   cs->populateScript(s);
    828   return true;
    829 }
    830 
    831 void rsovScriptInvokeForEach(const Context *rsc, Script *s, uint32_t slot,
    832                              const Allocation *ain, Allocation *aout,
    833                              const void *usr, size_t usrLen,
    834                              const RsScriptCall *sc) {
    835   if (ain == nullptr) {
    836     rsovScriptInvokeForEachMulti(rsc, s, slot, nullptr, 0, aout, usr, usrLen,
    837                                  sc);
    838   } else {
    839     const Allocation *ains[1] = {ain};
    840 
    841     rsovScriptInvokeForEachMulti(rsc, s, slot, ains, 1, aout, usr, usrLen, sc);
    842   }
    843 }
    844 
    845 void rsovScriptInvokeForEachMulti(const Context *rsc, Script *s, uint32_t slot,
    846                                   const Allocation **ains, size_t inLen,
    847                                   Allocation *aout, const void *usr,
    848                                   size_t usrLen, const RsScriptCall *sc) {
    849   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    850   cs->invokeForEach(slot, ains, inLen, aout, usr, usrLen, sc);
    851 }
    852 
    853 int rsovScriptInvokeRoot(const Context *dc, Script *s) {
    854   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    855   return cs->invokeRoot();
    856 }
    857 
    858 void rsovScriptInvokeInit(const Context *dc, Script *s) {
    859   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    860   cs->invokeInit();
    861 }
    862 
    863 void rsovScriptInvokeFreeChildren(const Context *dc, Script *s) {
    864   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    865   cs->invokeFreeChildren();
    866 }
    867 
    868 void rsovScriptInvokeFunction(const Context *dc, Script *s, uint32_t slot,
    869                               const void *params, size_t paramLength) {
    870   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    871   cs->invokeFunction(slot, params, paramLength);
    872 }
    873 
    874 void rsovScriptInvokeReduce(const Context *dc, Script *s, uint32_t slot,
    875                             const Allocation **ains, size_t inLen,
    876                             Allocation *aout, const RsScriptCall *sc) {
    877   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    878   cs->invokeReduce(slot, ains, inLen, aout, sc);
    879 }
    880 
    881 void rsovScriptSetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
    882                             void *data, size_t dataLength) {
    883   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    884   cs->setGlobalVar(slot, data, dataLength);
    885 }
    886 
    887 void rsovScriptGetGlobalVar(const Context *dc, const Script *s, uint32_t slot,
    888                             void *data, size_t dataLength) {
    889   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    890   cs->getGlobalVar(slot, data, dataLength);
    891 }
    892 
    893 void rsovScriptSetGlobalVarWithElemDims(
    894     const Context *dc, const Script *s, uint32_t slot, void *data,
    895     size_t dataLength, const android::renderscript::Element *elem,
    896     const uint32_t *dims, size_t dimLength) {
    897   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    898   cs->setGlobalVarWithElemDims(slot, data, dataLength, elem, dims, dimLength);
    899 }
    900 
    901 void rsovScriptSetGlobalBind(const Context *dc, const Script *s, uint32_t slot,
    902                              Allocation *data) {
    903   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    904   cs->setGlobalBind(slot, data);
    905 }
    906 
    907 void rsovScriptSetGlobalObj(const Context *dc, const Script *s, uint32_t slot,
    908                             ObjectBase *data) {
    909   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    910   cs->setGlobalObj(slot, data);
    911 }
    912 
    913 void rsovScriptDestroy(const Context *dc, Script *s) {
    914   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv;
    915   delete cs;
    916   s->mHal.drv = nullptr;
    917 }
    918 
    919 Allocation *rsovScriptGetAllocationForPointer(
    920     const android::renderscript::Context *dc,
    921     const android::renderscript::Script *sc, const void *ptr) {
    922   RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)sc->mHal.drv;
    923   return cs->getAllocationForPointer(ptr);
    924 }
    925 
    926 void rsovScriptUpdateCachedObject(const Context *rsc, const Script *script,
    927                                   rs_script *obj) {
    928   obj->p = script;
    929 #ifdef __LP64__
    930   obj->unused1 = nullptr;
    931   obj->unused2 = nullptr;
    932   obj->unused3 = nullptr;
    933 #endif
    934 }
    935