1 /* 2 * Copyright (C) 2016 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "rsovScript.h" 18 19 #include "bcinfo/MetadataExtractor.h" 20 #include "module.h" 21 #include "rsContext.h" 22 #include "rsDefines.h" 23 #include "rsType.h" 24 #include "rsUtils.h" 25 #include "rsovAllocation.h" 26 #include "rsovContext.h" 27 #include "rsovCore.h" 28 #include "spirit/file_utils.h" 29 #include "spirit/instructions.h" 30 #include "spirit/module.h" 31 32 #include <fstream> 33 #include <functional> 34 #include <iostream> 35 #include <sstream> 36 #include <string> 37 38 extern "C" { 39 char* __GPUBlock = nullptr; 40 } 41 42 namespace android { 43 namespace renderscript { 44 namespace rsov { 45 46 namespace { 47 // Layout of this struct has to be the same as the struct in generated SPIR-V 48 // TODO: generate this file from some spec that is shared with the compiler 49 struct rsovTypeInfo { 50 uint32_t element_size; // TODO: not implemented 51 uint32_t x_size; 52 uint32_t y_size; 53 uint32_t z_size; 54 }; 55 56 const char *COMPILER_EXE_PATH = "/system/bin/rs2spirv"; 57 58 std::vector<const char *> setCompilerArgs(const char *bcFileName, 59 const char *cacheDir) { 60 rsAssert(bcFileName && cacheDir); 61 62 std::vector<const char *> args; 63 64 args.push_back(COMPILER_EXE_PATH); 65 args.push_back(bcFileName); 66 67 args.push_back(nullptr); 68 return args; 69 } 70 71 void writeBytes(const char *filename, const char *bytes, size_t size) { 72 std::ofstream ofs(filename, std::ios::binary); 73 ofs.write(bytes, size); 74 ofs.close(); 75 } 76 77 std::vector<uint32_t> readWords(const char *filename) { 78 std::ifstream ifs(filename, std::ios::binary); 79 80 ifs.seekg(0, ifs.end); 81 int length = ifs.tellg(); 82 ifs.seekg(0, ifs.beg); 83 84 rsAssert(((length & 3) == 0) && "File size expected to be multiples of 4"); 85 86 std::vector<uint32_t> spvWords(length / sizeof(uint32_t)); 87 88 ifs.read((char *)(spvWords.data()), length); 89 90 ifs.close(); 91 92 return spvWords; 93 } 94 95 std::vector<uint32_t> compileBitcode(const char *resName, const char *cacheDir, 96 const char *bitcode, size_t bitcodeSize, 97 std::vector<uint8_t> &modifiedBitcode) { 98 rsAssert(bitcode && bitcodeSize); 99 100 // TODO: Cache the generated code 101 102 std::string bcFileName(cacheDir); 103 bcFileName.append("/"); 104 bcFileName.append(resName); 105 bcFileName.append(".bc"); 106 107 writeBytes(bcFileName.c_str(), bitcode, bitcodeSize); 108 109 auto args = setCompilerArgs(bcFileName.c_str(), cacheDir); 110 111 if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) { 112 ALOGE("compiler command line failed"); 113 return std::vector<uint32_t>(); 114 } 115 116 ALOGV("compiler command line succeeded"); 117 118 std::string spvFileName(cacheDir); 119 spvFileName.append("/"); 120 spvFileName.append(resName); 121 spvFileName.append(".spv"); 122 123 std::string modifiedBCFileName(cacheDir); 124 modifiedBCFileName.append("/").append(resName).append("_modified.bc"); 125 126 args.pop_back(); 127 args.push_back("-bc"); 128 args.push_back(modifiedBCFileName.c_str()); 129 args.push_back(nullptr); 130 131 if (!rsuExecuteCommand(COMPILER_EXE_PATH, args.size() - 1, args.data())) { 132 ALOGE("compiler command line to create modified bitcode failed"); 133 return std::vector<uint32_t>(); 134 } 135 136 modifiedBitcode = android::spirit::readFile<uint8_t>(modifiedBCFileName); 137 138 return readWords(spvFileName.c_str()); 139 } 140 141 void splitOffsets(const std::string &str, char delimiter, 142 std::vector<uint32_t> *offsets) { 143 std::stringstream ss(str); 144 std::string tok; 145 146 while (std::getline(ss, tok, delimiter)) { 147 const uint32_t offset = static_cast<uint32_t>(std::stoi(tok)); 148 offsets->push_back(offset); 149 } 150 } 151 152 } // anonymous namespace 153 154 bool RSoVScript::isScriptCpuBacked(const Script *s) { 155 return s->mHal.info.mVersionMinor == CPU_SCRIPT_MAGIC_NUMBER; 156 } 157 158 void RSoVScript::initScriptOnCpu(Script *s, RsdCpuReference::CpuScript *cs) { 159 s->mHal.drv = cs; 160 s->mHal.info.mVersionMajor = 0; // Unused. Don't care. 161 s->mHal.info.mVersionMinor = CPU_SCRIPT_MAGIC_NUMBER; 162 } 163 164 void RSoVScript::initScriptOnRSoV(Script *s, RSoVScript *rsovScript) { 165 s->mHal.drv = rsovScript; 166 s->mHal.info.mVersionMajor = 0; // Unused. Don't care. 167 s->mHal.info.mVersionMinor = 0; 168 } 169 170 using android::spirit::Module; 171 using android::spirit::Deserialize; 172 173 RSoVScript::RSoVScript(RSoVContext *context, std::vector<uint32_t> &&spvWords, 174 bcinfo::MetadataExtractor *ME, 175 std::map<std::string, int> *GA2ID) 176 : mRSoV(context), 177 mDevice(context->getDevice()), 178 mSPIRVWords(std::move(spvWords)), 179 mME(ME), 180 mGlobalAllocationMetadata(nullptr), 181 mGAMapping(GA2ID) { 182 std::unique_ptr<Module> module(Deserialize<Module>(mSPIRVWords)); 183 184 const std::string &strGlobalSize = 185 module->findStringOfPrefix(".rsov.GlobalSize:"); 186 if (strGlobalSize.empty()) { 187 mGlobals.reset(new RSoVBuffer(context, 4)); 188 return; 189 } 190 const size_t colonPosSize = strGlobalSize.find(':'); 191 const std::string &strVal = strGlobalSize.substr(colonPosSize + 1); 192 const uint64_t globalSize = static_cast<uint64_t>(std::stol(strVal)); 193 if (globalSize > 0) { 194 mGlobals.reset(new RSoVBuffer(context, globalSize)); 195 __GPUBlock = mGlobals->getHostPtr(); 196 const std::string &offsetStr = 197 module->findStringOfPrefix(".rsov.ExportedVars:"); 198 const size_t colonPos = offsetStr.find(':'); 199 splitOffsets(offsetStr.substr(colonPos + 1), ';', &mExportedVarOffsets); 200 } 201 } 202 203 RSoVScript::~RSoVScript() { 204 delete mCpuScript; 205 delete mME; 206 } 207 208 void RSoVScript::populateScript(Script *) { 209 } 210 211 void RSoVScript::invokeFunction(uint32_t slot, const void *params, 212 size_t paramLength) { 213 getCpuScript()->invokeFunction(slot, params, paramLength); 214 } 215 216 int RSoVScript::invokeRoot() { return getCpuScript()->invokeRoot(); } 217 218 void RSoVScript::invokeForEach(uint32_t slot, const Allocation **ains, 219 uint32_t inLen, Allocation *aout, 220 const void *usr, uint32_t usrLen, 221 const RsScriptCall *sc) { 222 // TODO: Handle kernel without input Allocation 223 rsAssert(ains); 224 std::vector<RSoVAllocation *> inputAllocations(inLen); 225 for (uint32_t i = 0; i < inLen; ++i) { 226 inputAllocations[i] = static_cast<RSoVAllocation *>(ains[i]->mHal.drv); 227 } 228 RSoVAllocation *outputAllocation = 229 static_cast<RSoVAllocation *>(aout->mHal.drv); 230 runForEach(slot, inLen, inputAllocations, outputAllocation); 231 } 232 233 void RSoVScript::invokeReduce(uint32_t slot, const Allocation **ains, 234 uint32_t inLen, Allocation *aout, 235 const RsScriptCall *sc) { 236 getCpuScript()->invokeReduce(slot, ains, inLen, aout, sc); 237 } 238 239 void RSoVScript::invokeInit() { 240 getCpuScript()->invokeInit(); 241 } 242 243 void RSoVScript::invokeFreeChildren() { 244 // TODO: implement this 245 } 246 247 void RSoVScript::setGlobalVar(uint32_t slot, const void *data, 248 size_t dataLength) { 249 char *basePtr = mGlobals->getHostPtr(); 250 rsAssert(basePtr != nullptr); 251 const uint32_t offset = GetExportedVarOffset(slot); 252 memcpy(basePtr + offset, data, dataLength); 253 } 254 255 void RSoVScript::getGlobalVar(uint32_t slot, void *data, size_t dataLength) { 256 const char *basePtr = mGlobals->getHostPtr(); 257 rsAssert(basePtr != nullptr); 258 const uint32_t offset = GetExportedVarOffset(slot); 259 memcpy(data, basePtr + offset, dataLength); 260 } 261 262 void RSoVScript::setGlobalVarWithElemDims(uint32_t slot, const void *data, 263 size_t dataLength, const Element *elem, 264 const uint32_t *dims, 265 size_t dimLength) { 266 char *basePtr = mGlobals->getHostPtr(); 267 rsAssert(basePtr != nullptr); 268 const uint32_t offset = GetExportedVarOffset(slot); 269 char *destPtr = basePtr + offset; 270 271 // We want to look at dimension in terms of integer components, 272 // but dimLength is given in terms of bytes. 273 dimLength /= sizeof(int); 274 275 // Only a single dimension is currently supported. 276 rsAssert(dimLength == 1); 277 if (dimLength != 1) { 278 return; 279 } 280 281 // First do the increment loop. 282 size_t stride = elem->getSizeBytes(); 283 const char *cVal = reinterpret_cast<const char *>(data); 284 for (uint32_t i = 0; i < dims[0]; i++) { 285 elem->incRefs(cVal); 286 cVal += stride; 287 } 288 289 // Decrement loop comes after (to prevent race conditions). 290 char *oldVal = destPtr; 291 for (uint32_t i = 0; i < dims[0]; i++) { 292 elem->decRefs(oldVal); 293 oldVal += stride; 294 } 295 296 memcpy(destPtr, data, dataLength); 297 } 298 299 void RSoVScript::setGlobalBind(uint32_t slot, Allocation *data) { 300 ALOGV("%s succeeded.", __FUNCTION__); 301 // TODO: implement this 302 } 303 304 void RSoVScript::setGlobalObj(uint32_t slot, ObjectBase *obj) { 305 mCpuScript->setGlobalObj(slot, obj); 306 ALOGV("%s succeeded.", __FUNCTION__); 307 } 308 309 Allocation *RSoVScript::getAllocationForPointer(const void *ptr) const { 310 // TODO: implement this 311 return nullptr; 312 } 313 314 int RSoVScript::getGlobalEntries() const { 315 // TODO: implement this 316 return 0; 317 } 318 319 const char *RSoVScript::getGlobalName(int i) const { 320 // TODO: implement this 321 return nullptr; 322 } 323 324 const void *RSoVScript::getGlobalAddress(int i) const { 325 // TODO: implement this 326 return nullptr; 327 } 328 329 size_t RSoVScript::getGlobalSize(int i) const { 330 // TODO: implement this 331 return 0; 332 } 333 334 uint32_t RSoVScript::getGlobalProperties(int i) const { 335 // TODO: implement this 336 return 0; 337 } 338 339 void RSoVScript::InitDescriptorAndPipelineLayouts(uint32_t inLen) { 340 // TODO: kernels with zero output allocations 341 std::vector<VkDescriptorSetLayoutBinding> bindings( 342 inLen + 3, { 343 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 344 .descriptorCount = 1, 345 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, 346 }); 347 for (uint32_t i = 0; i < inLen + 3; i++) { 348 bindings[i].binding = i; 349 } 350 351 VkDescriptorSetLayoutCreateInfo descriptor_layout = { 352 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, 353 .pNext = nullptr, 354 .flags = 0, 355 .bindingCount = inLen + 3, 356 .pBindings = bindings.data(), 357 }; 358 359 VkResult res; 360 361 mDescLayout.resize(NUM_DESCRIPTOR_SETS); 362 res = vkCreateDescriptorSetLayout(mDevice, &descriptor_layout, NULL, 363 mDescLayout.data()); 364 rsAssert(res == VK_SUCCESS); 365 366 /* Now use the descriptor layout to create a pipeline layout */ 367 VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo = { 368 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, 369 .pNext = nullptr, 370 .pushConstantRangeCount = 0, 371 .pPushConstantRanges = nullptr, 372 .setLayoutCount = NUM_DESCRIPTOR_SETS, 373 .pSetLayouts = mDescLayout.data(), 374 }; 375 376 res = vkCreatePipelineLayout(mDevice, &pPipelineLayoutCreateInfo, NULL, 377 &mPipelineLayout); 378 rsAssert(res == VK_SUCCESS); 379 } 380 381 void RSoVScript::InitShader(uint32_t slot) { 382 VkResult res; 383 384 mShaderStage.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; 385 mShaderStage.pNext = nullptr; 386 mShaderStage.pSpecializationInfo = nullptr; 387 mShaderStage.flags = 0; 388 mShaderStage.stage = VK_SHADER_STAGE_COMPUTE_BIT; 389 390 const char **RSKernelNames = mME->getExportForEachNameList(); 391 size_t RSKernelNum = mME->getExportForEachSignatureCount(); 392 rsAssert(slot < RSKernelNum); 393 rsAssert(RSKernelNames); 394 rsAssert(RSKernelNames[slot]); 395 // ALOGV("slot = %d kernel name = %s", slot, RSKernelNames[slot]); 396 std::string entryName("entry_"); 397 entryName.append(RSKernelNames[slot]); 398 399 mShaderStage.pName = strndup(entryName.c_str(), entryName.size()); 400 401 VkShaderModuleCreateInfo moduleCreateInfo = { 402 .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, 403 .pNext = nullptr, 404 .flags = 0, 405 .codeSize = mSPIRVWords.size() * sizeof(unsigned int), 406 .pCode = mSPIRVWords.data(), 407 }; 408 res = vkCreateShaderModule(mDevice, &moduleCreateInfo, NULL, 409 &mShaderStage.module); 410 rsAssert(res == VK_SUCCESS); 411 } 412 413 void RSoVScript::InitDescriptorPool(uint32_t inLen) { 414 VkResult res; 415 // 1 global buffer, 1 global allocation metadata buffer, 1 output allocation, 416 // and inLen input allocations 417 VkDescriptorPoolSize type_count[] = {{ 418 .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, .descriptorCount = inLen + 3, 419 }}; 420 421 VkDescriptorPoolCreateInfo descriptor_pool = { 422 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, 423 .pNext = nullptr, 424 .maxSets = 1, 425 .poolSizeCount = NELEM(type_count), 426 .pPoolSizes = type_count, 427 }; 428 429 res = vkCreateDescriptorPool(mDevice, &descriptor_pool, NULL, &mDescPool); 430 rsAssert(res == VK_SUCCESS); 431 } 432 433 // Iterate through a list of global allocations that are used inside the module 434 // and marshal their type information to a dedicated Vulkan Buffer 435 void RSoVScript::MarshalTypeInfo(void) { 436 // Marshal global allocation metadata to the device 437 auto *cs = getCpuScript(); 438 int nr_globals = mGAMapping->size(); 439 if (mGlobalAllocationMetadata == nullptr) { 440 mGlobalAllocationMetadata.reset( 441 new RSoVBuffer(mRSoV, sizeof(struct rsovTypeInfo) * nr_globals)); 442 } 443 struct rsovTypeInfo *mappedMetadata = 444 (struct rsovTypeInfo *)mGlobalAllocationMetadata->getHostPtr(); 445 for (int i = 0; i < nr_globals; ++i) { 446 if (getGlobalRsType(cs->getGlobalProperties(i)) == 447 RsDataType::RS_TYPE_ALLOCATION) { 448 ALOGV("global variable %d is an allocation!", i); 449 const void *host_buf; 450 cs->getGlobalVar(i, (void *)&host_buf, sizeof(host_buf)); 451 if (!host_buf) continue; 452 const android::renderscript::Allocation *GA = 453 static_cast<const android::renderscript::Allocation *>(host_buf); 454 const android::renderscript::Type *T = GA->getType(); 455 rsAssert(T); 456 457 auto global_it = mGAMapping->find(cs->getGlobalName(i)); 458 rsAssert(global_it != (*mGAMapping).end()); 459 int id = global_it->second; 460 ALOGV("global allocation %s is mapped to ID %d", cs->getGlobalName(i), 461 id); 462 // TODO: marshal other properties 463 mappedMetadata[id].x_size = T->getDimX(); 464 mappedMetadata[id].y_size = T->getDimY(); 465 mappedMetadata[id].z_size = T->getDimZ(); 466 } 467 } 468 } 469 470 void RSoVScript::InitDescriptorSet( 471 const std::vector<RSoVAllocation *> &inputAllocations, 472 RSoVAllocation *outputAllocation) { 473 VkResult res; 474 475 VkDescriptorSetAllocateInfo alloc_info = { 476 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, 477 .pNext = NULL, 478 .descriptorPool = mDescPool, 479 .descriptorSetCount = NUM_DESCRIPTOR_SETS, 480 .pSetLayouts = mDescLayout.data(), 481 }; 482 483 mDescSet.resize(NUM_DESCRIPTOR_SETS); 484 res = vkAllocateDescriptorSets(mDevice, &alloc_info, mDescSet.data()); 485 rsAssert(res == VK_SUCCESS); 486 487 std::vector<VkWriteDescriptorSet> writes{ 488 // Global variables 489 { 490 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 491 .dstSet = mDescSet[0], 492 .dstBinding = 0, 493 .dstArrayElement = 0, 494 .descriptorCount = 1, 495 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 496 .pBufferInfo = mGlobals->getBufferInfo(), 497 }, 498 // Metadata for global Allocations 499 { 500 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 501 .dstSet = mDescSet[0], 502 .dstBinding = 1, 503 .dstArrayElement = 0, 504 .descriptorCount = 1, 505 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 506 .pBufferInfo = mGlobalAllocationMetadata->getBufferInfo(), 507 }, 508 // Output Allocation 509 { 510 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 511 .dstSet = mDescSet[0], 512 .dstBinding = 2, 513 .dstArrayElement = 0, 514 .descriptorCount = 1, 515 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 516 .pBufferInfo = outputAllocation->getBuffer()->getBufferInfo(), 517 }, 518 }; 519 520 // Input Allocations 521 for (uint32_t i = 0; i < inputAllocations.size(); ++i) { 522 writes.push_back({ 523 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, 524 .dstSet = mDescSet[0], 525 .dstBinding = 3 + i, // input allocations start from binding #3 526 .dstArrayElement = 0, 527 .descriptorCount = 1, 528 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 529 .pBufferInfo = inputAllocations[i]->getBuffer()->getBufferInfo(), 530 }); 531 } 532 533 vkUpdateDescriptorSets(mDevice, writes.size(), writes.data(), 0, NULL); 534 } 535 536 void RSoVScript::InitPipeline() { 537 // DEPENDS on mShaderStage, i.e., InitShader() 538 539 VkResult res; 540 541 VkComputePipelineCreateInfo pipeline_info = { 542 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, 543 .pNext = nullptr, 544 .layout = mPipelineLayout, 545 .basePipelineHandle = VK_NULL_HANDLE, 546 .basePipelineIndex = 0, 547 .flags = 0, 548 .stage = mShaderStage, 549 }; 550 res = vkCreateComputePipelines(mDevice, VK_NULL_HANDLE, 1, &pipeline_info, 551 NULL, &mComputePipeline); 552 rsAssert(res == VK_SUCCESS); 553 } 554 555 void RSoVScript::runForEach( 556 uint32_t slot, uint32_t inLen, 557 const std::vector<RSoVAllocation *> &inputAllocations, 558 RSoVAllocation *outputAllocation) { 559 VkResult res; 560 561 InitShader(slot); 562 InitDescriptorPool(inLen); 563 InitDescriptorAndPipelineLayouts(inLen); 564 MarshalTypeInfo(); 565 InitDescriptorSet(inputAllocations, outputAllocation); 566 // InitPipelineCache(); 567 InitPipeline(); 568 569 VkCommandBuffer cmd; 570 571 VkCommandBufferAllocateInfo cmd_info = { 572 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, 573 .pNext = nullptr, 574 .commandPool = mRSoV->getCmdPool(), 575 .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, 576 .commandBufferCount = 1, 577 }; 578 579 res = vkAllocateCommandBuffers(mDevice, &cmd_info, &cmd); 580 rsAssert(res == VK_SUCCESS); 581 582 VkCommandBufferBeginInfo cmd_buf_info = { 583 .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, 584 .pNext = nullptr, 585 .flags = 0, 586 .pInheritanceInfo = nullptr, 587 }; 588 589 res = vkBeginCommandBuffer(cmd, &cmd_buf_info); 590 rsAssert(res == VK_SUCCESS); 591 592 vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mComputePipeline); 593 594 vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, mPipelineLayout, 595 0, mDescSet.size(), mDescSet.data(), 0, nullptr); 596 // Assuming all input allocations are of the same dimensionality 597 const uint32_t width = inputAllocations[0]->getWidth(); 598 const uint32_t height = rsMax(inputAllocations[0]->getHeight(), 1U); 599 const uint32_t depth = rsMax(inputAllocations[0]->getDepth(), 1U); 600 vkCmdDispatch(cmd, width, height, depth); 601 602 res = vkEndCommandBuffer(cmd); 603 assert(res == VK_SUCCESS); 604 605 VkSubmitInfo submit_info = { 606 .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, 607 .commandBufferCount = 1, 608 .pCommandBuffers = &cmd, 609 }; 610 611 VkFence fence; 612 613 VkFenceCreateInfo fenceInfo = { 614 .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, 615 .pNext = nullptr, 616 .flags = 0, 617 }; 618 619 vkCreateFence(mDevice, &fenceInfo, NULL, &fence); 620 621 vkQueueSubmit(mRSoV->getQueue(), 1, &submit_info, fence); 622 623 // Make sure command buffer is finished 624 do { 625 res = vkWaitForFences(mDevice, 1, &fence, VK_TRUE, 100000); 626 } while (res == VK_TIMEOUT); 627 628 rsAssert(res == VK_SUCCESS); 629 630 vkDestroyFence(mDevice, fence, NULL); 631 632 // TODO: shall we reuse command buffers? 633 VkCommandBuffer cmd_bufs[] = {cmd}; 634 vkFreeCommandBuffers(mDevice, mRSoV->getCmdPool(), 1, cmd_bufs); 635 636 vkDestroyPipeline(mDevice, mComputePipeline, nullptr); 637 for (int i = 0; i < NUM_DESCRIPTOR_SETS; i++) 638 vkDestroyDescriptorSetLayout(mDevice, mDescLayout[i], nullptr); 639 vkDestroyPipelineLayout(mDevice, mPipelineLayout, nullptr); 640 vkFreeDescriptorSets(mDevice, mDescPool, NUM_DESCRIPTOR_SETS, 641 mDescSet.data()); 642 vkDestroyDescriptorPool(mDevice, mDescPool, nullptr); 643 free((void *)mShaderStage.pName); 644 vkDestroyShaderModule(mDevice, mShaderStage.module, nullptr); 645 } 646 647 } // namespace rsov 648 } // namespace renderscript 649 } // namespace android 650 651 using android::renderscript::Allocation; 652 using android::renderscript::Context; 653 using android::renderscript::Element; 654 using android::renderscript::ObjectBase; 655 using android::renderscript::RsdCpuReference; 656 using android::renderscript::Script; 657 using android::renderscript::ScriptC; 658 using android::renderscript::rs_script; 659 using android::renderscript::rsov::RSoVContext; 660 using android::renderscript::rsov::RSoVScript; 661 using android::renderscript::rsov::compileBitcode; 662 663 namespace { 664 // A class to parse global allocation metadata; essentially a subset of JSON 665 // it would look like {"__RSoV_GA": {"g":42}} 666 // The result is stored in a refence to a map<string, int> 667 class ParseMD { 668 public: 669 ParseMD(std::string s, std::map<std::string, int> &map) 670 : mString(s), mMapping(map) {} 671 672 bool parse(void) { 673 // remove outermose two pairs of braces 674 mString = removeBraces(mString); 675 if (mString.empty()) { 676 return false; 677 } 678 679 mString = removeBraces(mString); 680 if (mString.empty()) { 681 return false; 682 } 683 684 // Now we are supposed to have a comma-separated list that looks like: 685 // "foo":42, "bar":56 686 split<','>(mString, [&](auto s) { 687 split<':'>(s, nullptr, [&](auto pair) { 688 rsAssert(pair.size() == 2); 689 std::string ga_name = removeQuotes(pair[0]); 690 int id = atoi(pair[1].c_str()); 691 ALOGV("ParseMD: global allocation %s has ID %d", ga_name.c_str(), id); 692 mMapping[ga_name] = id; 693 }); 694 }); 695 return true; 696 } 697 698 private: 699 template <char L, char R> 700 static std::string removeMatching(const std::string &s) { 701 auto leftCBrace = s.find(L); 702 if (leftCBrace == std::string::npos) { 703 return ""; 704 } 705 leftCBrace++; 706 return s.substr(leftCBrace, s.rfind(R) - leftCBrace); 707 } 708 709 static std::string removeBraces(const std::string &s) { 710 return removeMatching<'{', '}'>(s); 711 } 712 713 static std::string removeQuotes(const std::string &s) { 714 return removeMatching<'"', '"'>(s); 715 } 716 717 // Splitting a string, and call "each" and/or "all" with individal elements 718 // and a vector of all tokenized elements 719 template <char D> 720 static void split(const std::string &s, 721 std::function<void(const std::string &)> each, 722 std::function<void(const std::vector<const std::string> &)> 723 all = nullptr) { 724 std::vector<const std::string> result; 725 for (std::string::size_type pos = 0; pos < s.size(); pos++) { 726 std::string::size_type begin = pos; 727 728 while (pos <= s.size() && s[pos] != D) pos++; 729 std::string found = s.substr(begin, pos - begin); 730 if (each) each(found); 731 if (all) result.push_back(found); 732 } 733 if (all) all(result); 734 } 735 736 std::string mString; 737 std::map<std::string, int> &mMapping; 738 }; 739 740 } // namespace 741 742 class ExtractRSoVMD : public android::spirit::DoNothingVisitor { 743 public: 744 ExtractRSoVMD() : mGAMapping(new std::map<std::string, int>) {} 745 746 void visit(android::spirit::StringInst *s) { 747 ALOGV("ExtractRSoVMD: string = %s", s->mOperand1.c_str()); 748 std::map<std::string, int> mapping; 749 ParseMD p(s->mOperand1, mapping); 750 if (p.parse()) { 751 *mGAMapping = std::move(mapping); 752 } 753 } 754 755 std::map<std::string, int> *takeMapping(void) { return mGAMapping.release(); } 756 757 private: 758 std::unique_ptr<std::map<std::string, int> > mGAMapping; 759 }; 760 761 bool rsovScriptInit(const Context *rsc, ScriptC *script, char const *resName, 762 char const *cacheDir, uint8_t const *bitcode, 763 size_t bitcodeSize, uint32_t flags) { 764 RSoVHal *hal = static_cast<RSoVHal *>(rsc->mHal.drv); 765 766 std::unique_ptr<bcinfo::MetadataExtractor> bitcodeMetadata( 767 new bcinfo::MetadataExtractor((const char *)bitcode, bitcodeSize)); 768 if (!bitcodeMetadata || !bitcodeMetadata->extract()) { 769 ALOGE("Could not extract metadata from bitcode from %s", resName); 770 return false; 771 } 772 773 std::vector<uint8_t> modifiedBitcode; 774 auto spvWords = 775 compileBitcode(resName, cacheDir, (const char *)bitcode, bitcodeSize, modifiedBitcode); 776 if (!spvWords.empty() && !modifiedBitcode.empty()) { 777 // Extract compiler metadata on allocation->binding mapping 778 android::spirit::Module *module = 779 android::spirit::Deserialize<android::spirit::Module>(spvWords); 780 rsAssert(module); 781 ExtractRSoVMD ga_md; 782 module->accept(&ga_md); 783 784 RSoVScript *rsovScript = 785 new RSoVScript(hal->mRSoV, std::move(spvWords), 786 bitcodeMetadata.release(), ga_md.takeMapping()); 787 if (rsovScript) { 788 std::string modifiedResName(resName); 789 modifiedResName.append("_modified"); 790 RsdCpuReference::CpuScript *cs = hal->mCpuRef->createScript( 791 script, modifiedResName.c_str(), cacheDir, modifiedBitcode.data(), 792 modifiedBitcode.size(), flags); 793 if (cs != nullptr) { 794 cs->populateScript(script); 795 rsovScript->setCpuScript(cs); 796 RSoVScript::initScriptOnRSoV(script, rsovScript); 797 return true; 798 } 799 } 800 } 801 802 ALOGD("Failed creating an RSoV script for %s", resName); 803 // Fall back to CPU driver instead 804 805 std::unique_ptr<RsdCpuReference::CpuScript> cs(hal->mCpuRef->createScript( 806 script, resName, cacheDir, bitcode, bitcodeSize, flags)); 807 if (cs == nullptr) { 808 ALOGE("Failed creating a CPU script %p for %s (%p)", cs.get(), resName, 809 script); 810 return false; 811 } 812 cs->populateScript(script); 813 814 RSoVScript::initScriptOnCpu(script, cs.release()); 815 816 return true; 817 } 818 819 bool rsovInitIntrinsic(const Context *rsc, Script *s, RsScriptIntrinsicID iid, 820 Element *e) { 821 RSoVHal *dc = (RSoVHal *)rsc->mHal.drv; 822 RsdCpuReference::CpuScript *cs = dc->mCpuRef->createIntrinsic(s, iid, e); 823 if (cs == nullptr) { 824 return false; 825 } 826 s->mHal.drv = cs; 827 cs->populateScript(s); 828 return true; 829 } 830 831 void rsovScriptInvokeForEach(const Context *rsc, Script *s, uint32_t slot, 832 const Allocation *ain, Allocation *aout, 833 const void *usr, size_t usrLen, 834 const RsScriptCall *sc) { 835 if (ain == nullptr) { 836 rsovScriptInvokeForEachMulti(rsc, s, slot, nullptr, 0, aout, usr, usrLen, 837 sc); 838 } else { 839 const Allocation *ains[1] = {ain}; 840 841 rsovScriptInvokeForEachMulti(rsc, s, slot, ains, 1, aout, usr, usrLen, sc); 842 } 843 } 844 845 void rsovScriptInvokeForEachMulti(const Context *rsc, Script *s, uint32_t slot, 846 const Allocation **ains, size_t inLen, 847 Allocation *aout, const void *usr, 848 size_t usrLen, const RsScriptCall *sc) { 849 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 850 cs->invokeForEach(slot, ains, inLen, aout, usr, usrLen, sc); 851 } 852 853 int rsovScriptInvokeRoot(const Context *dc, Script *s) { 854 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 855 return cs->invokeRoot(); 856 } 857 858 void rsovScriptInvokeInit(const Context *dc, Script *s) { 859 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 860 cs->invokeInit(); 861 } 862 863 void rsovScriptInvokeFreeChildren(const Context *dc, Script *s) { 864 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 865 cs->invokeFreeChildren(); 866 } 867 868 void rsovScriptInvokeFunction(const Context *dc, Script *s, uint32_t slot, 869 const void *params, size_t paramLength) { 870 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 871 cs->invokeFunction(slot, params, paramLength); 872 } 873 874 void rsovScriptInvokeReduce(const Context *dc, Script *s, uint32_t slot, 875 const Allocation **ains, size_t inLen, 876 Allocation *aout, const RsScriptCall *sc) { 877 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 878 cs->invokeReduce(slot, ains, inLen, aout, sc); 879 } 880 881 void rsovScriptSetGlobalVar(const Context *dc, const Script *s, uint32_t slot, 882 void *data, size_t dataLength) { 883 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 884 cs->setGlobalVar(slot, data, dataLength); 885 } 886 887 void rsovScriptGetGlobalVar(const Context *dc, const Script *s, uint32_t slot, 888 void *data, size_t dataLength) { 889 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 890 cs->getGlobalVar(slot, data, dataLength); 891 } 892 893 void rsovScriptSetGlobalVarWithElemDims( 894 const Context *dc, const Script *s, uint32_t slot, void *data, 895 size_t dataLength, const android::renderscript::Element *elem, 896 const uint32_t *dims, size_t dimLength) { 897 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 898 cs->setGlobalVarWithElemDims(slot, data, dataLength, elem, dims, dimLength); 899 } 900 901 void rsovScriptSetGlobalBind(const Context *dc, const Script *s, uint32_t slot, 902 Allocation *data) { 903 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 904 cs->setGlobalBind(slot, data); 905 } 906 907 void rsovScriptSetGlobalObj(const Context *dc, const Script *s, uint32_t slot, 908 ObjectBase *data) { 909 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 910 cs->setGlobalObj(slot, data); 911 } 912 913 void rsovScriptDestroy(const Context *dc, Script *s) { 914 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)s->mHal.drv; 915 delete cs; 916 s->mHal.drv = nullptr; 917 } 918 919 Allocation *rsovScriptGetAllocationForPointer( 920 const android::renderscript::Context *dc, 921 const android::renderscript::Script *sc, const void *ptr) { 922 RsdCpuReference::CpuScript *cs = (RsdCpuReference::CpuScript *)sc->mHal.drv; 923 return cs->getAllocationForPointer(ptr); 924 } 925 926 void rsovScriptUpdateCachedObject(const Context *rsc, const Script *script, 927 rs_script *obj) { 928 obj->p = script; 929 #ifdef __LP64__ 930 obj->unused1 = nullptr; 931 obj->unused2 = nullptr; 932 obj->unused3 = nullptr; 933 #endif 934 } 935