1 /* 2 * Copyright (C) 2011-2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "rsCpuCore.h" 18 #include "rsCpuScript.h" 19 #include "rsCpuExecutable.h" 20 21 #ifdef RS_COMPATIBILITY_LIB 22 #include <stdio.h> 23 #include <sys/stat.h> 24 #include <unistd.h> 25 #else 26 #include "rsCppUtils.h" 27 28 #include <bcc/Config.h> 29 #include <bcinfo/MetadataExtractor.h> 30 31 #include <zlib.h> 32 #include <sys/file.h> 33 #include <sys/types.h> 34 #include <unistd.h> 35 36 #include <string> 37 #include <vector> 38 #endif 39 40 #include <set> 41 #include <string> 42 #include <dlfcn.h> 43 #include <stdlib.h> 44 #include <string.h> 45 #include <iostream> 46 #include <sstream> 47 48 namespace { 49 50 static const bool kDebugGlobalVariables = false; 51 52 static bool allocationLODIsNull(const android::renderscript::Allocation *alloc) { 53 // Even if alloc != nullptr, mallocPtr could be null if 54 // IO_OUTPUT/IO_INPUT with no bound surface. 55 return alloc && alloc->mHal.drvState.lod[0].mallocPtr == nullptr; 56 } 57 58 #ifndef RS_COMPATIBILITY_LIB 59 60 static void setCompileArguments(std::vector<const char*>* args, 61 const std::string& bcFileName, 62 const char* cacheDir, const char* resName, 63 const char* core_lib, bool useRSDebugContext, 64 const char* bccPluginName, bool emitGlobalInfo, 65 int optLevel, bool emitGlobalInfoSkipConstant) { 66 rsAssert(cacheDir && resName && core_lib); 67 args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH); 68 args->push_back("-unroll-runtime"); 69 args->push_back("-scalarize-load-store"); 70 if (emitGlobalInfo) { 71 args->push_back("-rs-global-info"); 72 if (emitGlobalInfoSkipConstant) { 73 args->push_back("-rs-global-info-skip-constant"); 74 } 75 } 76 args->push_back("-o"); 77 args->push_back(resName); 78 args->push_back("-output_path"); 79 args->push_back(cacheDir); 80 args->push_back("-bclib"); 81 args->push_back(core_lib); 82 args->push_back("-mtriple"); 83 args->push_back(DEFAULT_TARGET_TRIPLE_STRING); 84 args->push_back("-O"); 85 86 switch (optLevel) { 87 case 0: 88 args->push_back("0"); 89 break; 90 case 3: 91 args->push_back("3"); 92 break; 93 default: 94 ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel); 95 args->push_back("3"); 96 break; 97 } 98 99 // Enable workaround for A53 codegen by default. 100 #if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND) 101 args->push_back("-aarch64-fix-cortex-a53-835769"); 102 #endif 103 104 // Execute the bcc compiler. 105 if (useRSDebugContext) { 106 args->push_back("-rs-debug-ctx"); 107 } else { 108 // Only load additional libraries for compiles that don't use 109 // the debug context. 110 if (bccPluginName && strlen(bccPluginName) > 0) { 111 args->push_back("-load"); 112 args->push_back(bccPluginName); 113 } 114 } 115 116 args->push_back("-fPIC"); 117 args->push_back("-embedRSInfo"); 118 119 args->push_back(bcFileName.c_str()); 120 args->push_back(nullptr); 121 } 122 123 static bool compileBitcode(const std::string &bcFileName, 124 const char *bitcode, 125 size_t bitcodeSize, 126 std::vector<const char *> &compileArguments) { 127 rsAssert(bitcode && bitcodeSize); 128 129 FILE *bcfile = fopen(bcFileName.c_str(), "w"); 130 if (!bcfile) { 131 ALOGE("Could not write to %s", bcFileName.c_str()); 132 return false; 133 } 134 size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile); 135 fclose(bcfile); 136 if (nwritten != bitcodeSize) { 137 ALOGE("Could not write %zu bytes to %s", bitcodeSize, 138 bcFileName.c_str()); 139 return false; 140 } 141 142 return android::renderscript::rsuExecuteCommand( 143 android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH, 144 compileArguments.size()-1, compileArguments.data()); 145 } 146 147 // The checksum is unnecessary under a few conditions, since the primary 148 // use-case for it is debugging. If we are loading something from the 149 // system partition (read-only), we know that it was precompiled as part of 150 // application ahead of time (and thus the checksum is completely 151 // unnecessary). The checksum is also unnecessary on release (non-debug) 152 // builds, as the only way to get a shared object is to have compiled the 153 // script once already. On a release build, there is no way to adjust the 154 // other libraries/dependencies, and so the only reason to recompile would 155 // be for a source APK change or an OTA. In either case, the APK would be 156 // reinstalled, which would already clear the code_cache/ directory. 157 bool isChecksumNeeded(const char *cacheDir) { 158 if ((::strcmp(SYSLIBPATH, cacheDir) == 0) || 159 (::strcmp(SYSLIBPATH_VENDOR, cacheDir) == 0)) 160 return false; 161 char buf[PROP_VALUE_MAX]; 162 android::renderscript::property_get("ro.debuggable", buf, ""); 163 return (buf[0] == '1'); 164 } 165 166 bool addFileToChecksum(const char *fileName, uint32_t &checksum) { 167 int FD = open(fileName, O_RDONLY); 168 if (FD == -1) { 169 ALOGE("Cannot open file \'%s\' to compute checksum", fileName); 170 return false; 171 } 172 173 char buf[256]; 174 while (true) { 175 ssize_t nread = read(FD, buf, sizeof(buf)); 176 if (nread < 0) { // bail out on failed read 177 ALOGE("Error while computing checksum for file \'%s\'", fileName); 178 return false; 179 } 180 181 checksum = adler32(checksum, (const unsigned char *) buf, nread); 182 if (static_cast<size_t>(nread) < sizeof(buf)) // EOF 183 break; 184 } 185 186 if (close(FD) != 0) { 187 ALOGE("Cannot close file \'%s\' after computing checksum", fileName); 188 return false; 189 } 190 return true; 191 } 192 193 #endif // !defined(RS_COMPATIBILITY_LIB) 194 } // namespace 195 196 namespace android { 197 namespace renderscript { 198 199 #ifndef RS_COMPATIBILITY_LIB 200 201 uint32_t constructBuildChecksum(uint8_t const *bitcode, size_t bitcodeSize, 202 const char *commandLine, 203 const char** bccFiles, size_t numFiles) { 204 uint32_t checksum = adler32(0L, Z_NULL, 0); 205 206 // include checksum of bitcode 207 if (bitcode != nullptr && bitcodeSize > 0) { 208 checksum = adler32(checksum, bitcode, bitcodeSize); 209 } 210 211 // include checksum of command line arguments 212 checksum = adler32(checksum, (const unsigned char *) commandLine, 213 strlen(commandLine)); 214 215 // include checksum of bccFiles 216 for (size_t i = 0; i < numFiles; i++) { 217 const char* bccFile = bccFiles[i]; 218 if (bccFile[0] != 0 && !addFileToChecksum(bccFile, checksum)) { 219 // return empty checksum instead of something partial/corrupt 220 return 0; 221 } 222 } 223 224 return checksum; 225 } 226 227 #endif // !RS_COMPATIBILITY_LIB 228 229 RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) { 230 mCtx = ctx; 231 mScript = s; 232 233 mScriptSO = nullptr; 234 235 mRoot = nullptr; 236 mRootExpand = nullptr; 237 mInit = nullptr; 238 mFreeChildren = nullptr; 239 mScriptExec = nullptr; 240 241 mBoundAllocs = nullptr; 242 mIntrinsicData = nullptr; 243 mIsThreadable = true; 244 245 mBuildChecksum = 0; 246 mChecksumNeeded = false; 247 } 248 249 bool RsdCpuScriptImpl::storeRSInfoFromSO() { 250 // The shared object may have an invalid build checksum. 251 // Validate and fail early. 252 mScriptExec = ScriptExecutable::createFromSharedObject( 253 mScriptSO, mChecksumNeeded ? mBuildChecksum : 0); 254 255 if (mScriptExec == nullptr) { 256 return false; 257 } 258 259 mRoot = (RootFunc_t) dlsym(mScriptSO, "root"); 260 if (mRoot) { 261 //ALOGE("Found root(): %p", mRoot); 262 } 263 mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand"); 264 if (mRootExpand) { 265 //ALOGE("Found root.expand(): %p", mRootExpand); 266 } 267 mInit = (InitOrDtorFunc_t) dlsym(mScriptSO, "init"); 268 if (mInit) { 269 //ALOGE("Found init(): %p", mInit); 270 } 271 mFreeChildren = (InitOrDtorFunc_t) dlsym(mScriptSO, ".rs.dtor"); 272 if (mFreeChildren) { 273 //ALOGE("Found .rs.dtor(): %p", mFreeChildren); 274 } 275 276 size_t varCount = mScriptExec->getExportedVariableCount(); 277 if (varCount > 0) { 278 mBoundAllocs = new Allocation *[varCount]; 279 memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs)); 280 } 281 282 mIsThreadable = mScriptExec->getThreadable(); 283 //ALOGE("Script isThreadable? %d", mIsThreadable); 284 285 if (kDebugGlobalVariables) { 286 mScriptExec->dumpGlobalInfo(); 287 } 288 289 return true; 290 } 291 292 bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir, 293 uint8_t const *bitcode, size_t bitcodeSize, 294 uint32_t flags, char const *bccPluginName) { 295 //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, 296 // bitcode, bitcodeSize, flags, lookupFunc); 297 //ALOGE("rsdScriptInit %p %p", rsc, script); 298 299 mCtx->lockMutex(); 300 #ifndef RS_COMPATIBILITY_LIB 301 bool useRSDebugContext = false; 302 303 bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize); 304 if (!bitcodeMetadata.extract()) { 305 ALOGE("Could not extract metadata from bitcode"); 306 mCtx->unlockMutex(); 307 return false; 308 } 309 310 const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize); 311 312 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 313 useRSDebugContext = true; 314 } 315 316 int optLevel = mCtx->getContext()->getOptLevel(); 317 318 std::string bcFileName(cacheDir); 319 bcFileName.append("/"); 320 bcFileName.append(resName); 321 bcFileName.append(".bc"); 322 323 std::vector<const char*> compileArguments; 324 bool emitGlobalInfo = mCtx->getEmbedGlobalInfo(); 325 bool emitGlobalInfoSkipConstant = mCtx->getEmbedGlobalInfoSkipConstant(); 326 setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib, 327 useRSDebugContext, bccPluginName, emitGlobalInfo, 328 optLevel, emitGlobalInfoSkipConstant); 329 330 mChecksumNeeded = isChecksumNeeded(cacheDir); 331 if (mChecksumNeeded) { 332 std::vector<const char *> bccFiles = { BCC_EXE_PATH, 333 core_lib, 334 }; 335 336 // The last argument of compileArguments is a nullptr, so remove 1 from 337 // the size. 338 std::unique_ptr<const char> compileCommandLine( 339 rsuJoinStrings(compileArguments.size()-1, compileArguments.data())); 340 341 mBuildChecksum = constructBuildChecksum(bitcode, bitcodeSize, 342 compileCommandLine.get(), 343 bccFiles.data(), bccFiles.size()); 344 345 if (mBuildChecksum == 0) { 346 // cannot compute checksum but verification is enabled 347 mCtx->unlockMutex(); 348 return false; 349 } 350 } 351 else { 352 // add a dummy/constant as a checksum if verification is disabled 353 mBuildChecksum = 0xabadcafe; 354 } 355 356 // Append build checksum to commandline 357 // Handle the terminal nullptr in compileArguments 358 compileArguments.pop_back(); 359 compileArguments.push_back("-build-checksum"); 360 std::stringstream ss; 361 ss << std::hex << mBuildChecksum; 362 std::string checksumStr(ss.str()); 363 compileArguments.push_back(checksumStr.c_str()); 364 compileArguments.push_back(nullptr); 365 366 const bool reuse = !is_force_recompile() && !useRSDebugContext; 367 if (reuse) { 368 mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 369 370 // Read RS info from the shared object to detect checksum mismatch 371 if (mScriptSO != nullptr && !storeRSInfoFromSO()) { 372 dlclose(mScriptSO); 373 mScriptSO = nullptr; 374 } 375 } 376 377 // If reuse is desired and we can't, it's either not there or out of date. 378 // We compile the bit code and try loading again. 379 if (mScriptSO == nullptr) { 380 if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize, 381 compileArguments)) 382 { 383 ALOGE("bcc: FAILS to compile '%s'", resName); 384 mCtx->unlockMutex(); 385 return false; 386 } 387 388 std::string SOPath; 389 390 if (!SharedLibraryUtils::createSharedLibrary( 391 mCtx->getContext()->getDriverName(), cacheDir, resName, reuse, 392 &SOPath)) { 393 ALOGE("Linker: Failed to link object file '%s'", resName); 394 mCtx->unlockMutex(); 395 return false; 396 } 397 398 if (reuse) { 399 mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName); 400 } else { 401 mScriptSO = SharedLibraryUtils::loadAndDeleteSharedLibrary(SOPath.c_str()); 402 } 403 if (mScriptSO == nullptr) { 404 ALOGE("Unable to load '%s'", resName); 405 mCtx->unlockMutex(); 406 return false; 407 } 408 409 // Read RS symbol information from the .so. 410 if (!storeRSInfoFromSO()) { 411 goto error; 412 } 413 } 414 415 mBitcodeFilePath.assign(bcFileName.c_str()); 416 417 #else // RS_COMPATIBILITY_LIB is defined 418 const char *nativeLibDir = mCtx->getContext()->getNativeLibDir(); 419 mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir); 420 421 if (!mScriptSO) { 422 goto error; 423 } 424 425 if (!storeRSInfoFromSO()) { 426 goto error; 427 } 428 #endif 429 mCtx->unlockMutex(); 430 return true; 431 432 error: 433 434 mCtx->unlockMutex(); 435 if (mScriptSO) { 436 dlclose(mScriptSO); 437 mScriptSO = nullptr; 438 } 439 return false; 440 } 441 442 #ifndef RS_COMPATIBILITY_LIB 443 444 const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode, 445 size_t bitcodeSize) { 446 const char* defaultLib = SYSLIBPATH_BC"/libclcore.bc"; 447 448 // If we're debugging, use the debug library. 449 if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) { 450 return SYSLIBPATH_BC"/libclcore_debug.bc"; 451 } 452 453 if (ME.hasDebugInfo()) { 454 return SYSLIBPATH_BC"/libclcore_g.bc"; 455 } 456 457 // If a callback has been registered to specify a library, use that. 458 RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback(); 459 if (selectRTCallback != nullptr) { 460 return selectRTCallback((const char*)bitcode, bitcodeSize); 461 } 462 463 // Check for a platform specific library 464 #if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON) 465 enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision(); 466 if (prec == bcinfo::RS_FP_Relaxed) { 467 // NEON-capable ARMv7a devices can use an accelerated math library 468 // for all reduced precision scripts. 469 // ARMv8 does not use NEON, as ASIMD can be used with all precision 470 // levels. 471 return SYSLIBPATH_BC"/libclcore_neon.bc"; 472 } else { 473 return defaultLib; 474 } 475 #elif defined(__i386__) || defined(__x86_64__) 476 // x86 devices will use an optimized library. 477 return SYSLIBPATH_BC"/libclcore_x86.bc"; 478 #else 479 return defaultLib; 480 #endif 481 } 482 483 #endif 484 485 void RsdCpuScriptImpl::populateScript(Script *script) { 486 // Copy info over to runtime 487 script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount(); 488 script->mHal.info.exportedReduceCount = mScriptExec->getExportedReduceCount(); 489 script->mHal.info.exportedForEachCount = mScriptExec->getExportedForEachCount(); 490 script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount(); 491 script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();; 492 script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys(); 493 script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues(); 494 495 // Bug, need to stash in metadata 496 if (mRootExpand) { 497 script->mHal.info.root = mRootExpand; 498 } else { 499 script->mHal.info.root = mRoot; 500 } 501 } 502 503 // Set up the launch dimensions, and write the values of the launch 504 // dimensions into the mtls start/end fields. 505 // 506 // Inputs: 507 // baseDim - base shape of the input 508 // sc - used to constrain the launch dimensions 509 // 510 // Returns: 511 // True on success, false on failure to set up 512 bool RsdCpuScriptImpl::setUpMtlsDimensions(MTLaunchStructCommon *mtls, 513 const RsLaunchDimensions &baseDim, 514 const RsScriptCall *sc) { 515 rsAssert(mtls); 516 517 #define SET_UP_DIMENSION(DIM_FIELD, SC_FIELD) do { \ 518 if (!sc || (sc->SC_FIELD##End == 0)) { \ 519 mtls->end.DIM_FIELD = baseDim.DIM_FIELD; \ 520 } else { \ 521 mtls->start.DIM_FIELD = \ 522 rsMin(baseDim.DIM_FIELD, sc->SC_FIELD##Start); \ 523 mtls->end.DIM_FIELD = \ 524 rsMin(baseDim.DIM_FIELD, sc->SC_FIELD##End); \ 525 if (mtls->start.DIM_FIELD >= mtls->end.DIM_FIELD) { \ 526 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, \ 527 "Failed to launch kernel; Invalid " \ 528 #SC_FIELD "Start or " #SC_FIELD "End."); \ 529 return false; \ 530 } \ 531 }} while(0) 532 533 SET_UP_DIMENSION(x, x); 534 SET_UP_DIMENSION(y, y); 535 SET_UP_DIMENSION(z, z); 536 // Checks and setup of fields other than x, y, z are ignored, since those 537 // fields are not used in the runtime and are not visible in the Java API. 538 #undef SET_UP_DIMENSION 539 540 return true; 541 } 542 543 // Preliminary work to prepare a general reduce-style kernel for launch. 544 bool RsdCpuScriptImpl::reduceMtlsSetup(const Allocation ** ains, 545 uint32_t inLen, 546 const Allocation * aout, 547 const RsScriptCall *sc, 548 MTLaunchStructReduce *mtls) { 549 rsAssert(ains && (inLen >= 1) && aout); 550 memset(mtls, 0, sizeof(MTLaunchStructReduce)); 551 mtls->dimPtr = &mtls->redp.dim; 552 553 for (int index = inLen; --index >= 0;) { 554 if (allocationLODIsNull(ains[index])) { 555 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 556 "reduce called with null in allocations"); 557 return false; 558 } 559 } 560 561 if (allocationLODIsNull(aout)) { 562 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 563 "reduce called with null out allocation"); 564 return false; 565 } 566 567 const Allocation *ain0 = ains[0]; 568 const Type *inType = ain0->getType(); 569 570 mtls->redp.dim.x = inType->getDimX(); 571 mtls->redp.dim.y = inType->getDimY(); 572 mtls->redp.dim.z = inType->getDimZ(); 573 574 for (int Index = inLen; --Index >= 1;) { 575 if (!ain0->hasSameDims(ains[Index])) { 576 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 577 "Failed to launch reduction kernel;" 578 "dimensions of input allocations do not match."); 579 return false; 580 } 581 } 582 583 if (!setUpMtlsDimensions(mtls, mtls->redp.dim, sc)) { 584 return false; 585 } 586 587 // The X & Y walkers always want 0-1 min even if dim is not present 588 mtls->end.x = rsMax((uint32_t)1, mtls->end.x); 589 mtls->end.y = rsMax((uint32_t)1, mtls->end.y); 590 591 mtls->rs = mCtx; 592 593 mtls->mSliceNum = 0; 594 mtls->mSliceSize = 1; 595 mtls->isThreadable = mIsThreadable; 596 597 // Set up output, 598 mtls->redp.outLen = 1; 599 mtls->redp.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; 600 mtls->redp.outStride[0] = aout->getType()->getElementSizeBytes(); 601 602 // Set up input. 603 memcpy(mtls->ains, ains, inLen * sizeof(ains[0])); 604 mtls->redp.inLen = inLen; 605 for (int index = inLen; --index >= 0;) { 606 mtls->redp.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr; 607 mtls->redp.inStride[index] = ains[index]->getType()->getElementSizeBytes(); 608 } 609 610 // All validation passed, ok to launch threads 611 return true; 612 } 613 614 // Preliminary work to prepare a forEach-style kernel for launch. 615 bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains, 616 uint32_t inLen, 617 Allocation * aout, 618 const void * usr, uint32_t usrLen, 619 const RsScriptCall *sc, 620 MTLaunchStructForEach *mtls) { 621 if (ains == nullptr && inLen != 0) { 622 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 623 "rsForEach called with none-zero inLen with null in allocations"); 624 return false; 625 } 626 627 memset(mtls, 0, sizeof(MTLaunchStructForEach)); 628 mtls->dimPtr = &mtls->fep.dim; 629 630 for (int index = inLen; --index >= 0;) { 631 if (allocationLODIsNull(ains[index])) { 632 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 633 "rsForEach called with null in allocations"); 634 return false; 635 } 636 } 637 638 if (allocationLODIsNull(aout)) { 639 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 640 "rsForEach called with null out allocations"); 641 return false; 642 } 643 644 // The only situation where ains[j] is null is when inLen==1 and j==0; 645 // and that can only happen for an old-style kernel in API level 11~13, 646 // where the input allocation cannot be skipped if the output allocation is specified. 647 if (inLen != 0) 648 rsAssert((inLen == 1) || (ains[0] != nullptr)); 649 650 if (inLen > 0 && ains[0]) { 651 const Allocation *ain0 = ains[0]; 652 const Type *inType = ain0->getType(); 653 654 mtls->fep.dim.x = inType->getDimX(); 655 mtls->fep.dim.y = inType->getDimY(); 656 mtls->fep.dim.z = inType->getDimZ(); 657 658 for (int Index = inLen; --Index >= 1;) { 659 if (!ain0->hasSameDims(ains[Index])) { 660 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 661 "Failed to launch kernel; dimensions of input " 662 "allocations do not match."); 663 return false; 664 } 665 } 666 } else if (aout != nullptr) { 667 const Type *outType = aout->getType(); 668 669 mtls->fep.dim.x = outType->getDimX(); 670 mtls->fep.dim.y = outType->getDimY(); 671 mtls->fep.dim.z = outType->getDimZ(); 672 673 } else if (sc != nullptr) { 674 mtls->fep.dim.x = sc->xEnd; 675 mtls->fep.dim.y = sc->yEnd; 676 mtls->fep.dim.z = 0; 677 } else { 678 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 679 "rsForEach called with null allocations"); 680 return false; 681 } 682 683 if (inLen > 0 && aout != nullptr) { 684 if (ains[0] && !ains[0]->hasSameDims(aout)) { 685 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, 686 "Failed to launch kernel; dimensions of input and output allocations do not match."); 687 688 return false; 689 } 690 } 691 692 if (!setUpMtlsDimensions(mtls, mtls->fep.dim, sc)) { 693 return false; 694 } 695 696 // The X & Y walkers always want 0-1 min even if dim is not present 697 mtls->end.x = rsMax((uint32_t)1, mtls->end.x); 698 mtls->end.y = rsMax((uint32_t)1, mtls->end.y); 699 mtls->rs = mCtx; 700 if (ains) { 701 memcpy(mtls->ains, ains, inLen * sizeof(ains[0])); 702 } 703 mtls->aout[0] = aout; 704 mtls->fep.usr = usr; 705 mtls->fep.usrLen = usrLen; 706 mtls->mSliceSize = 1; 707 mtls->mSliceNum = 0; 708 709 mtls->isThreadable = mIsThreadable; 710 711 if (inLen > 0) { 712 mtls->fep.inLen = inLen; 713 for (int index = inLen; --index >= 0;) { 714 if (ains[index] == nullptr) { 715 // In old style kernels, the first and only input allocation could be null. 716 // Not allowed in newer styles. 717 rsAssert(inLen == 1 && index == 0); 718 continue; 719 } 720 mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr; 721 mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes(); 722 } 723 } 724 725 if (aout != nullptr) { 726 mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr; 727 mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes(); 728 } 729 730 // All validation passed, ok to launch threads 731 return true; 732 } 733 734 735 void RsdCpuScriptImpl::invokeForEach(uint32_t slot, 736 const Allocation ** ains, 737 uint32_t inLen, 738 Allocation * aout, 739 const void * usr, 740 uint32_t usrLen, 741 const RsScriptCall *sc) { 742 743 MTLaunchStructForEach mtls; 744 745 if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) { 746 forEachKernelSetup(slot, &mtls); 747 748 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 749 mCtx->launchForEach(ains, inLen, aout, sc, &mtls); 750 mCtx->setTLS(oldTLS); 751 } 752 } 753 754 void RsdCpuScriptImpl::invokeReduce(uint32_t slot, 755 const Allocation ** ains, uint32_t inLen, 756 Allocation *aout, 757 const RsScriptCall *sc) { 758 MTLaunchStructReduce mtls; 759 760 if (reduceMtlsSetup(ains, inLen, aout, sc, &mtls)) { 761 reduceKernelSetup(slot, &mtls); 762 RsdCpuScriptImpl *oldTLS = mCtx->setTLS(this); 763 mCtx->launchReduce(ains, inLen, aout, &mtls); 764 mCtx->setTLS(oldTLS); 765 } 766 } 767 768 void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *mtls) { 769 mtls->script = this; 770 mtls->fep.slot = slot; 771 mtls->kernel = mScriptExec->getForEachFunction(slot); 772 rsAssert(mtls->kernel != nullptr); 773 } 774 775 void RsdCpuScriptImpl::reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls) { 776 mtls->script = this; 777 mtls->redp.slot = slot; 778 779 const ReduceDescription *desc = mScriptExec->getReduceDescription(slot); 780 mtls->accumFunc = desc->accumFunc; 781 mtls->initFunc = desc->initFunc; // might legally be nullptr 782 mtls->combFunc = desc->combFunc; // might legally be nullptr 783 mtls->outFunc = desc->outFunc; // might legally be nullptr 784 mtls->accumSize = desc->accumSize; 785 786 rsAssert(mtls->accumFunc != nullptr); 787 } 788 789 int RsdCpuScriptImpl::invokeRoot() { 790 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 791 int ret = mRoot(); 792 mCtx->setTLS(oldTLS); 793 return ret; 794 } 795 796 void RsdCpuScriptImpl::invokeInit() { 797 if (mInit) { 798 mInit(); 799 } 800 } 801 802 void RsdCpuScriptImpl::invokeFreeChildren() { 803 if (mFreeChildren) { 804 mFreeChildren(); 805 } 806 } 807 808 void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params, 809 size_t paramLength) { 810 //ALOGE("invoke %i %p %zu", slot, params, paramLength); 811 void * ap = nullptr; 812 813 #if defined(__x86_64__) 814 // The invoked function could have input parameter of vector type for example float4 which 815 // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform. 816 // So try to align void* params before passing them into RS exported function. 817 818 if ((uint8_t)(uint64_t)params & 0x0F) { 819 if ((ap = (void*)memalign(16, paramLength)) != nullptr) { 820 memcpy(ap, params, paramLength); 821 } else { 822 ALOGE("x86_64: invokeFunction memalign error, still use params which" 823 " is not 16 bytes aligned."); 824 } 825 } 826 #endif 827 828 RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this); 829 reinterpret_cast<void (*)(const void *, uint32_t)>( 830 mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength); 831 832 #if defined(__x86_64__) 833 free(ap); 834 #endif 835 836 mCtx->setTLS(oldTLS); 837 } 838 839 void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) { 840 //rsAssert(!script->mFieldIsObject[slot]); 841 //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength); 842 843 //if (mIntrinsicID) { 844 //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength); 845 //return; 846 //} 847 848 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 849 if (!destPtr) { 850 //ALOGV("Calling setVar on slot = %i which is null", slot); 851 return; 852 } 853 854 memcpy(destPtr, data, dataLength); 855 } 856 857 void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) { 858 //rsAssert(!script->mFieldIsObject[slot]); 859 //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength); 860 861 int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 862 if (!srcPtr) { 863 //ALOGV("Calling setVar on slot = %i which is null", slot); 864 return; 865 } 866 memcpy(data, srcPtr, dataLength); 867 } 868 869 870 void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength, 871 const Element *elem, 872 const uint32_t *dims, size_t dimLength) { 873 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 874 if (!destPtr) { 875 //ALOGV("Calling setVar on slot = %i which is null", slot); 876 return; 877 } 878 879 // We want to look at dimension in terms of integer components, 880 // but dimLength is given in terms of bytes. 881 dimLength /= sizeof(int); 882 883 // Only a single dimension is currently supported. 884 rsAssert(dimLength == 1); 885 if (dimLength == 1) { 886 // First do the increment loop. 887 size_t stride = elem->getSizeBytes(); 888 const char *cVal = reinterpret_cast<const char *>(data); 889 for (uint32_t i = 0; i < dims[0]; i++) { 890 elem->incRefs(cVal); 891 cVal += stride; 892 } 893 894 // Decrement loop comes after (to prevent race conditions). 895 char *oldVal = reinterpret_cast<char *>(destPtr); 896 for (uint32_t i = 0; i < dims[0]; i++) { 897 elem->decRefs(oldVal); 898 oldVal += stride; 899 } 900 } 901 902 memcpy(destPtr, data, dataLength); 903 } 904 905 void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) { 906 907 //rsAssert(!script->mFieldIsObject[slot]); 908 //ALOGE("setGlobalBind %i %p", slot, data); 909 910 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 911 if (!destPtr) { 912 //ALOGV("Calling setVar on slot = %i which is null", slot); 913 return; 914 } 915 916 void *ptr = nullptr; 917 mBoundAllocs[slot] = data; 918 if (data) { 919 ptr = data->mHal.drvState.lod[0].mallocPtr; 920 } 921 memcpy(destPtr, &ptr, sizeof(void *)); 922 } 923 924 void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) { 925 926 //rsAssert(script->mFieldIsObject[slot]); 927 //ALOGE("setGlobalObj %i %p", slot, data); 928 929 int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot)); 930 if (!destPtr) { 931 //ALOGV("Calling setVar on slot = %i which is null", slot); 932 return; 933 } 934 935 rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data); 936 } 937 938 const char* RsdCpuScriptImpl::getFieldName(uint32_t slot) const { 939 return mScriptExec->getFieldName(slot); 940 } 941 942 RsdCpuScriptImpl::~RsdCpuScriptImpl() { 943 delete mScriptExec; 944 delete[] mBoundAllocs; 945 if (mScriptSO) { 946 dlclose(mScriptSO); 947 } 948 } 949 950 Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const { 951 if (!ptr) { 952 return nullptr; 953 } 954 955 for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) { 956 Allocation *a = mBoundAllocs[ct]; 957 if (!a) continue; 958 if (a->mHal.drvState.lod[0].mallocPtr == ptr) { 959 return a; 960 } 961 } 962 ALOGE("rsGetAllocation, failed to find %p", ptr); 963 return nullptr; 964 } 965 966 int RsdCpuScriptImpl::getGlobalEntries() const { 967 return mScriptExec->getGlobalEntries(); 968 } 969 970 const char * RsdCpuScriptImpl::getGlobalName(int i) const { 971 return mScriptExec->getGlobalName(i); 972 } 973 974 const void * RsdCpuScriptImpl::getGlobalAddress(int i) const { 975 return mScriptExec->getGlobalAddress(i); 976 } 977 978 size_t RsdCpuScriptImpl::getGlobalSize(int i) const { 979 return mScriptExec->getGlobalSize(i); 980 } 981 982 uint32_t RsdCpuScriptImpl::getGlobalProperties(int i) const { 983 return mScriptExec->getGlobalProperties(i); 984 } 985 986 void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains, 987 uint32_t inLen, Allocation * aout, 988 const void * usr, uint32_t usrLen, 989 const RsScriptCall *sc) {} 990 991 void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains, 992 uint32_t inLen, Allocation * aout, 993 const void * usr, uint32_t usrLen, 994 const RsScriptCall *sc) {} 995 996 997 } // namespace renderscript 998 } // namespace android 999