Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2011-2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuCore.h"
     18 #include "rsCpuScript.h"
     19 #include "rsCpuExecutable.h"
     20 
     21 #ifdef RS_COMPATIBILITY_LIB
     22     #include <stdio.h>
     23     #include <sys/stat.h>
     24     #include <unistd.h>
     25 #else
     26     #include "rsCppUtils.h"
     27 
     28     #include <bcc/Config/Config.h>
     29     #include <bcinfo/MetadataExtractor.h>
     30     #include <cutils/properties.h>
     31 
     32     #include <zlib.h>
     33     #include <sys/file.h>
     34     #include <sys/types.h>
     35     #include <unistd.h>
     36 
     37     #include <string>
     38     #include <vector>
     39 #endif
     40 
     41 #include <set>
     42 #include <string>
     43 #include <dlfcn.h>
     44 #include <stdlib.h>
     45 #include <string.h>
     46 #include <iostream>
     47 #include <sstream>
     48 
     49 namespace {
     50 
// Compile-time debugging switch: when true, storeRSInfoFromSO() calls
// ScriptExecutable::dumpGlobalInfo() right after a script has been loaded.
static const bool kDebugGlobalVariables = false;
     52 
     53 static bool allocationLODIsNull(const android::renderscript::Allocation *alloc) {
     54   // Even if alloc != nullptr, mallocPtr could be null if
     55   // IO_OUTPUT/IO_INPUT with no bound surface.
     56   return alloc && alloc->mHal.drvState.lod[0].mallocPtr == nullptr;
     57 }
     58 
     59 #ifndef RS_COMPATIBILITY_LIB
     60 
     61 static bool is_force_recompile() {
     62 #ifdef RS_SERVER
     63   return false;
     64 #else
     65   char buf[PROPERTY_VALUE_MAX];
     66 
     67   // Re-compile if floating point precision has been overridden.
     68   property_get("debug.rs.precision", buf, "");
     69   if (buf[0] != '\0') {
     70     return true;
     71   }
     72 
     73   // Re-compile if debug.rs.forcerecompile is set.
     74   property_get("debug.rs.forcerecompile", buf, "0");
     75   if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
     76     return true;
     77   } else {
     78     return false;
     79   }
     80 #endif  // RS_SERVER
     81 }
     82 
     83 static void setCompileArguments(std::vector<const char*>* args,
     84                                 const std::string& bcFileName,
     85                                 const char* cacheDir, const char* resName,
     86                                 const char* core_lib, bool useRSDebugContext,
     87                                 const char* bccPluginName, bool emitGlobalInfo,
     88                                 int optLevel, bool emitGlobalInfoSkipConstant) {
     89     rsAssert(cacheDir && resName && core_lib);
     90     args->push_back(android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH);
     91     args->push_back("-unroll-runtime");
     92     args->push_back("-scalarize-load-store");
     93     if (emitGlobalInfo) {
     94         args->push_back("-rs-global-info");
     95         if (emitGlobalInfoSkipConstant) {
     96             args->push_back("-rs-global-info-skip-constant");
     97         }
     98     }
     99     args->push_back("-o");
    100     args->push_back(resName);
    101     args->push_back("-output_path");
    102     args->push_back(cacheDir);
    103     args->push_back("-bclib");
    104     args->push_back(core_lib);
    105     args->push_back("-mtriple");
    106     args->push_back(DEFAULT_TARGET_TRIPLE_STRING);
    107     args->push_back("-O");
    108 
    109     switch (optLevel) {
    110     case 0:
    111         args->push_back("0");
    112         break;
    113     case 3:
    114         args->push_back("3");
    115         break;
    116     default:
    117         ALOGW("Expected optimization level of 0 or 3. Received %d", optLevel);
    118         args->push_back("3");
    119         break;
    120     }
    121 
    122     // Enable workaround for A53 codegen by default.
    123 #if defined(__aarch64__) && !defined(DISABLE_A53_WORKAROUND)
    124     args->push_back("-aarch64-fix-cortex-a53-835769");
    125 #endif
    126 
    127     // Execute the bcc compiler.
    128     if (useRSDebugContext) {
    129         args->push_back("-rs-debug-ctx");
    130     } else {
    131         // Only load additional libraries for compiles that don't use
    132         // the debug context.
    133         if (bccPluginName && strlen(bccPluginName) > 0) {
    134             args->push_back("-load");
    135             args->push_back(bccPluginName);
    136         }
    137     }
    138 
    139     args->push_back("-fPIC");
    140     args->push_back("-embedRSInfo");
    141 
    142     args->push_back(bcFileName.c_str());
    143     args->push_back(nullptr);
    144 }
    145 
    146 static bool compileBitcode(const std::string &bcFileName,
    147                            const char *bitcode,
    148                            size_t bitcodeSize,
    149                            std::vector<const char *> &compileArguments) {
    150     rsAssert(bitcode && bitcodeSize);
    151 
    152     FILE *bcfile = fopen(bcFileName.c_str(), "w");
    153     if (!bcfile) {
    154         ALOGE("Could not write to %s", bcFileName.c_str());
    155         return false;
    156     }
    157     size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
    158     fclose(bcfile);
    159     if (nwritten != bitcodeSize) {
    160         ALOGE("Could not write %zu bytes to %s", bitcodeSize,
    161               bcFileName.c_str());
    162         return false;
    163     }
    164 
    165     return android::renderscript::rsuExecuteCommand(
    166                    android::renderscript::RsdCpuScriptImpl::BCC_EXE_PATH,
    167                    compileArguments.size()-1, compileArguments.data());
    168 }
    169 
    170 // The checksum is unnecessary under a few conditions, since the primary
    171 // use-case for it is debugging. If we are loading something from the
    172 // system partition (read-only), we know that it was precompiled as part of
    173 // application ahead of time (and thus the checksum is completely
    174 // unnecessary). The checksum is also unnecessary on release (non-debug)
    175 // builds, as the only way to get a shared object is to have compiled the
    176 // script once already. On a release build, there is no way to adjust the
    177 // other libraries/dependencies, and so the only reason to recompile would
    178 // be for a source APK change or an OTA. In either case, the APK would be
    179 // reinstalled, which would already clear the code_cache/ directory.
    180 bool isChecksumNeeded(const char *cacheDir) {
    181     if ((::strcmp(SYSLIBPATH, cacheDir) == 0) ||
    182         (::strcmp(SYSLIBPATH_VENDOR, cacheDir) == 0))
    183         return false;
    184     char buf[PROPERTY_VALUE_MAX];
    185     property_get("ro.debuggable", buf, "");
    186     return (buf[0] == '1');
    187 }
    188 
    189 bool addFileToChecksum(const char *fileName, uint32_t &checksum) {
    190     int FD = open(fileName, O_RDONLY);
    191     if (FD == -1) {
    192         ALOGE("Cannot open file \'%s\' to compute checksum", fileName);
    193         return false;
    194     }
    195 
    196     char buf[256];
    197     while (true) {
    198         ssize_t nread = read(FD, buf, sizeof(buf));
    199         if (nread < 0) { // bail out on failed read
    200             ALOGE("Error while computing checksum for file \'%s\'", fileName);
    201             return false;
    202         }
    203 
    204         checksum = adler32(checksum, (const unsigned char *) buf, nread);
    205         if (static_cast<size_t>(nread) < sizeof(buf)) // EOF
    206             break;
    207     }
    208 
    209     if (close(FD) != 0) {
    210         ALOGE("Cannot close file \'%s\' after computing checksum", fileName);
    211         return false;
    212     }
    213     return true;
    214 }
    215 
    216 #endif  // !defined(RS_COMPATIBILITY_LIB)
    217 }  // namespace
    218 
    219 namespace android {
    220 namespace renderscript {
    221 
    222 #ifndef RS_COMPATIBILITY_LIB
    223 
    224 uint32_t constructBuildChecksum(uint8_t const *bitcode, size_t bitcodeSize,
    225                                 const char *commandLine,
    226                                 const char** bccFiles, size_t numFiles) {
    227     uint32_t checksum = adler32(0L, Z_NULL, 0);
    228 
    229     // include checksum of bitcode
    230     if (bitcode != nullptr && bitcodeSize > 0) {
    231         checksum = adler32(checksum, bitcode, bitcodeSize);
    232     }
    233 
    234     // include checksum of command line arguments
    235     checksum = adler32(checksum, (const unsigned char *) commandLine,
    236                        strlen(commandLine));
    237 
    238     // include checksum of bccFiles
    239     for (size_t i = 0; i < numFiles; i++) {
    240         const char* bccFile = bccFiles[i];
    241         if (bccFile[0] != 0 && !addFileToChecksum(bccFile, checksum)) {
    242             // return empty checksum instead of something partial/corrupt
    243             return 0;
    244         }
    245     }
    246 
    247     return checksum;
    248 }
    249 
    250 #endif  // !RS_COMPATIBILITY_LIB
    251 
    252 RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
    253     mCtx = ctx;
    254     mScript = s;
    255 
    256     mScriptSO = nullptr;
    257 
    258     mRoot = nullptr;
    259     mRootExpand = nullptr;
    260     mInit = nullptr;
    261     mFreeChildren = nullptr;
    262     mScriptExec = nullptr;
    263 
    264     mBoundAllocs = nullptr;
    265     mIntrinsicData = nullptr;
    266     mIsThreadable = true;
    267 
    268     mBuildChecksum = 0;
    269     mChecksumNeeded = false;
    270 }
    271 
    272 bool RsdCpuScriptImpl::storeRSInfoFromSO() {
    273     // The shared object may have an invalid build checksum.
    274     // Validate and fail early.
    275     mScriptExec = ScriptExecutable::createFromSharedObject(
    276             mScriptSO, mChecksumNeeded ? mBuildChecksum : 0);
    277 
    278     if (mScriptExec == nullptr) {
    279         return false;
    280     }
    281 
    282     mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
    283     if (mRoot) {
    284         //ALOGE("Found root(): %p", mRoot);
    285     }
    286     mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
    287     if (mRootExpand) {
    288         //ALOGE("Found root.expand(): %p", mRootExpand);
    289     }
    290     mInit = (InitOrDtorFunc_t) dlsym(mScriptSO, "init");
    291     if (mInit) {
    292         //ALOGE("Found init(): %p", mInit);
    293     }
    294     mFreeChildren = (InitOrDtorFunc_t) dlsym(mScriptSO, ".rs.dtor");
    295     if (mFreeChildren) {
    296         //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
    297     }
    298 
    299     size_t varCount = mScriptExec->getExportedVariableCount();
    300     if (varCount > 0) {
    301         mBoundAllocs = new Allocation *[varCount];
    302         memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
    303     }
    304 
    305     mIsThreadable = mScriptExec->getThreadable();
    306     //ALOGE("Script isThreadable? %d", mIsThreadable);
    307 
    308     if (kDebugGlobalVariables) {
    309         mScriptExec->dumpGlobalInfo();
    310     }
    311 
    312     return true;
    313 }
    314 
    315 bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
    316                             uint8_t const *bitcode, size_t bitcodeSize,
    317                             uint32_t flags, char const *bccPluginName) {
    318     //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir,
    319     // bitcode, bitcodeSize, flags, lookupFunc);
    320     //ALOGE("rsdScriptInit %p %p", rsc, script);
    321 
    322     mCtx->lockMutex();
    323 #ifndef RS_COMPATIBILITY_LIB
    324     bool useRSDebugContext = false;
    325 
    326     bcinfo::MetadataExtractor bitcodeMetadata((const char *) bitcode, bitcodeSize);
    327     if (!bitcodeMetadata.extract()) {
    328         ALOGE("Could not extract metadata from bitcode");
    329         mCtx->unlockMutex();
    330         return false;
    331     }
    332 
    333     const char* core_lib = findCoreLib(bitcodeMetadata, (const char*)bitcode, bitcodeSize);
    334 
    335     if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
    336         useRSDebugContext = true;
    337     }
    338 
    339     int optLevel = mCtx->getContext()->getOptLevel();
    340 
    341     std::string bcFileName(cacheDir);
    342     bcFileName.append("/");
    343     bcFileName.append(resName);
    344     bcFileName.append(".bc");
    345 
    346     std::vector<const char*> compileArguments;
    347     bool emitGlobalInfo = mCtx->getEmbedGlobalInfo();
    348     bool emitGlobalInfoSkipConstant = mCtx->getEmbedGlobalInfoSkipConstant();
    349     setCompileArguments(&compileArguments, bcFileName, cacheDir, resName, core_lib,
    350                         useRSDebugContext, bccPluginName, emitGlobalInfo,
    351                         optLevel, emitGlobalInfoSkipConstant);
    352 
    353     mChecksumNeeded = isChecksumNeeded(cacheDir);
    354     if (mChecksumNeeded) {
    355         std::vector<const char *> bccFiles = { BCC_EXE_PATH,
    356                                                core_lib,
    357                                              };
    358 
    359         // The last argument of compileArguments is a nullptr, so remove 1 from
    360         // the size.
    361         std::unique_ptr<const char> compileCommandLine(
    362             rsuJoinStrings(compileArguments.size()-1, compileArguments.data()));
    363 
    364         mBuildChecksum = constructBuildChecksum(bitcode, bitcodeSize,
    365                                                 compileCommandLine.get(),
    366                                                 bccFiles.data(), bccFiles.size());
    367 
    368         if (mBuildChecksum == 0) {
    369             // cannot compute checksum but verification is enabled
    370             mCtx->unlockMutex();
    371             return false;
    372         }
    373     }
    374     else {
    375         // add a dummy/constant as a checksum if verification is disabled
    376         mBuildChecksum = 0xabadcafe;
    377     }
    378 
    379     // Append build checksum to commandline
    380     // Handle the terminal nullptr in compileArguments
    381     compileArguments.pop_back();
    382     compileArguments.push_back("-build-checksum");
    383     std::stringstream ss;
    384     ss << std::hex << mBuildChecksum;
    385     compileArguments.push_back(ss.str().c_str());
    386     compileArguments.push_back(nullptr);
    387 
    388     if (!is_force_recompile() && !useRSDebugContext) {
    389         mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
    390 
    391         // Read RS info from the shared object to detect checksum mismatch
    392         if (mScriptSO != nullptr && !storeRSInfoFromSO()) {
    393             dlclose(mScriptSO);
    394             mScriptSO = nullptr;
    395         }
    396     }
    397 
    398     // If we can't, it's either not there or out of date.  We compile the bit code and try loading
    399     // again.
    400     if (mScriptSO == nullptr) {
    401         if (!compileBitcode(bcFileName, (const char*)bitcode, bitcodeSize,
    402                             compileArguments))
    403         {
    404             ALOGE("bcc: FAILS to compile '%s'", resName);
    405             mCtx->unlockMutex();
    406             return false;
    407         }
    408 
    409         if (!SharedLibraryUtils::createSharedLibrary(mCtx->getContext()->getDriverName(),
    410                                                      cacheDir, resName)) {
    411             ALOGE("Linker: Failed to link object file '%s'", resName);
    412             mCtx->unlockMutex();
    413             return false;
    414         }
    415 
    416         mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName);
    417         if (mScriptSO == nullptr) {
    418             ALOGE("Unable to load '%s'", resName);
    419             mCtx->unlockMutex();
    420             return false;
    421         }
    422 
    423         // Read RS symbol information from the .so.
    424         if (!storeRSInfoFromSO()) {
    425             goto error;
    426         }
    427     }
    428 
    429     mBitcodeFilePath.setTo(bcFileName.c_str());
    430 
    431 #else  // RS_COMPATIBILITY_LIB is defined
    432     const char *nativeLibDir = mCtx->getContext()->getNativeLibDir();
    433     mScriptSO = SharedLibraryUtils::loadSharedLibrary(cacheDir, resName, nativeLibDir);
    434 
    435     if (!mScriptSO) {
    436         goto error;
    437     }
    438 
    439     if (!storeRSInfoFromSO()) {
    440         goto error;
    441     }
    442 #endif
    443     mCtx->unlockMutex();
    444     return true;
    445 
    446 error:
    447 
    448     mCtx->unlockMutex();
    449     if (mScriptSO) {
    450         dlclose(mScriptSO);
    451         mScriptSO = nullptr;
    452     }
    453     return false;
    454 }
    455 
    456 #ifndef RS_COMPATIBILITY_LIB
    457 
    458 const char* RsdCpuScriptImpl::findCoreLib(const bcinfo::MetadataExtractor& ME, const char* bitcode,
    459                                           size_t bitcodeSize) {
    460     const char* defaultLib = SYSLIBPATH"/libclcore.bc";
    461 
    462     // If we're debugging, use the debug library.
    463     if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
    464         return SYSLIBPATH"/libclcore_debug.bc";
    465     }
    466 
    467     if (ME.hasDebugInfo()) {
    468         return SYSLIBPATH"/libclcore_g.bc";
    469     }
    470 
    471     // If a callback has been registered to specify a library, use that.
    472     RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
    473     if (selectRTCallback != nullptr) {
    474         return selectRTCallback((const char*)bitcode, bitcodeSize);
    475     }
    476 
    477     // Check for a platform specific library
    478 #if defined(ARCH_ARM_HAVE_NEON) && !defined(DISABLE_CLCORE_NEON)
    479     enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
    480     if (prec == bcinfo::RS_FP_Relaxed) {
    481         // NEON-capable ARMv7a devices can use an accelerated math library
    482         // for all reduced precision scripts.
    483         // ARMv8 does not use NEON, as ASIMD can be used with all precision
    484         // levels.
    485         return SYSLIBPATH"/libclcore_neon.bc";
    486     } else {
    487         return defaultLib;
    488     }
    489 #elif defined(__i386__) || defined(__x86_64__)
    490     // x86 devices will use an optimized library.
    491     return SYSLIBPATH"/libclcore_x86.bc";
    492 #else
    493     return defaultLib;
    494 #endif
    495 }
    496 
    497 #endif
    498 
    499 void RsdCpuScriptImpl::populateScript(Script *script) {
    500     // Copy info over to runtime
    501     script->mHal.info.exportedFunctionCount = mScriptExec->getExportedFunctionCount();
    502     script->mHal.info.exportedReduceCount = mScriptExec->getExportedReduceCount();
    503     script->mHal.info.exportedForEachCount = mScriptExec->getExportedForEachCount();
    504     script->mHal.info.exportedVariableCount = mScriptExec->getExportedVariableCount();
    505     script->mHal.info.exportedPragmaCount = mScriptExec->getPragmaCount();;
    506     script->mHal.info.exportedPragmaKeyList = mScriptExec->getPragmaKeys();
    507     script->mHal.info.exportedPragmaValueList = mScriptExec->getPragmaValues();
    508 
    509     // Bug, need to stash in metadata
    510     if (mRootExpand) {
    511         script->mHal.info.root = mRootExpand;
    512     } else {
    513         script->mHal.info.root = mRoot;
    514     }
    515 }
    516 
// Set up the launch dimensions, and write the values of the launch
// dimensions into the mtls start/end fields.
//
// Inputs:
//       mtls - launch struct whose start/end x, y and z fields are written;
//              asserted non-null
//    baseDim - base shape of the input
//         sc - used to constrain the launch dimensions; may be null
//
// Returns:
//   True on success, false on failure to set up. On failure an
//   RS_ERROR_BAD_SCRIPT error naming the offending Start/End pair has been
//   set on the context.
bool RsdCpuScriptImpl::setUpMtlsDimensions(MTLaunchStructCommon *mtls,
                                           const RsLaunchDimensions &baseDim,
                                           const RsScriptCall *sc) {
    rsAssert(mtls);

// Per-dimension setup. Without a script call, or when its End value is 0,
// the whole base dimension is used and only end is written: start keeps
// whatever value mtls already holds (the setup functions in this file zero
// mtls with memset before calling). Otherwise start and end are each clamped
// to the base dimension, and an empty or inverted range (start >= end)
// makes the enclosing function return false.
#define SET_UP_DIMENSION(DIM_FIELD, SC_FIELD) do {            \
    if (!sc || (sc->SC_FIELD##End == 0)) {                    \
        mtls->end.DIM_FIELD = baseDim.DIM_FIELD;              \
    } else {                                                  \
        mtls->start.DIM_FIELD =                               \
            rsMin(baseDim.DIM_FIELD, sc->SC_FIELD##Start);    \
        mtls->end.DIM_FIELD =                                 \
            rsMin(baseDim.DIM_FIELD, sc->SC_FIELD##End);      \
        if (mtls->start.DIM_FIELD >= mtls->end.DIM_FIELD) {   \
            mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, \
                "Failed to launch kernel; Invalid "           \
                #SC_FIELD "Start or " #SC_FIELD "End.");      \
            return false;                                     \
        }                                                     \
    }} while(0)

    SET_UP_DIMENSION(x, x);
    SET_UP_DIMENSION(y, y);
    SET_UP_DIMENSION(z, z);
    // Checks and setup of fields other than x, y, z are ignored, since those
    // fields are not used in the runtime and are not visible in the Java API.
#undef SET_UP_DIMENSION

    return true;
}
    556 
    557 // Preliminary work to prepare a general reduce-style kernel for launch.
    558 bool RsdCpuScriptImpl::reduceMtlsSetup(const Allocation ** ains,
    559                                        uint32_t inLen,
    560                                        const Allocation * aout,
    561                                        const RsScriptCall *sc,
    562                                        MTLaunchStructReduce *mtls) {
    563     rsAssert(ains && (inLen >= 1) && aout);
    564     memset(mtls, 0, sizeof(MTLaunchStructReduce));
    565     mtls->dimPtr = &mtls->redp.dim;
    566 
    567     for (int index = inLen; --index >= 0;) {
    568         if (allocationLODIsNull(ains[index])) {
    569             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
    570                                          "reduce called with null in allocations");
    571             return false;
    572         }
    573     }
    574 
    575     if (allocationLODIsNull(aout)) {
    576         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
    577                                      "reduce called with null out allocation");
    578         return false;
    579     }
    580 
    581     const Allocation *ain0   = ains[0];
    582     const Type       *inType = ain0->getType();
    583 
    584     mtls->redp.dim.x = inType->getDimX();
    585     mtls->redp.dim.y = inType->getDimY();
    586     mtls->redp.dim.z = inType->getDimZ();
    587 
    588     for (int Index = inLen; --Index >= 1;) {
    589         if (!ain0->hasSameDims(ains[Index])) {
    590             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
    591                                          "Failed to launch reduction kernel;"
    592                                          "dimensions of input allocations do not match.");
    593             return false;
    594         }
    595     }
    596 
    597     if (!setUpMtlsDimensions(mtls, mtls->redp.dim, sc)) {
    598         return false;
    599     }
    600 
    601     // The X & Y walkers always want 0-1 min even if dim is not present
    602     mtls->end.x = rsMax((uint32_t)1, mtls->end.x);
    603     mtls->end.y = rsMax((uint32_t)1, mtls->end.y);
    604 
    605     mtls->rs = mCtx;
    606 
    607     mtls->mSliceNum    = 0;
    608     mtls->mSliceSize   = 1;
    609     mtls->isThreadable = mIsThreadable;
    610 
    611     // Set up output,
    612     mtls->redp.outLen = 1;
    613     mtls->redp.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
    614     mtls->redp.outStride[0] = aout->getType()->getElementSizeBytes();
    615 
    616     // Set up input.
    617     memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
    618     mtls->redp.inLen = inLen;
    619     for (int index = inLen; --index >= 0;) {
    620         mtls->redp.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
    621         mtls->redp.inStride[index] = ains[index]->getType()->getElementSizeBytes();
    622     }
    623 
    624     // All validation passed, ok to launch threads
    625     return true;
    626 }
    627 
    628 // Preliminary work to prepare a forEach-style kernel for launch.
    629 bool RsdCpuScriptImpl::forEachMtlsSetup(const Allocation ** ains,
    630                                         uint32_t inLen,
    631                                         Allocation * aout,
    632                                         const void * usr, uint32_t usrLen,
    633                                         const RsScriptCall *sc,
    634                                         MTLaunchStructForEach *mtls) {
    635     if (ains == nullptr && inLen != 0) {
    636         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
    637           "rsForEach called with none-zero inLen with null in allocations");
    638         return false;
    639     }
    640 
    641     memset(mtls, 0, sizeof(MTLaunchStructForEach));
    642     mtls->dimPtr = &mtls->fep.dim;
    643 
    644     for (int index = inLen; --index >= 0;) {
    645         if (allocationLODIsNull(ains[index])) {
    646             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
    647                                          "rsForEach called with null in allocations");
    648             return false;
    649         }
    650     }
    651 
    652     if (allocationLODIsNull(aout)) {
    653         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
    654                                      "rsForEach called with null out allocations");
    655         return false;
    656     }
    657 
    658     if (inLen > 0) {
    659         const Allocation *ain0   = ains[0];
    660         const Type       *inType = ain0->getType();
    661 
    662         mtls->fep.dim.x = inType->getDimX();
    663         mtls->fep.dim.y = inType->getDimY();
    664         mtls->fep.dim.z = inType->getDimZ();
    665 
    666         for (int Index = inLen; --Index >= 1;) {
    667             if (!ain0->hasSameDims(ains[Index])) {
    668                 mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
    669                   "Failed to launch kernel; dimensions of input"
    670                   "allocations do not match.");
    671                 return false;
    672             }
    673         }
    674     } else if (aout != nullptr) {
    675         const Type *outType = aout->getType();
    676 
    677         mtls->fep.dim.x = outType->getDimX();
    678         mtls->fep.dim.y = outType->getDimY();
    679         mtls->fep.dim.z = outType->getDimZ();
    680 
    681     } else if (sc != nullptr) {
    682         mtls->fep.dim.x = sc->xEnd;
    683         mtls->fep.dim.y = sc->yEnd;
    684         mtls->fep.dim.z = 0;
    685     } else {
    686         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
    687                                      "rsForEach called with null allocations");
    688         return false;
    689     }
    690 
    691     if (inLen > 0 && aout != nullptr) {
    692         if (!ains[0]->hasSameDims(aout)) {
    693             mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT,
    694               "Failed to launch kernel; dimensions of input and output allocations do not match.");
    695 
    696             return false;
    697         }
    698     }
    699 
    700     if (!setUpMtlsDimensions(mtls, mtls->fep.dim, sc)) {
    701         return false;
    702     }
    703 
    704     // The X & Y walkers always want 0-1 min even if dim is not present
    705     mtls->end.x    = rsMax((uint32_t)1, mtls->end.x);
    706     mtls->end.y    = rsMax((uint32_t)1, mtls->end.y);
    707     mtls->rs       = mCtx;
    708     if (ains) {
    709         memcpy(mtls->ains, ains, inLen * sizeof(ains[0]));
    710     }
    711     mtls->aout[0]    = aout;
    712     mtls->fep.usr    = usr;
    713     mtls->fep.usrLen = usrLen;
    714     mtls->mSliceSize = 1;
    715     mtls->mSliceNum  = 0;
    716 
    717     mtls->isThreadable  = mIsThreadable;
    718 
    719     if (inLen > 0) {
    720         mtls->fep.inLen = inLen;
    721         for (int index = inLen; --index >= 0;) {
    722             mtls->fep.inPtr[index] = (const uint8_t*)ains[index]->mHal.drvState.lod[0].mallocPtr;
    723             mtls->fep.inStride[index] = ains[index]->getType()->getElementSizeBytes();
    724         }
    725     }
    726 
    727     if (aout != nullptr) {
    728         mtls->fep.outPtr[0] = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
    729         mtls->fep.outStride[0] = aout->getType()->getElementSizeBytes();
    730     }
    731 
    732     // All validation passed, ok to launch threads
    733     return true;
    734 }
    735 
    736 
    737 void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
    738                                      const Allocation ** ains,
    739                                      uint32_t inLen,
    740                                      Allocation * aout,
    741                                      const void * usr,
    742                                      uint32_t usrLen,
    743                                      const RsScriptCall *sc) {
    744 
    745     MTLaunchStructForEach mtls;
    746 
    747     if (forEachMtlsSetup(ains, inLen, aout, usr, usrLen, sc, &mtls)) {
    748         forEachKernelSetup(slot, &mtls);
    749 
    750         RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
    751         mCtx->launchForEach(ains, inLen, aout, sc, &mtls);
    752         mCtx->setTLS(oldTLS);
    753     }
    754 }
    755 
    756 void RsdCpuScriptImpl::invokeReduce(uint32_t slot,
    757                                     const Allocation ** ains, uint32_t inLen,
    758                                     Allocation *aout,
    759                                     const RsScriptCall *sc) {
    760   MTLaunchStructReduce mtls;
    761 
    762   if (reduceMtlsSetup(ains, inLen, aout, sc, &mtls)) {
    763     reduceKernelSetup(slot, &mtls);
    764     RsdCpuScriptImpl *oldTLS = mCtx->setTLS(this);
    765     mCtx->launchReduce(ains, inLen, aout, &mtls);
    766     mCtx->setTLS(oldTLS);
    767   }
    768 }
    769 
    770 void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStructForEach *mtls) {
    771     mtls->script = this;
    772     mtls->fep.slot = slot;
    773     mtls->kernel = mScriptExec->getForEachFunction(slot);
    774     rsAssert(mtls->kernel != nullptr);
    775 }
    776 
    777 void RsdCpuScriptImpl::reduceKernelSetup(uint32_t slot, MTLaunchStructReduce *mtls) {
    778     mtls->script = this;
    779     mtls->redp.slot = slot;
    780 
    781     const ReduceDescription *desc = mScriptExec->getReduceDescription(slot);
    782     mtls->accumFunc = desc->accumFunc;
    783     mtls->initFunc  = desc->initFunc;   // might legally be nullptr
    784     mtls->combFunc  = desc->combFunc;   // might legally be nullptr
    785     mtls->outFunc   = desc->outFunc;    // might legally be nullptr
    786     mtls->accumSize = desc->accumSize;
    787 
    788     rsAssert(mtls->accumFunc != nullptr);
    789 }
    790 
    791 int RsdCpuScriptImpl::invokeRoot() {
    792     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
    793     int ret = mRoot();
    794     mCtx->setTLS(oldTLS);
    795     return ret;
    796 }
    797 
    798 void RsdCpuScriptImpl::invokeInit() {
    799     if (mInit) {
    800         mInit();
    801     }
    802 }
    803 
    804 void RsdCpuScriptImpl::invokeFreeChildren() {
    805     if (mFreeChildren) {
    806         mFreeChildren();
    807     }
    808 }
    809 
    810 void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
    811                                       size_t paramLength) {
    812     //ALOGE("invoke %i %p %zu", slot, params, paramLength);
    813     void * ap = nullptr;
    814 
    815 #if defined(__x86_64__)
    816     // The invoked function could have input parameter of vector type for example float4 which
    817     // requires void* params to be 16 bytes aligned when using SSE instructions for x86_64 platform.
    818     // So try to align void* params before passing them into RS exported function.
    819 
    820     if ((uint8_t)(uint64_t)params & 0x0F) {
    821         if ((ap = (void*)memalign(16, paramLength)) != nullptr) {
    822             memcpy(ap, params, paramLength);
    823         } else {
    824             ALOGE("x86_64: invokeFunction memalign error, still use params which"
    825                   " is not 16 bytes aligned.");
    826         }
    827     }
    828 #endif
    829 
    830     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
    831     reinterpret_cast<void (*)(const void *, uint32_t)>(
    832         mScriptExec->getInvokeFunction(slot))(ap? (const void *) ap: params, paramLength);
    833 
    834 #if defined(__x86_64__)
    835     free(ap);
    836 #endif
    837 
    838     mCtx->setTLS(oldTLS);
    839 }
    840 
    841 void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
    842     //rsAssert(!script->mFieldIsObject[slot]);
    843     //ALOGE("setGlobalVar %i %p %zu", slot, data, dataLength);
    844 
    845     //if (mIntrinsicID) {
    846         //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
    847         //return;
    848     //}
    849 
    850     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
    851     if (!destPtr) {
    852         //ALOGV("Calling setVar on slot = %i which is null", slot);
    853         return;
    854     }
    855 
    856     memcpy(destPtr, data, dataLength);
    857 }
    858 
    859 void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
    860     //rsAssert(!script->mFieldIsObject[slot]);
    861     //ALOGE("getGlobalVar %i %p %zu", slot, data, dataLength);
    862 
    863     int32_t *srcPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
    864     if (!srcPtr) {
    865         //ALOGV("Calling setVar on slot = %i which is null", slot);
    866         return;
    867     }
    868     memcpy(data, srcPtr, dataLength);
    869 }
    870 
    871 
    872 void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
    873                                                 const Element *elem,
    874                                                 const uint32_t *dims, size_t dimLength) {
    875     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
    876     if (!destPtr) {
    877         //ALOGV("Calling setVar on slot = %i which is null", slot);
    878         return;
    879     }
    880 
    881     // We want to look at dimension in terms of integer components,
    882     // but dimLength is given in terms of bytes.
    883     dimLength /= sizeof(int);
    884 
    885     // Only a single dimension is currently supported.
    886     rsAssert(dimLength == 1);
    887     if (dimLength == 1) {
    888         // First do the increment loop.
    889         size_t stride = elem->getSizeBytes();
    890         const char *cVal = reinterpret_cast<const char *>(data);
    891         for (uint32_t i = 0; i < dims[0]; i++) {
    892             elem->incRefs(cVal);
    893             cVal += stride;
    894         }
    895 
    896         // Decrement loop comes after (to prevent race conditions).
    897         char *oldVal = reinterpret_cast<char *>(destPtr);
    898         for (uint32_t i = 0; i < dims[0]; i++) {
    899             elem->decRefs(oldVal);
    900             oldVal += stride;
    901         }
    902     }
    903 
    904     memcpy(destPtr, data, dataLength);
    905 }
    906 
    907 void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
    908 
    909     //rsAssert(!script->mFieldIsObject[slot]);
    910     //ALOGE("setGlobalBind %i %p", slot, data);
    911 
    912     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
    913     if (!destPtr) {
    914         //ALOGV("Calling setVar on slot = %i which is null", slot);
    915         return;
    916     }
    917 
    918     void *ptr = nullptr;
    919     mBoundAllocs[slot] = data;
    920     if (data) {
    921         ptr = data->mHal.drvState.lod[0].mallocPtr;
    922     }
    923     memcpy(destPtr, &ptr, sizeof(void *));
    924 }
    925 
    926 void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
    927 
    928     //rsAssert(script->mFieldIsObject[slot]);
    929     //ALOGE("setGlobalObj %i %p", slot, data);
    930 
    931     int32_t *destPtr = reinterpret_cast<int32_t *>(mScriptExec->getFieldAddress(slot));
    932     if (!destPtr) {
    933         //ALOGV("Calling setVar on slot = %i which is null", slot);
    934         return;
    935     }
    936 
    937     rsrSetObject(mCtx->getContext(), (rs_object_base *)destPtr, data);
    938 }
    939 
    940 const char* RsdCpuScriptImpl::getFieldName(uint32_t slot) const {
    941     return mScriptExec->getFieldName(slot);
    942 }
    943 
    944 RsdCpuScriptImpl::~RsdCpuScriptImpl() {
    945     delete mScriptExec;
    946     delete[] mBoundAllocs;
    947     if (mScriptSO) {
    948         dlclose(mScriptSO);
    949     }
    950 }
    951 
    952 Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
    953     if (!ptr) {
    954         return nullptr;
    955     }
    956 
    957     for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
    958         Allocation *a = mBoundAllocs[ct];
    959         if (!a) continue;
    960         if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
    961             return a;
    962         }
    963     }
    964     ALOGE("rsGetAllocation, failed to find %p", ptr);
    965     return nullptr;
    966 }
    967 
    968 int RsdCpuScriptImpl::getGlobalEntries() const {
    969     return mScriptExec->getGlobalEntries();
    970 }
    971 
    972 const char * RsdCpuScriptImpl::getGlobalName(int i) const {
    973     return mScriptExec->getGlobalName(i);
    974 }
    975 
    976 const void * RsdCpuScriptImpl::getGlobalAddress(int i) const {
    977     return mScriptExec->getGlobalAddress(i);
    978 }
    979 
    980 size_t RsdCpuScriptImpl::getGlobalSize(int i) const {
    981     return mScriptExec->getGlobalSize(i);
    982 }
    983 
    984 uint32_t RsdCpuScriptImpl::getGlobalProperties(int i) const {
    985     return mScriptExec->getGlobalProperties(i);
    986 }
    987 
    988 void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation ** ains,
    989                                  uint32_t inLen, Allocation * aout,
    990                                  const void * usr, uint32_t usrLen,
    991                                  const RsScriptCall *sc) {}
    992 
    993 void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation ** ains,
    994                                   uint32_t inLen, Allocation * aout,
    995                                   const void * usr, uint32_t usrLen,
    996                                   const RsScriptCall *sc) {}
    997 
    998 
    999 }
   1000 }
   1001