Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2011-2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuCore.h"
     18 #include "rsCpuScript.h"
     19 
     20 #ifdef RS_COMPATIBILITY_LIB
     21     #include <set>
     22     #include <string>
     23     #include <dlfcn.h>
     24     #include <stdio.h>
     25     #include <stdlib.h>
     26     #include <string.h>
     27     #include <sys/stat.h>
     28     #include <unistd.h>
     29 #else
     30     #include <bcc/BCCContext.h>
     31     #include <bcc/Renderscript/RSCompilerDriver.h>
     32     #include <bcc/Renderscript/RSExecutable.h>
     33     #include <bcc/Renderscript/RSInfo.h>
     34     #include <bcinfo/MetadataExtractor.h>
     35     #include <cutils/properties.h>
     36 
     37     #include <sys/types.h>
     38     #include <sys/wait.h>
     39     #include <unistd.h>
     40 #endif
     41 
     42 namespace {
     43 #ifdef RS_COMPATIBILITY_LIB
     44 
     45 // Create a len length string containing random characters from [A-Za-z0-9].
     46 static std::string getRandomString(size_t len) {
     47     char buf[len + 1];
     48     for (size_t i = 0; i < len; i++) {
     49         uint32_t r = arc4random() & 0xffff;
     50         r %= 62;
     51         if (r < 26) {
     52             // lowercase
     53             buf[i] = 'a' + r;
     54         } else if (r < 52) {
     55             // uppercase
     56             buf[i] = 'A' + (r - 26);
     57         } else {
     58             // Use a number
     59             buf[i] = '0' + (r - 52);
     60         }
     61     }
     62     buf[len] = '\0';
     63     return std::string(buf);
     64 }
     65 
     66 // Check if a path exists and attempt to create it if it doesn't.
     67 static bool ensureCacheDirExists(const char *path) {
     68     if (access(path, R_OK | W_OK | X_OK) == 0) {
     69         // Done if we can rwx the directory
     70         return true;
     71     }
     72     if (mkdir(path, 0700) == 0) {
     73         return true;
     74     }
     75     return false;
     76 }
     77 
     78 // Attempt to load the shared library from origName, but then fall back to
     79 // creating the symlinked shared library if necessary (to ensure instancing).
     80 // This function returns the dlopen()-ed handle if successful.
     81 static void *loadSOHelper(const char *origName, const char *cacheDir,
     82                           const char *resName) {
     83     // Keep track of which .so libraries have been loaded. Once a library is
     84     // in the set (per-process granularity), we must instead make a symlink to
     85     // the original shared object (randomly named .so file) and load that one
     86     // instead. If we don't do this, we end up aliasing global data between
     87     // the various Script instances (which are supposed to be completely
     88     // independent).
     89     static std::set<std::string> LoadedLibraries;
     90 
     91     void *loaded = NULL;
     92 
     93     // Skip everything if we don't even have the original library available.
     94     if (access(origName, F_OK) != 0) {
     95         return NULL;
     96     }
     97 
     98     // Common path is that we have not loaded this Script/library before.
     99     if (LoadedLibraries.find(origName) == LoadedLibraries.end()) {
    100         loaded = dlopen(origName, RTLD_NOW | RTLD_LOCAL);
    101         if (loaded) {
    102             LoadedLibraries.insert(origName);
    103         }
    104         return loaded;
    105     }
    106 
    107     std::string newName(cacheDir);
    108     newName.append("/com.android.renderscript.cache/");
    109 
    110     if (!ensureCacheDirExists(newName.c_str())) {
    111         ALOGE("Could not verify or create cache dir: %s", cacheDir);
    112         return NULL;
    113     }
    114 
    115     // Construct an appropriately randomized filename for the symlink.
    116     newName.append("librs.");
    117     newName.append(resName);
    118     newName.append("#");
    119     newName.append(getRandomString(6));  // 62^6 potential filename variants.
    120     newName.append(".so");
    121 
    122     int r = symlink(origName, newName.c_str());
    123     if (r != 0) {
    124         ALOGE("Could not create symlink %s -> %s", newName.c_str(), origName);
    125         return NULL;
    126     }
    127     loaded = dlopen(newName.c_str(), RTLD_NOW | RTLD_LOCAL);
    128     r = unlink(newName.c_str());
    129     if (r != 0) {
    130         ALOGE("Could not unlink symlink %s", newName.c_str());
    131     }
    132     if (loaded) {
    133         LoadedLibraries.insert(newName.c_str());
    134     }
    135 
    136     return loaded;
    137 }
    138 
    139 // Load the shared library referred to by cacheDir and resName. If we have
    140 // already loaded this library, we instead create a new symlink (in the
    141 // cache dir) and then load that. We then immediately destroy the symlink.
    142 // This is required behavior to implement script instancing for the support
    143 // library, since shared objects are loaded and de-duped by name only.
    144 static void *loadSharedLibrary(const char *cacheDir, const char *resName) {
    145     void *loaded = NULL;
    146     //arc4random_stir();
    147 #ifndef RS_SERVER
    148     std::string scriptSOName(cacheDir);
    149     size_t cutPos = scriptSOName.rfind("cache");
    150     if (cutPos != std::string::npos) {
    151         scriptSOName.erase(cutPos);
    152     } else {
    153         ALOGE("Found peculiar cacheDir (missing \"cache\"): %s", cacheDir);
    154     }
    155     scriptSOName.append("/lib/librs.");
    156 #else
    157     std::string scriptSOName("lib");
    158 #endif
    159     scriptSOName.append(resName);
    160     scriptSOName.append(".so");
    161 
    162     // We should check if we can load the library from the standard app
    163     // location for shared libraries first.
    164     loaded = loadSOHelper(scriptSOName.c_str(), cacheDir, resName);
    165 
    166     if (loaded == NULL) {
    167         ALOGE("Unable to open shared library (%s): %s",
    168               scriptSOName.c_str(), dlerror());
    169 
    170         // One final attempt to find the library in "/system/lib".
    171         // We do this to allow bundled applications to use the compatibility
    172         // library fallback path. Those applications don't have a private
    173         // library path, so they need to install to the system directly.
    174         // Note that this is really just a testing path.
    175         android::String8 scriptSONameSystem("/system/lib/librs.");
    176         scriptSONameSystem.append(resName);
    177         scriptSONameSystem.append(".so");
    178         loaded = loadSOHelper(scriptSONameSystem.c_str(), cacheDir,
    179                               resName);
    180         if (loaded == NULL) {
    181             ALOGE("Unable to open system shared library (%s): %s",
    182                   scriptSONameSystem.c_str(), dlerror());
    183         }
    184     }
    185 
    186     return loaded;
    187 }
    188 
    189 
    190 #else
    191 static bool is_force_recompile() {
    192 #ifdef RS_SERVER
    193   return false;
    194 #else
    195   char buf[PROPERTY_VALUE_MAX];
    196 
    197   // Re-compile if floating point precision has been overridden.
    198   property_get("debug.rs.precision", buf, "");
    199   if (buf[0] != '\0') {
    200     return true;
    201   }
    202 
    203   // Re-compile if debug.rs.forcerecompile is set.
    204   property_get("debug.rs.forcerecompile", buf, "0");
    205   if ((::strcmp(buf, "1") == 0) || (::strcmp(buf, "true") == 0)) {
    206     return true;
    207   } else {
    208     return false;
    209   }
    210 #endif  // RS_SERVER
    211 }
    212 
    213 //#define EXTERNAL_BCC_COMPILER 1
    214 #ifdef EXTERNAL_BCC_COMPILER
    215 const static char *BCC_EXE_PATH = "/system/bin/bcc";
    216 
    217 static bool compileBitcode(const char *cacheDir,
    218                            const char *resName,
    219                            const char *bitcode,
    220                            size_t bitcodeSize,
    221                            const char *core_lib) {
    222     rsAssert(cacheDir && resName && bitcode && bitcodeSize && core_lib);
    223 
    224     android::String8 bcFilename(cacheDir);
    225     bcFilename.append("/");
    226     bcFilename.append(resName);
    227     bcFilename.append(".bc");
    228     FILE *bcfile = fopen(bcFilename.string(), "w");
    229     if (!bcfile) {
    230         ALOGE("Could not write to %s", bcFilename.string());
    231         return false;
    232     }
    233     size_t nwritten = fwrite(bitcode, 1, bitcodeSize, bcfile);
    234     fclose(bcfile);
    235     if (nwritten != bitcodeSize) {
    236         ALOGE("Could not write %zu bytes to %s", bitcodeSize,
    237               bcFilename.string());
    238         return false;
    239     }
    240 
    241     pid_t pid = fork();
    242     switch (pid) {
    243     case -1: {  // Error occurred (we attempt no recovery)
    244         ALOGE("Couldn't fork for bcc compiler execution");
    245         return false;
    246     }
    247     case 0: {  // Child process
    248         // Execute the bcc compiler.
    249         execl(BCC_EXE_PATH,
    250               BCC_EXE_PATH,
    251               "-o", resName,
    252               "-output_path", cacheDir,
    253               "-bclib", core_lib,
    254               bcFilename.string(),
    255               (char *) NULL);
    256         ALOGE("execl() failed: %s", strerror(errno));
    257         abort();
    258         return false;
    259     }
    260     default: {  // Parent process (actual driver)
    261         // Wait on child process to finish compiling the source.
    262         int status = 0;
    263         pid_t w = waitpid(pid, &status, 0);
    264         if (w == -1) {
    265             ALOGE("Could not wait for bcc compiler");
    266             return false;
    267         }
    268 
    269         if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
    270             return true;
    271         }
    272 
    273         ALOGE("bcc compiler terminated unexpectedly");
    274         return false;
    275     }
    276     }
    277 }
    278 #endif  // EXTERNAL_BCC_COMPILER
    279 
    280 #endif  // !defined(RS_COMPATIBILITY_LIB)
    281 }  // namespace
    282 
    283 namespace android {
    284 namespace renderscript {
    285 
    286 
    287 #ifdef RS_COMPATIBILITY_LIB
    288 #define MAXLINE 500
    289 #define MAKE_STR_HELPER(S) #S
    290 #define MAKE_STR(S) MAKE_STR_HELPER(S)
    291 #define EXPORT_VAR_STR "exportVarCount: "
    292 #define EXPORT_VAR_STR_LEN strlen(EXPORT_VAR_STR)
    293 #define EXPORT_FUNC_STR "exportFuncCount: "
    294 #define EXPORT_FUNC_STR_LEN strlen(EXPORT_FUNC_STR)
    295 #define EXPORT_FOREACH_STR "exportForEachCount: "
    296 #define EXPORT_FOREACH_STR_LEN strlen(EXPORT_FOREACH_STR)
    297 #define OBJECT_SLOT_STR "objectSlotCount: "
    298 #define OBJECT_SLOT_STR_LEN strlen(OBJECT_SLOT_STR)
    299 
    300 // Copy up to a newline or size chars from str -> s, updating str
    301 // Returns s when successful and NULL when '\0' is finally reached.
    302 static char* strgets(char *s, int size, const char **ppstr) {
    303     if (!ppstr || !*ppstr || **ppstr == '\0' || size < 1) {
    304         return NULL;
    305     }
    306 
    307     int i;
    308     for (i = 0; i < (size - 1); i++) {
    309         s[i] = **ppstr;
    310         (*ppstr)++;
    311         if (s[i] == '\0') {
    312             return s;
    313         } else if (s[i] == '\n') {
    314             s[i+1] = '\0';
    315             return s;
    316         }
    317     }
    318 
    319     // size has been exceeded.
    320     s[i] = '\0';
    321 
    322     return s;
    323 }
    324 #endif
    325 
    326 RsdCpuScriptImpl::RsdCpuScriptImpl(RsdCpuReferenceImpl *ctx, const Script *s) {
    327     mCtx = ctx;
    328     mScript = s;
    329 
    330 #ifdef RS_COMPATIBILITY_LIB
    331     mScriptSO = NULL;
    332     mInvokeFunctions = NULL;
    333     mForEachFunctions = NULL;
    334     mFieldAddress = NULL;
    335     mFieldIsObject = NULL;
    336     mForEachSignatures = NULL;
    337 #else
    338     mCompilerContext = NULL;
    339     mCompilerDriver = NULL;
    340     mExecutable = NULL;
    341 #endif
    342 
    343     mRoot = NULL;
    344     mRootExpand = NULL;
    345     mInit = NULL;
    346     mFreeChildren = NULL;
    347 
    348 
    349     mBoundAllocs = NULL;
    350     mIntrinsicData = NULL;
    351     mIsThreadable = true;
    352 }
    353 
    354 
    355 bool RsdCpuScriptImpl::init(char const *resName, char const *cacheDir,
    356                             uint8_t const *bitcode, size_t bitcodeSize,
    357                             uint32_t flags) {
    358     //ALOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
    359     //ALOGE("rsdScriptInit %p %p", rsc, script);
    360 
    361     mCtx->lockMutex();
    362 
    363 #ifndef RS_COMPATIBILITY_LIB
    364     bcc::RSExecutable *exec = NULL;
    365 
    366     mCompilerContext = NULL;
    367     mCompilerDriver = NULL;
    368     mExecutable = NULL;
    369 
    370     mCompilerContext = new bcc::BCCContext();
    371     if (mCompilerContext == NULL) {
    372         ALOGE("bcc: FAILS to create compiler context (out of memory)");
    373         mCtx->unlockMutex();
    374         return false;
    375     }
    376 
    377     mCompilerDriver = new bcc::RSCompilerDriver();
    378     if (mCompilerDriver == NULL) {
    379         ALOGE("bcc: FAILS to create compiler driver (out of memory)");
    380         mCtx->unlockMutex();
    381         return false;
    382     }
    383 
    384     mCompilerDriver->setRSRuntimeLookupFunction(lookupRuntimeStub);
    385     mCompilerDriver->setRSRuntimeLookupContext(this);
    386 
    387     // Run any compiler setup functions we have been provided with.
    388     RSSetupCompilerCallback setupCompilerCallback =
    389             mCtx->getSetupCompilerCallback();
    390     if (setupCompilerCallback != NULL) {
    391         setupCompilerCallback(mCompilerDriver);
    392     }
    393 
    394     const char *core_lib = bcc::RSInfo::LibCLCorePath;
    395 
    396     bcinfo::MetadataExtractor ME((const char *) bitcode, bitcodeSize);
    397     if (!ME.extract()) {
    398         ALOGE("Could not extract metadata from bitcode");
    399         return false;
    400     }
    401 
    402     enum bcinfo::RSFloatPrecision prec = ME.getRSFloatPrecision();
    403     switch (prec) {
    404     case bcinfo::RS_FP_Imprecise:
    405     case bcinfo::RS_FP_Relaxed:
    406 #if defined(ARCH_ARM_HAVE_NEON)
    407         // NEON-capable devices can use an accelerated math library for all
    408         // reduced precision scripts.
    409         core_lib = bcc::RSInfo::LibCLCoreNEONPath;
    410 #endif
    411         break;
    412     case bcinfo::RS_FP_Full:
    413         break;
    414     default:
    415         ALOGE("Unknown precision for bitcode");
    416         return false;
    417     }
    418 
    419 #if defined(ARCH_X86_HAVE_SSE2)
    420     // SSE2- or above capable devices will use an optimized library.
    421     core_lib = bcc::RSInfo::LibCLCoreX86Path;
    422 #endif
    423 
    424     RSSelectRTCallback selectRTCallback = mCtx->getSelectRTCallback();
    425     if (selectRTCallback != NULL) {
    426         core_lib = selectRTCallback((const char *)bitcode, bitcodeSize);
    427     }
    428 
    429     if (mCtx->getContext()->getContextType() == RS_CONTEXT_TYPE_DEBUG) {
    430         // Use the libclcore_debug.bc instead of the default library.
    431         core_lib = bcc::RSInfo::LibCLCoreDebugPath;
    432         mCompilerDriver->setDebugContext(true);
    433         // Skip the cache lookup
    434     } else if (!is_force_recompile()) {
    435         // Attempt to just load the script from cache first if we can.
    436         exec = mCompilerDriver->loadScript(cacheDir, resName,
    437                                            (const char *)bitcode, bitcodeSize);
    438     }
    439 
    440     if (exec == NULL) {
    441 #ifdef EXTERNAL_BCC_COMPILER
    442         bool built = compileBitcode(cacheDir, resName, (const char *)bitcode,
    443                                     bitcodeSize, core_lib);
    444 #else
    445         bool built = mCompilerDriver->build(*mCompilerContext, cacheDir,
    446                                             resName, (const char *)bitcode,
    447                                             bitcodeSize, core_lib,
    448                                             mCtx->getLinkRuntimeCallback());
    449 #endif  // EXTERNAL_BCC_COMPILER
    450         if (built) {
    451             exec = mCompilerDriver->loadScript(cacheDir, resName,
    452                                                (const char *)bitcode,
    453                                                bitcodeSize);
    454         }
    455     }
    456 
    457     if (exec == NULL) {
    458         ALOGE("bcc: FAILS to prepare executable for '%s'", resName);
    459         mCtx->unlockMutex();
    460         return false;
    461     }
    462 
    463     mExecutable = exec;
    464 
    465     exec->setThreadable(mIsThreadable);
    466     if (!exec->syncInfo()) {
    467         ALOGW("bcc: FAILS to synchronize the RS info file to the disk");
    468     }
    469 
    470     mRoot = reinterpret_cast<int (*)()>(exec->getSymbolAddress("root"));
    471     mRootExpand =
    472         reinterpret_cast<int (*)()>(exec->getSymbolAddress("root.expand"));
    473     mInit = reinterpret_cast<void (*)()>(exec->getSymbolAddress("init"));
    474     mFreeChildren =
    475         reinterpret_cast<void (*)()>(exec->getSymbolAddress(".rs.dtor"));
    476 
    477 
    478     const bcc::RSInfo *info = &mExecutable->getInfo();
    479     if (info->getExportVarNames().size()) {
    480         mBoundAllocs = new Allocation *[info->getExportVarNames().size()];
    481         memset(mBoundAllocs, 0, sizeof(void *) * info->getExportVarNames().size());
    482     }
    483 
    484 #else
    485 
    486     mScriptSO = loadSharedLibrary(cacheDir, resName);
    487 
    488     if (mScriptSO) {
    489         char line[MAXLINE];
    490         mRoot = (RootFunc_t) dlsym(mScriptSO, "root");
    491         if (mRoot) {
    492             //ALOGE("Found root(): %p", mRoot);
    493         }
    494         mRootExpand = (RootFunc_t) dlsym(mScriptSO, "root.expand");
    495         if (mRootExpand) {
    496             //ALOGE("Found root.expand(): %p", mRootExpand);
    497         }
    498         mInit = (InvokeFunc_t) dlsym(mScriptSO, "init");
    499         if (mInit) {
    500             //ALOGE("Found init(): %p", mInit);
    501         }
    502         mFreeChildren = (InvokeFunc_t) dlsym(mScriptSO, ".rs.dtor");
    503         if (mFreeChildren) {
    504             //ALOGE("Found .rs.dtor(): %p", mFreeChildren);
    505         }
    506 
    507         const char *rsInfo = (const char *) dlsym(mScriptSO, ".rs.info");
    508         if (rsInfo) {
    509             //ALOGE("Found .rs.info(): %p - %s", rsInfo, rsInfo);
    510         }
    511 
    512         size_t varCount = 0;
    513         if (strgets(line, MAXLINE, &rsInfo) == NULL) {
    514             goto error;
    515         }
    516         if (sscanf(line, EXPORT_VAR_STR "%zu", &varCount) != 1) {
    517             ALOGE("Invalid export var count!: %s", line);
    518             goto error;
    519         }
    520 
    521         mExportedVariableCount = varCount;
    522         //ALOGE("varCount: %zu", varCount);
    523         if (varCount > 0) {
    524             // Start by creating/zeroing this member, since we don't want to
    525             // accidentally clean up invalid pointers later (if we error out).
    526             mFieldIsObject = new bool[varCount];
    527             if (mFieldIsObject == NULL) {
    528                 goto error;
    529             }
    530             memset(mFieldIsObject, 0, varCount * sizeof(*mFieldIsObject));
    531             mFieldAddress = new void*[varCount];
    532             if (mFieldAddress == NULL) {
    533                 goto error;
    534             }
    535             for (size_t i = 0; i < varCount; ++i) {
    536                 if (strgets(line, MAXLINE, &rsInfo) == NULL) {
    537                     goto error;
    538                 }
    539                 char *c = strrchr(line, '\n');
    540                 if (c) {
    541                     *c = '\0';
    542                 }
    543                 mFieldAddress[i] = dlsym(mScriptSO, line);
    544                 if (mFieldAddress[i] == NULL) {
    545                     ALOGE("Failed to find variable address for %s: %s",
    546                           line, dlerror());
    547                     // Not a critical error if we don't find a global variable.
    548                 }
    549                 else {
    550                     //ALOGE("Found variable %s at %p", line,
    551                     //mFieldAddress[i]);
    552                 }
    553             }
    554         }
    555 
    556         size_t funcCount = 0;
    557         if (strgets(line, MAXLINE, &rsInfo) == NULL) {
    558             goto error;
    559         }
    560         if (sscanf(line, EXPORT_FUNC_STR "%zu", &funcCount) != 1) {
    561             ALOGE("Invalid export func count!: %s", line);
    562             goto error;
    563         }
    564 
    565         mExportedFunctionCount = funcCount;
    566         //ALOGE("funcCount: %zu", funcCount);
    567 
    568         if (funcCount > 0) {
    569             mInvokeFunctions = new InvokeFunc_t[funcCount];
    570             if (mInvokeFunctions == NULL) {
    571                 goto error;
    572             }
    573             for (size_t i = 0; i < funcCount; ++i) {
    574                 if (strgets(line, MAXLINE, &rsInfo) == NULL) {
    575                     goto error;
    576                 }
    577                 char *c = strrchr(line, '\n');
    578                 if (c) {
    579                     *c = '\0';
    580                 }
    581 
    582                 mInvokeFunctions[i] = (InvokeFunc_t) dlsym(mScriptSO, line);
    583                 if (mInvokeFunctions[i] == NULL) {
    584                     ALOGE("Failed to get function address for %s(): %s",
    585                           line, dlerror());
    586                     goto error;
    587                 }
    588                 else {
    589                     //ALOGE("Found InvokeFunc_t %s at %p", line, mInvokeFunctions[i]);
    590                 }
    591             }
    592         }
    593 
    594         size_t forEachCount = 0;
    595         if (strgets(line, MAXLINE, &rsInfo) == NULL) {
    596             goto error;
    597         }
    598         if (sscanf(line, EXPORT_FOREACH_STR "%zu", &forEachCount) != 1) {
    599             ALOGE("Invalid export forEach count!: %s", line);
    600             goto error;
    601         }
    602 
    603         if (forEachCount > 0) {
    604 
    605             mForEachSignatures = new uint32_t[forEachCount];
    606             if (mForEachSignatures == NULL) {
    607                 goto error;
    608             }
    609             mForEachFunctions = new ForEachFunc_t[forEachCount];
    610             if (mForEachFunctions == NULL) {
    611                 goto error;
    612             }
    613             for (size_t i = 0; i < forEachCount; ++i) {
    614                 unsigned int tmpSig = 0;
    615                 char tmpName[MAXLINE];
    616 
    617                 if (strgets(line, MAXLINE, &rsInfo) == NULL) {
    618                     goto error;
    619                 }
    620                 if (sscanf(line, "%u - %" MAKE_STR(MAXLINE) "s",
    621                            &tmpSig, tmpName) != 2) {
    622                     ALOGE("Invalid export forEach!: %s", line);
    623                     goto error;
    624                 }
    625 
    626                 // Lookup the expanded ForEach kernel.
    627                 strncat(tmpName, ".expand", MAXLINE-1-strlen(tmpName));
    628                 mForEachSignatures[i] = tmpSig;
    629                 mForEachFunctions[i] =
    630                         (ForEachFunc_t) dlsym(mScriptSO, tmpName);
    631                 if (i != 0 && mForEachFunctions[i] == NULL) {
    632                     // Ignore missing root.expand functions.
    633                     // root() is always specified at location 0.
    634                     ALOGE("Failed to find forEach function address for %s: %s",
    635                           tmpName, dlerror());
    636                     goto error;
    637                 }
    638                 else {
    639                     //ALOGE("Found forEach %s at %p", tmpName, mForEachFunctions[i]);
    640                 }
    641             }
    642         }
    643 
    644         size_t objectSlotCount = 0;
    645         if (strgets(line, MAXLINE, &rsInfo) == NULL) {
    646             goto error;
    647         }
    648         if (sscanf(line, OBJECT_SLOT_STR "%zu", &objectSlotCount) != 1) {
    649             ALOGE("Invalid object slot count!: %s", line);
    650             goto error;
    651         }
    652 
    653         if (objectSlotCount > 0) {
    654             rsAssert(varCount > 0);
    655             for (size_t i = 0; i < objectSlotCount; ++i) {
    656                 uint32_t varNum = 0;
    657                 if (strgets(line, MAXLINE, &rsInfo) == NULL) {
    658                     goto error;
    659                 }
    660                 if (sscanf(line, "%u", &varNum) != 1) {
    661                     ALOGE("Invalid object slot!: %s", line);
    662                     goto error;
    663                 }
    664 
    665                 if (varNum < varCount) {
    666                     mFieldIsObject[varNum] = true;
    667                 }
    668             }
    669         }
    670 
    671         if (varCount > 0) {
    672             mBoundAllocs = new Allocation *[varCount];
    673             memset(mBoundAllocs, 0, varCount * sizeof(*mBoundAllocs));
    674         }
    675 
    676         if (mScriptSO == (void*)1) {
    677             //rsdLookupRuntimeStub(script, "acos");
    678         }
    679     } else {
    680         goto error;
    681     }
    682 #endif
    683 
    684     mCtx->unlockMutex();
    685     return true;
    686 
    687 #ifdef RS_COMPATIBILITY_LIB
    688 error:
    689 
    690     mCtx->unlockMutex();
    691     delete[] mInvokeFunctions;
    692     delete[] mForEachFunctions;
    693     delete[] mFieldAddress;
    694     delete[] mFieldIsObject;
    695     delete[] mForEachSignatures;
    696     delete[] mBoundAllocs;
    697     if (mScriptSO) {
    698         dlclose(mScriptSO);
    699     }
    700     return false;
    701 #endif
    702 }
    703 
    704 void RsdCpuScriptImpl::populateScript(Script *script) {
    705 #ifndef RS_COMPATIBILITY_LIB
    706     const bcc::RSInfo *info = &mExecutable->getInfo();
    707 
    708     // Copy info over to runtime
    709     script->mHal.info.exportedFunctionCount = info->getExportFuncNames().size();
    710     script->mHal.info.exportedVariableCount = info->getExportVarNames().size();
    711     script->mHal.info.exportedForeachFuncList = info->getExportForeachFuncs().array();
    712     script->mHal.info.exportedPragmaCount = info->getPragmas().size();
    713     script->mHal.info.exportedPragmaKeyList =
    714         const_cast<const char**>(mExecutable->getPragmaKeys().array());
    715     script->mHal.info.exportedPragmaValueList =
    716         const_cast<const char**>(mExecutable->getPragmaValues().array());
    717 
    718     if (mRootExpand) {
    719         script->mHal.info.root = mRootExpand;
    720     } else {
    721         script->mHal.info.root = mRoot;
    722     }
    723 #else
    724     // Copy info over to runtime
    725     script->mHal.info.exportedFunctionCount = mExportedFunctionCount;
    726     script->mHal.info.exportedVariableCount = mExportedVariableCount;
    727     script->mHal.info.exportedPragmaCount = 0;
    728     script->mHal.info.exportedPragmaKeyList = 0;
    729     script->mHal.info.exportedPragmaValueList = 0;
    730 
    731     // Bug, need to stash in metadata
    732     if (mRootExpand) {
    733         script->mHal.info.root = mRootExpand;
    734     } else {
    735         script->mHal.info.root = mRoot;
    736     }
    737 #endif
    738 }
    739 
    740 
    741 typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
    742 
    743 void RsdCpuScriptImpl::forEachMtlsSetup(const Allocation * ain, Allocation * aout,
    744                                         const void * usr, uint32_t usrLen,
    745                                         const RsScriptCall *sc,
    746                                         MTLaunchStruct *mtls) {
    747 
    748     memset(mtls, 0, sizeof(MTLaunchStruct));
    749 
    750     // possible for this to occur if IO_OUTPUT/IO_INPUT with no bound surface
    751     if (ain && (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr == NULL) {
    752         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null in allocations");
    753         return;
    754     }
    755     if (aout && (const uint8_t *)aout->mHal.drvState.lod[0].mallocPtr == NULL) {
    756         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null out allocations");
    757         return;
    758     }
    759 
    760     if (ain) {
    761         mtls->fep.dimX = ain->getType()->getDimX();
    762         mtls->fep.dimY = ain->getType()->getDimY();
    763         mtls->fep.dimZ = ain->getType()->getDimZ();
    764         //mtls->dimArray = ain->getType()->getDimArray();
    765     } else if (aout) {
    766         mtls->fep.dimX = aout->getType()->getDimX();
    767         mtls->fep.dimY = aout->getType()->getDimY();
    768         mtls->fep.dimZ = aout->getType()->getDimZ();
    769         //mtls->dimArray = aout->getType()->getDimArray();
    770     } else {
    771         mCtx->getContext()->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
    772         return;
    773     }
    774 
    775     if (!sc || (sc->xEnd == 0)) {
    776         mtls->xEnd = mtls->fep.dimX;
    777     } else {
    778         rsAssert(sc->xStart < mtls->fep.dimX);
    779         rsAssert(sc->xEnd <= mtls->fep.dimX);
    780         rsAssert(sc->xStart < sc->xEnd);
    781         mtls->xStart = rsMin(mtls->fep.dimX, sc->xStart);
    782         mtls->xEnd = rsMin(mtls->fep.dimX, sc->xEnd);
    783         if (mtls->xStart >= mtls->xEnd) return;
    784     }
    785 
    786     if (!sc || (sc->yEnd == 0)) {
    787         mtls->yEnd = mtls->fep.dimY;
    788     } else {
    789         rsAssert(sc->yStart < mtls->fep.dimY);
    790         rsAssert(sc->yEnd <= mtls->fep.dimY);
    791         rsAssert(sc->yStart < sc->yEnd);
    792         mtls->yStart = rsMin(mtls->fep.dimY, sc->yStart);
    793         mtls->yEnd = rsMin(mtls->fep.dimY, sc->yEnd);
    794         if (mtls->yStart >= mtls->yEnd) return;
    795     }
    796 
    797     if (!sc || (sc->zEnd == 0)) {
    798         mtls->zEnd = mtls->fep.dimZ;
    799     } else {
    800         rsAssert(sc->zStart < mtls->fep.dimZ);
    801         rsAssert(sc->zEnd <= mtls->fep.dimZ);
    802         rsAssert(sc->zStart < sc->zEnd);
    803         mtls->zStart = rsMin(mtls->fep.dimZ, sc->zStart);
    804         mtls->zEnd = rsMin(mtls->fep.dimZ, sc->zEnd);
    805         if (mtls->zStart >= mtls->zEnd) return;
    806     }
    807 
    808     mtls->xEnd = rsMax((uint32_t)1, mtls->xEnd);
    809     mtls->yEnd = rsMax((uint32_t)1, mtls->yEnd);
    810     mtls->zEnd = rsMax((uint32_t)1, mtls->zEnd);
    811     mtls->arrayEnd = rsMax((uint32_t)1, mtls->arrayEnd);
    812 
    813     rsAssert(!ain || (ain->getType()->getDimZ() == 0));
    814 
    815     mtls->rsc = mCtx;
    816     mtls->ain = ain;
    817     mtls->aout = aout;
    818     mtls->fep.usr = usr;
    819     mtls->fep.usrLen = usrLen;
    820     mtls->mSliceSize = 1;
    821     mtls->mSliceNum = 0;
    822 
    823     mtls->fep.ptrIn = NULL;
    824     mtls->fep.eStrideIn = 0;
    825     mtls->isThreadable = mIsThreadable;
    826 
    827     if (ain) {
    828         mtls->fep.ptrIn = (const uint8_t *)ain->mHal.drvState.lod[0].mallocPtr;
    829         mtls->fep.eStrideIn = ain->getType()->getElementSizeBytes();
    830         mtls->fep.yStrideIn = ain->mHal.drvState.lod[0].stride;
    831     }
    832 
    833     mtls->fep.ptrOut = NULL;
    834     mtls->fep.eStrideOut = 0;
    835     if (aout) {
    836         mtls->fep.ptrOut = (uint8_t *)aout->mHal.drvState.lod[0].mallocPtr;
    837         mtls->fep.eStrideOut = aout->getType()->getElementSizeBytes();
    838         mtls->fep.yStrideOut = aout->mHal.drvState.lod[0].stride;
    839     }
    840 }
    841 
    842 
    843 void RsdCpuScriptImpl::invokeForEach(uint32_t slot,
    844                                      const Allocation * ain,
    845                                      Allocation * aout,
    846                                      const void * usr,
    847                                      uint32_t usrLen,
    848                                      const RsScriptCall *sc) {
    849 
    850     MTLaunchStruct mtls;
    851     forEachMtlsSetup(ain, aout, usr, usrLen, sc, &mtls);
    852     forEachKernelSetup(slot, &mtls);
    853 
    854     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
    855     mCtx->launchThreads(ain, aout, sc, &mtls);
    856     mCtx->setTLS(oldTLS);
    857 }
    858 
    859 void RsdCpuScriptImpl::forEachKernelSetup(uint32_t slot, MTLaunchStruct *mtls) {
    860     mtls->script = this;
    861     mtls->fep.slot = slot;
    862 #ifndef RS_COMPATIBILITY_LIB
    863     rsAssert(slot < mExecutable->getExportForeachFuncAddrs().size());
    864     mtls->kernel = reinterpret_cast<ForEachFunc_t>(
    865                       mExecutable->getExportForeachFuncAddrs()[slot]);
    866     rsAssert(mtls->kernel != NULL);
    867     mtls->sig = mExecutable->getInfo().getExportForeachFuncs()[slot].second;
    868 #else
    869     mtls->kernel = reinterpret_cast<ForEachFunc_t>(mForEachFunctions[slot]);
    870     rsAssert(mtls->kernel != NULL);
    871     mtls->sig = mForEachSignatures[slot];
    872 #endif
    873 }
    874 
    875 int RsdCpuScriptImpl::invokeRoot() {
    876     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
    877     int ret = mRoot();
    878     mCtx->setTLS(oldTLS);
    879     return ret;
    880 }
    881 
    882 void RsdCpuScriptImpl::invokeInit() {
    883     if (mInit) {
    884         mInit();
    885     }
    886 }
    887 
    888 void RsdCpuScriptImpl::invokeFreeChildren() {
    889     if (mFreeChildren) {
    890         mFreeChildren();
    891     }
    892 }
    893 
    894 void RsdCpuScriptImpl::invokeFunction(uint32_t slot, const void *params,
    895                                       size_t paramLength) {
    896     //ALOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
    897 
    898     RsdCpuScriptImpl * oldTLS = mCtx->setTLS(this);
    899     reinterpret_cast<void (*)(const void *, uint32_t)>(
    900 #ifndef RS_COMPATIBILITY_LIB
    901         mExecutable->getExportFuncAddrs()[slot])(params, paramLength);
    902 #else
    903         mInvokeFunctions[slot])(params, paramLength);
    904 #endif
    905     mCtx->setTLS(oldTLS);
    906 }
    907 
    908 void RsdCpuScriptImpl::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
    909     //rsAssert(!script->mFieldIsObject[slot]);
    910     //ALOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
    911 
    912     //if (mIntrinsicID) {
    913         //mIntrinsicFuncs.setVar(dc, script, drv->mIntrinsicData, slot, data, dataLength);
    914         //return;
    915     //}
    916 
    917 #ifndef RS_COMPATIBILITY_LIB
    918     int32_t *destPtr = reinterpret_cast<int32_t *>(
    919                           mExecutable->getExportVarAddrs()[slot]);
    920 #else
    921     int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
    922 #endif
    923     if (!destPtr) {
    924         //ALOGV("Calling setVar on slot = %i which is null", slot);
    925         return;
    926     }
    927 
    928     memcpy(destPtr, data, dataLength);
    929 }
    930 
    931 void RsdCpuScriptImpl::getGlobalVar(uint32_t slot, void *data, size_t dataLength) {
    932     //rsAssert(!script->mFieldIsObject[slot]);
    933     //ALOGE("getGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
    934 
    935 #ifndef RS_COMPATIBILITY_LIB
    936     int32_t *srcPtr = reinterpret_cast<int32_t *>(
    937                           mExecutable->getExportVarAddrs()[slot]);
    938 #else
    939     int32_t *srcPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
    940 #endif
    941     if (!srcPtr) {
    942         //ALOGV("Calling setVar on slot = %i which is null", slot);
    943         return;
    944     }
    945     memcpy(data, srcPtr, dataLength);
    946 }
    947 
    948 
    949 void RsdCpuScriptImpl::setGlobalVarWithElemDims(uint32_t slot, const void *data, size_t dataLength,
    950                                                 const Element *elem,
    951                                                 const size_t *dims, size_t dimLength) {
    952 
    953 #ifndef RS_COMPATIBILITY_LIB
    954     int32_t *destPtr = reinterpret_cast<int32_t *>(
    955         mExecutable->getExportVarAddrs()[slot]);
    956 #else
    957     int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
    958 #endif
    959     if (!destPtr) {
    960         //ALOGV("Calling setVar on slot = %i which is null", slot);
    961         return;
    962     }
    963 
    964     // We want to look at dimension in terms of integer components,
    965     // but dimLength is given in terms of bytes.
    966     dimLength /= sizeof(int);
    967 
    968     // Only a single dimension is currently supported.
    969     rsAssert(dimLength == 1);
    970     if (dimLength == 1) {
    971         // First do the increment loop.
    972         size_t stride = elem->getSizeBytes();
    973         const char *cVal = reinterpret_cast<const char *>(data);
    974         for (size_t i = 0; i < dims[0]; i++) {
    975             elem->incRefs(cVal);
    976             cVal += stride;
    977         }
    978 
    979         // Decrement loop comes after (to prevent race conditions).
    980         char *oldVal = reinterpret_cast<char *>(destPtr);
    981         for (size_t i = 0; i < dims[0]; i++) {
    982             elem->decRefs(oldVal);
    983             oldVal += stride;
    984         }
    985     }
    986 
    987     memcpy(destPtr, data, dataLength);
    988 }
    989 
    990 void RsdCpuScriptImpl::setGlobalBind(uint32_t slot, Allocation *data) {
    991 
    992     //rsAssert(!script->mFieldIsObject[slot]);
    993     //ALOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
    994 
    995 #ifndef RS_COMPATIBILITY_LIB
    996     int32_t *destPtr = reinterpret_cast<int32_t *>(
    997                           mExecutable->getExportVarAddrs()[slot]);
    998 #else
    999     int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
   1000 #endif
   1001     if (!destPtr) {
   1002         //ALOGV("Calling setVar on slot = %i which is null", slot);
   1003         return;
   1004     }
   1005 
   1006     void *ptr = NULL;
   1007     mBoundAllocs[slot] = data;
   1008     if(data) {
   1009         ptr = data->mHal.drvState.lod[0].mallocPtr;
   1010     }
   1011     memcpy(destPtr, &ptr, sizeof(void *));
   1012 }
   1013 
   1014 void RsdCpuScriptImpl::setGlobalObj(uint32_t slot, ObjectBase *data) {
   1015 
   1016     //rsAssert(script->mFieldIsObject[slot]);
   1017     //ALOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
   1018 
   1019     //if (mIntrinsicID) {
   1020         //mIntrinsicFuncs.setVarObj(dc, script, drv->mIntrinsicData, slot, alloc);
   1021         //return;
   1022     //}
   1023 
   1024 #ifndef RS_COMPATIBILITY_LIB
   1025     int32_t *destPtr = reinterpret_cast<int32_t *>(
   1026                           mExecutable->getExportVarAddrs()[slot]);
   1027 #else
   1028     int32_t *destPtr = reinterpret_cast<int32_t *>(mFieldAddress[slot]);
   1029 #endif
   1030     if (!destPtr) {
   1031         //ALOGV("Calling setVar on slot = %i which is null", slot);
   1032         return;
   1033     }
   1034 
   1035     rsrSetObject(mCtx->getContext(), (ObjectBase **)destPtr, data);
   1036 }
   1037 
   1038 RsdCpuScriptImpl::~RsdCpuScriptImpl() {
   1039 #ifndef RS_COMPATIBILITY_LIB
   1040     if (mExecutable) {
   1041         Vector<void *>::const_iterator var_addr_iter =
   1042             mExecutable->getExportVarAddrs().begin();
   1043         Vector<void *>::const_iterator var_addr_end =
   1044             mExecutable->getExportVarAddrs().end();
   1045 
   1046         bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_iter =
   1047             mExecutable->getInfo().getObjectSlots().begin();
   1048         bcc::RSInfo::ObjectSlotListTy::const_iterator is_object_end =
   1049             mExecutable->getInfo().getObjectSlots().end();
   1050 
   1051         while ((var_addr_iter != var_addr_end) &&
   1052                (is_object_iter != is_object_end)) {
   1053             // The field address can be NULL if the script-side has optimized
   1054             // the corresponding global variable away.
   1055             ObjectBase **obj_addr =
   1056                 reinterpret_cast<ObjectBase **>(*var_addr_iter);
   1057             if (*is_object_iter) {
   1058                 if (*var_addr_iter != NULL) {
   1059                     rsrClearObject(mCtx->getContext(), obj_addr);
   1060                 }
   1061             }
   1062             var_addr_iter++;
   1063             is_object_iter++;
   1064         }
   1065     }
   1066 
   1067     if (mCompilerContext) {
   1068         delete mCompilerContext;
   1069     }
   1070     if (mCompilerDriver) {
   1071         delete mCompilerDriver;
   1072     }
   1073     if (mExecutable) {
   1074         delete mExecutable;
   1075     }
   1076     if (mBoundAllocs) {
   1077         delete[] mBoundAllocs;
   1078     }
   1079 #else
   1080     if (mFieldIsObject) {
   1081         for (size_t i = 0; i < mExportedVariableCount; ++i) {
   1082             if (mFieldIsObject[i]) {
   1083                 if (mFieldAddress[i] != NULL) {
   1084                     ObjectBase **obj_addr =
   1085                         reinterpret_cast<ObjectBase **>(mFieldAddress[i]);
   1086                     rsrClearObject(mCtx->getContext(), obj_addr);
   1087                 }
   1088             }
   1089         }
   1090     }
   1091 
   1092     if (mInvokeFunctions) delete[] mInvokeFunctions;
   1093     if (mForEachFunctions) delete[] mForEachFunctions;
   1094     if (mFieldAddress) delete[] mFieldAddress;
   1095     if (mFieldIsObject) delete[] mFieldIsObject;
   1096     if (mForEachSignatures) delete[] mForEachSignatures;
   1097     if (mBoundAllocs) delete[] mBoundAllocs;
   1098     if (mScriptSO) {
   1099         dlclose(mScriptSO);
   1100     }
   1101 #endif
   1102 }
   1103 
   1104 Allocation * RsdCpuScriptImpl::getAllocationForPointer(const void *ptr) const {
   1105     if (!ptr) {
   1106         return NULL;
   1107     }
   1108 
   1109     for (uint32_t ct=0; ct < mScript->mHal.info.exportedVariableCount; ct++) {
   1110         Allocation *a = mBoundAllocs[ct];
   1111         if (!a) continue;
   1112         if (a->mHal.drvState.lod[0].mallocPtr == ptr) {
   1113             return a;
   1114         }
   1115     }
   1116     ALOGE("rsGetAllocation, failed to find %p", ptr);
   1117     return NULL;
   1118 }
   1119 
   1120 void RsdCpuScriptImpl::preLaunch(uint32_t slot, const Allocation * ain,
   1121                        Allocation * aout, const void * usr,
   1122                        uint32_t usrLen, const RsScriptCall *sc)
   1123 {
   1124 }
   1125 
   1126 void RsdCpuScriptImpl::postLaunch(uint32_t slot, const Allocation * ain,
   1127                         Allocation * aout, const void * usr,
   1128                         uint32_t usrLen, const RsScriptCall *sc)
   1129 {
   1130 }
   1131 
   1132 
   1133 }
   1134 }
   1135