Home | History | Annotate | Download | only in driver
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsdCore.h"
     19 #include "rsdBcc.h"
     20 #include "rsdRuntime.h"
     21 
     22 #include <bcinfo/MetadataExtractor.h>
     23 
     24 #include "rsContext.h"
     25 #include "rsScriptC.h"
     26 
     27 #include "utils/Timers.h"
     28 #include "utils/StopWatch.h"
     29 extern "C" {
     30 #include "libdex/ZipArchive.h"
     31 }
     32 
     33 
     34 using namespace android;
     35 using namespace android::renderscript;
     36 
     37 struct DrvScript {
     38     int (*mRoot)();
     39     void (*mInit)();
     40     void (*mFreeChildren)();
     41 
     42     BCCScriptRef mBccScript;
     43 
     44     bcinfo::MetadataExtractor *ME;
     45 
     46     InvokeFunc_t *mInvokeFunctions;
     47     void ** mFieldAddress;
     48     bool * mFieldIsObject;
     49     const uint32_t *mExportForEachSignatureList;
     50 
     51     const uint8_t * mScriptText;
     52     uint32_t mScriptTextLength;
     53 };
     54 
     55 
     56 static Script * setTLS(Script *sc) {
     57     ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(rsdgThreadTLSKey);
     58     rsAssert(tls);
     59     Script *old = tls->mScript;
     60     tls->mScript = sc;
     61     return old;
     62 }
     63 
     64 
     65 bool rsdScriptInit(const Context *rsc,
     66                      ScriptC *script,
     67                      char const *resName,
     68                      char const *cacheDir,
     69                      uint8_t const *bitcode,
     70                      size_t bitcodeSize,
     71                      uint32_t flags) {
     72     //LOGE("rsdScriptCreate %p %p %p %p %i %i %p", rsc, resName, cacheDir, bitcode, bitcodeSize, flags, lookupFunc);
     73 
     74     pthread_mutex_lock(&rsdgInitMutex);
     75     char *cachePath = NULL;
     76     size_t exportFuncCount = 0;
     77     size_t exportVarCount = 0;
     78     size_t objectSlotCount = 0;
     79     size_t exportForEachSignatureCount = 0;
     80 
     81     DrvScript *drv = (DrvScript *)calloc(1, sizeof(DrvScript));
     82     if (drv == NULL) {
     83         goto error;
     84     }
     85     script->mHal.drv = drv;
     86 
     87     drv->mBccScript = bccCreateScript();
     88     script->mHal.info.isThreadable = true;
     89     drv->mScriptText = bitcode;
     90     drv->mScriptTextLength = bitcodeSize;
     91 
     92 
     93     drv->ME = new bcinfo::MetadataExtractor((const char*)drv->mScriptText,
     94                                             drv->mScriptTextLength);
     95     if (!drv->ME->extract()) {
     96       LOGE("bcinfo: failed to read script metadata");
     97       goto error;
     98     }
     99 
    100     //LOGE("mBccScript %p", script->mBccScript);
    101 
    102     if (bccRegisterSymbolCallback(drv->mBccScript, &rsdLookupRuntimeStub, script) != 0) {
    103         LOGE("bcc: FAILS to register symbol callback");
    104         goto error;
    105     }
    106 
    107     if (bccReadBC(drv->mBccScript,
    108                   resName,
    109                   (char const *)drv->mScriptText,
    110                   drv->mScriptTextLength, 0) != 0) {
    111         LOGE("bcc: FAILS to read bitcode");
    112         goto error;
    113     }
    114 
    115     if (bccLinkFile(drv->mBccScript, "/system/lib/libclcore.bc", 0) != 0) {
    116         LOGE("bcc: FAILS to link bitcode");
    117         goto error;
    118     }
    119 
    120     if (bccPrepareExecutable(drv->mBccScript, cacheDir, resName, 0) != 0) {
    121         LOGE("bcc: FAILS to prepare executable");
    122         goto error;
    123     }
    124 
    125     free(cachePath);
    126 
    127     drv->mRoot = reinterpret_cast<int (*)()>(bccGetFuncAddr(drv->mBccScript, "root"));
    128     drv->mInit = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, "init"));
    129     drv->mFreeChildren = reinterpret_cast<void (*)()>(bccGetFuncAddr(drv->mBccScript, ".rs.dtor"));
    130 
    131     exportFuncCount = drv->ME->getExportFuncCount();
    132     if (exportFuncCount > 0) {
    133         drv->mInvokeFunctions = (InvokeFunc_t*) calloc(exportFuncCount,
    134                                                        sizeof(InvokeFunc_t));
    135         bccGetExportFuncList(drv->mBccScript, exportFuncCount,
    136                              (void **) drv->mInvokeFunctions);
    137     } else {
    138         drv->mInvokeFunctions = NULL;
    139     }
    140 
    141     exportVarCount = drv->ME->getExportVarCount();
    142     if (exportVarCount > 0) {
    143         drv->mFieldAddress = (void **) calloc(exportVarCount, sizeof(void*));
    144         drv->mFieldIsObject = (bool *) calloc(exportVarCount, sizeof(bool));
    145         bccGetExportVarList(drv->mBccScript, exportVarCount,
    146                             (void **) drv->mFieldAddress);
    147     } else {
    148         drv->mFieldAddress = NULL;
    149         drv->mFieldIsObject = NULL;
    150     }
    151 
    152     objectSlotCount = drv->ME->getObjectSlotCount();
    153     if (objectSlotCount > 0) {
    154         const uint32_t *objectSlotList = drv->ME->getObjectSlotList();
    155         for (uint32_t ct=0; ct < objectSlotCount; ct++) {
    156             drv->mFieldIsObject[objectSlotList[ct]] = true;
    157         }
    158     }
    159 
    160     exportForEachSignatureCount = drv->ME->getExportForEachSignatureCount();
    161     rsAssert(exportForEachSignatureCount <= 1);
    162     drv->mExportForEachSignatureList = drv->ME->getExportForEachSignatureList();
    163 
    164     // Copy info over to runtime
    165     script->mHal.info.exportedFunctionCount = drv->ME->getExportFuncCount();
    166     script->mHal.info.exportedVariableCount = drv->ME->getExportVarCount();
    167     script->mHal.info.exportedPragmaCount = drv->ME->getPragmaCount();
    168     script->mHal.info.exportedPragmaKeyList = drv->ME->getPragmaKeyList();
    169     script->mHal.info.exportedPragmaValueList = drv->ME->getPragmaValueList();
    170     script->mHal.info.root = drv->mRoot;
    171 
    172     pthread_mutex_unlock(&rsdgInitMutex);
    173     return true;
    174 
    175 error:
    176 
    177     pthread_mutex_unlock(&rsdgInitMutex);
    178     if (drv->ME) {
    179         delete drv->ME;
    180         drv->ME = NULL;
    181     }
    182     free(drv);
    183     return false;
    184 
    185 }
    186 
    187 typedef struct {
    188     Context *rsc;
    189     Script *script;
    190     uint32_t sig;
    191     const Allocation * ain;
    192     Allocation * aout;
    193     const void * usr;
    194     size_t usrLen;
    195 
    196     uint32_t mSliceSize;
    197     volatile int mSliceNum;
    198 
    199     const uint8_t *ptrIn;
    200     uint32_t eStrideIn;
    201     uint8_t *ptrOut;
    202     uint32_t eStrideOut;
    203 
    204     uint32_t xStart;
    205     uint32_t xEnd;
    206     uint32_t yStart;
    207     uint32_t yEnd;
    208     uint32_t zStart;
    209     uint32_t zEnd;
    210     uint32_t arrayStart;
    211     uint32_t arrayEnd;
    212 
    213     uint32_t dimX;
    214     uint32_t dimY;
    215     uint32_t dimZ;
    216     uint32_t dimArray;
    217 } MTLaunchStruct;
    218 typedef void (*rs_t)(const void *, void *, const void *, uint32_t, uint32_t, uint32_t, uint32_t);
    219 
    220 static void wc_xy(void *usr, uint32_t idx) {
    221     MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
    222     RsForEachStubParamStruct p;
    223     memset(&p, 0, sizeof(p));
    224     p.usr = mtls->usr;
    225     p.usr_len = mtls->usrLen;
    226     RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
    227     uint32_t sig = mtls->sig;
    228 
    229     while (1) {
    230         uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
    231         uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
    232         uint32_t yEnd = yStart + mtls->mSliceSize;
    233         yEnd = rsMin(yEnd, mtls->yEnd);
    234         if (yEnd <= yStart) {
    235             return;
    236         }
    237 
    238         //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
    239         //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
    240         for (p.y = yStart; p.y < yEnd; p.y++) {
    241             uint32_t offset = mtls->dimX * p.y;
    242             uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
    243             const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
    244 
    245             for (p.x = mtls->xStart; p.x < mtls->xEnd; p.x++) {
    246                 p.in = xPtrIn;
    247                 p.out = xPtrOut;
    248                 dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
    249                 xPtrIn += mtls->eStrideIn;
    250                 xPtrOut += mtls->eStrideOut;
    251             }
    252         }
    253     }
    254 }
    255 
    256 static void wc_x(void *usr, uint32_t idx) {
    257     MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
    258     RsForEachStubParamStruct p;
    259     memset(&p, 0, sizeof(p));
    260     p.usr = mtls->usr;
    261     p.usr_len = mtls->usrLen;
    262     RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
    263     uint32_t sig = mtls->sig;
    264 
    265     while (1) {
    266         uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
    267         uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
    268         uint32_t xEnd = xStart + mtls->mSliceSize;
    269         xEnd = rsMin(xEnd, mtls->xEnd);
    270         if (xEnd <= xStart) {
    271             return;
    272         }
    273 
    274         //LOGE("usr idx %i, x %i,%i  y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
    275         //LOGE("usr ptr in %p,  out %p", mtls->ptrIn, mtls->ptrOut);
    276         uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
    277         const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
    278         for (p.x = xStart; p.x < xEnd; p.x++) {
    279             p.in = xPtrIn;
    280             p.out = xPtrOut;
    281             dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
    282             xPtrIn += mtls->eStrideIn;
    283             xPtrOut += mtls->eStrideOut;
    284         }
    285     }
    286 }
    287 
    288 void rsdScriptInvokeForEach(const Context *rsc,
    289                             Script *s,
    290                             uint32_t slot,
    291                             const Allocation * ain,
    292                             Allocation * aout,
    293                             const void * usr,
    294                             uint32_t usrLen,
    295                             const RsScriptCall *sc) {
    296 
    297     RsdHal * dc = (RsdHal *)rsc->mHal.drv;
    298 
    299     MTLaunchStruct mtls;
    300     memset(&mtls, 0, sizeof(mtls));
    301 
    302     DrvScript *drv = (DrvScript *)s->mHal.drv;
    303     // We only support slot 0 (root) at this point in time.
    304     rsAssert(slot == 0);
    305     mtls.sig = 0x1f;  // temp fix for old apps, full table in slang_rs_export_foreach.cpp
    306     if (drv->mExportForEachSignatureList) {
    307         mtls.sig = drv->mExportForEachSignatureList[slot];
    308     }
    309     if (ain) {
    310         mtls.dimX = ain->getType()->getDimX();
    311         mtls.dimY = ain->getType()->getDimY();
    312         mtls.dimZ = ain->getType()->getDimZ();
    313         //mtls.dimArray = ain->getType()->getDimArray();
    314     } else if (aout) {
    315         mtls.dimX = aout->getType()->getDimX();
    316         mtls.dimY = aout->getType()->getDimY();
    317         mtls.dimZ = aout->getType()->getDimZ();
    318         //mtls.dimArray = aout->getType()->getDimArray();
    319     } else {
    320         rsc->setError(RS_ERROR_BAD_SCRIPT, "rsForEach called with null allocations");
    321         return;
    322     }
    323 
    324     if (!sc || (sc->xEnd == 0)) {
    325         mtls.xEnd = mtls.dimX;
    326     } else {
    327         rsAssert(sc->xStart < mtls.dimX);
    328         rsAssert(sc->xEnd <= mtls.dimX);
    329         rsAssert(sc->xStart < sc->xEnd);
    330         mtls.xStart = rsMin(mtls.dimX, sc->xStart);
    331         mtls.xEnd = rsMin(mtls.dimX, sc->xEnd);
    332         if (mtls.xStart >= mtls.xEnd) return;
    333     }
    334 
    335     if (!sc || (sc->yEnd == 0)) {
    336         mtls.yEnd = mtls.dimY;
    337     } else {
    338         rsAssert(sc->yStart < mtls.dimY);
    339         rsAssert(sc->yEnd <= mtls.dimY);
    340         rsAssert(sc->yStart < sc->yEnd);
    341         mtls.yStart = rsMin(mtls.dimY, sc->yStart);
    342         mtls.yEnd = rsMin(mtls.dimY, sc->yEnd);
    343         if (mtls.yStart >= mtls.yEnd) return;
    344     }
    345 
    346     mtls.xEnd = rsMax((uint32_t)1, mtls.xEnd);
    347     mtls.yEnd = rsMax((uint32_t)1, mtls.yEnd);
    348     mtls.zEnd = rsMax((uint32_t)1, mtls.zEnd);
    349     mtls.arrayEnd = rsMax((uint32_t)1, mtls.arrayEnd);
    350 
    351     rsAssert(!ain || (ain->getType()->getDimZ() == 0));
    352 
    353     Context *mrsc = (Context *)rsc;
    354     Script * oldTLS = setTLS(s);
    355 
    356     mtls.rsc = mrsc;
    357     mtls.ain = ain;
    358     mtls.aout = aout;
    359     mtls.script = s;
    360     mtls.usr = usr;
    361     mtls.usrLen = usrLen;
    362     mtls.mSliceSize = 10;
    363     mtls.mSliceNum = 0;
    364 
    365     mtls.ptrIn = NULL;
    366     mtls.eStrideIn = 0;
    367     if (ain) {
    368         mtls.ptrIn = (const uint8_t *)ain->getPtr();
    369         mtls.eStrideIn = ain->getType()->getElementSizeBytes();
    370     }
    371 
    372     mtls.ptrOut = NULL;
    373     mtls.eStrideOut = 0;
    374     if (aout) {
    375         mtls.ptrOut = (uint8_t *)aout->getPtr();
    376         mtls.eStrideOut = aout->getType()->getElementSizeBytes();
    377     }
    378 
    379     if ((dc->mWorkers.mCount > 1) && s->mHal.info.isThreadable) {
    380         if (mtls.dimY > 1) {
    381             rsdLaunchThreads(mrsc, wc_xy, &mtls);
    382         } else {
    383             rsdLaunchThreads(mrsc, wc_x, &mtls);
    384         }
    385 
    386         //LOGE("launch 1");
    387     } else {
    388         RsForEachStubParamStruct p;
    389         memset(&p, 0, sizeof(p));
    390         p.usr = mtls.usr;
    391         p.usr_len = mtls.usrLen;
    392         uint32_t sig = mtls.sig;
    393 
    394         //LOGE("launch 3");
    395         for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
    396             for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
    397                 for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
    398                     uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
    399                                       mtls.dimX * mtls.dimY * p.z +
    400                                       mtls.dimX * p.y;
    401                     uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
    402                     const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
    403 
    404                     for (p.x = mtls.xStart; p.x < mtls.xEnd; p.x++) {
    405                         p.in = xPtrIn;
    406                         p.out = xPtrOut;
    407                         dc->mForEachLaunch[sig](&s->mHal.info.root, &p);
    408                         xPtrIn += mtls.eStrideIn;
    409                         xPtrOut += mtls.eStrideOut;
    410                     }
    411                 }
    412             }
    413         }
    414     }
    415 
    416     setTLS(oldTLS);
    417 }
    418 
    419 
    420 int rsdScriptInvokeRoot(const Context *dc, Script *script) {
    421     DrvScript *drv = (DrvScript *)script->mHal.drv;
    422 
    423     Script * oldTLS = setTLS(script);
    424     int ret = drv->mRoot();
    425     setTLS(oldTLS);
    426 
    427     return ret;
    428 }
    429 
    430 void rsdScriptInvokeInit(const Context *dc, Script *script) {
    431     DrvScript *drv = (DrvScript *)script->mHal.drv;
    432 
    433     if (drv->mInit) {
    434         drv->mInit();
    435     }
    436 }
    437 
    438 void rsdScriptInvokeFreeChildren(const Context *dc, Script *script) {
    439     DrvScript *drv = (DrvScript *)script->mHal.drv;
    440 
    441     if (drv->mFreeChildren) {
    442         drv->mFreeChildren();
    443     }
    444 }
    445 
    446 void rsdScriptInvokeFunction(const Context *dc, Script *script,
    447                             uint32_t slot,
    448                             const void *params,
    449                             size_t paramLength) {
    450     DrvScript *drv = (DrvScript *)script->mHal.drv;
    451     //LOGE("invoke %p %p %i %p %i", dc, script, slot, params, paramLength);
    452 
    453     Script * oldTLS = setTLS(script);
    454     ((void (*)(const void *, uint32_t))
    455         drv->mInvokeFunctions[slot])(params, paramLength);
    456     setTLS(oldTLS);
    457 }
    458 
    459 void rsdScriptSetGlobalVar(const Context *dc, const Script *script,
    460                            uint32_t slot, void *data, size_t dataLength) {
    461     DrvScript *drv = (DrvScript *)script->mHal.drv;
    462     //rsAssert(!script->mFieldIsObject[slot]);
    463     //LOGE("setGlobalVar %p %p %i %p %i", dc, script, slot, data, dataLength);
    464 
    465     int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
    466     if (!destPtr) {
    467         //LOGV("Calling setVar on slot = %i which is null", slot);
    468         return;
    469     }
    470 
    471     memcpy(destPtr, data, dataLength);
    472 }
    473 
    474 void rsdScriptSetGlobalBind(const Context *dc, const Script *script, uint32_t slot, void *data) {
    475     DrvScript *drv = (DrvScript *)script->mHal.drv;
    476     //rsAssert(!script->mFieldIsObject[slot]);
    477     //LOGE("setGlobalBind %p %p %i %p", dc, script, slot, data);
    478 
    479     int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
    480     if (!destPtr) {
    481         //LOGV("Calling setVar on slot = %i which is null", slot);
    482         return;
    483     }
    484 
    485     memcpy(destPtr, &data, sizeof(void *));
    486 }
    487 
    488 void rsdScriptSetGlobalObj(const Context *dc, const Script *script, uint32_t slot, ObjectBase *data) {
    489     DrvScript *drv = (DrvScript *)script->mHal.drv;
    490     //rsAssert(script->mFieldIsObject[slot]);
    491     //LOGE("setGlobalObj %p %p %i %p", dc, script, slot, data);
    492 
    493     int32_t *destPtr = ((int32_t **)drv->mFieldAddress)[slot];
    494     if (!destPtr) {
    495         //LOGV("Calling setVar on slot = %i which is null", slot);
    496         return;
    497     }
    498 
    499     rsrSetObject(dc, script, (ObjectBase **)destPtr, data);
    500 }
    501 
    502 void rsdScriptDestroy(const Context *dc, Script *script) {
    503     DrvScript *drv = (DrvScript *)script->mHal.drv;
    504 
    505     if (drv->mFieldAddress) {
    506         size_t exportVarCount = drv->ME->getExportVarCount();
    507         for (size_t ct = 0; ct < exportVarCount; ct++) {
    508             if (drv->mFieldIsObject[ct]) {
    509                 // The field address can be NULL if the script-side has
    510                 // optimized the corresponding global variable away.
    511                 if (drv->mFieldAddress[ct]) {
    512                     rsrClearObject(dc, script, (ObjectBase **)drv->mFieldAddress[ct]);
    513                 }
    514             }
    515         }
    516         free(drv->mFieldAddress);
    517         drv->mFieldAddress = NULL;
    518         free(drv->mFieldIsObject);
    519         drv->mFieldIsObject = NULL;    }
    520 
    521     if (drv->mInvokeFunctions) {
    522         free(drv->mInvokeFunctions);
    523         drv->mInvokeFunctions = NULL;
    524     }
    525 
    526     delete drv->ME;
    527     drv->ME = NULL;
    528 
    529     free(drv);
    530     script->mHal.drv = NULL;
    531 
    532 }
    533 
    534 
    535