Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuCore.h"
     18 #include "rsCpuScript.h"
     19 #include "rsScriptGroup.h"
     20 #include "rsCpuScriptGroup.h"
     21 //#include "rsdBcc.h"
     22 //#include "rsdAllocation.h"
     23 
     24 using namespace android;
     25 using namespace android::renderscript;
     26 
     27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
     28     mCtx = ctx;
     29     mSG = sg;
     30 }
     31 
     32 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
     33 
     34 }
     35 
     36 bool CpuScriptGroupImpl::init() {
     37     return true;
     38 }
     39 
     40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
     41 }
     42 
     43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
     44 }
     45 
     46 
     47 typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
     48                                       uint32_t xstart, uint32_t xend,
     49                                       uint32_t instep, uint32_t outstep);
     50 
     51 void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
     52                                          uint32_t xstart, uint32_t xend,
     53                                          uint32_t instep, uint32_t outstep) {
     54 
     55 
     56     const ScriptList *sl = (const ScriptList *)p->usr;
     57     RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
     58     const void *oldUsr = p->usr;
     59 
     60     for(size_t ct=0; ct < sl->count; ct++) {
     61         ScriptGroupRootFunc_t func;
     62         func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
     63         mp->usr = sl->usrPtrs[ct];
     64 
     65         mp->ptrIn = NULL;
     66         mp->in = NULL;
     67         mp->ptrOut = NULL;
     68         mp->out = NULL;
     69 
     70         uint32_t istep = 0;
     71         uint32_t ostep = 0;
     72 
     73         if (sl->ins[ct]) {
     74             mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
     75             istep = sl->ins[ct]->mHal.state.elementSizeBytes;
     76             mp->in = mp->ptrIn;
     77             if (sl->inExts[ct]) {
     78                 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
     79             } else {
     80                 if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
     81                     mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
     82                 }
     83             }
     84         }
     85 
     86         if (sl->outs[ct]) {
     87             mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
     88             mp->out = mp->ptrOut;
     89             ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
     90             if (sl->outExts[ct]) {
     91                 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
     92             } else {
     93                 if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
     94                     mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
     95                 }
     96             }
     97         }
     98 
     99         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
    100         func(p, xstart, xend, istep, ostep);
    101     }
    102     //ALOGE("script group root");
    103 
    104     //ConvolveParams *cp = (ConvolveParams *)p->usr;
    105 
    106     mp->usr = oldUsr;
    107 }
    108 
    109 
    110 
    111 void CpuScriptGroupImpl::execute() {
    112     Vector<Allocation *> ins;
    113     Vector<bool> inExts;
    114     Vector<Allocation *> outs;
    115     Vector<bool> outExts;
    116     Vector<const ScriptKernelID *> kernels;
    117     bool fieldDep = false;
    118 
    119     for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
    120         ScriptGroup::Node *n = mSG->mNodes[ct];
    121         Script *s = n->mKernels[0]->mScript;
    122         if (s->hasObjectSlots()) {
    123             // Disable the ScriptGroup optimization if we have global RS
    124             // objects that might interfere between kernels.
    125             fieldDep = true;
    126         }
    127 
    128         //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
    129 
    130         for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
    131             if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
    132                 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
    133                 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
    134             }
    135         }
    136 
    137         for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
    138             const ScriptKernelID *k = n->mKernels[ct2];
    139             Allocation *ain = NULL;
    140             Allocation *aout = NULL;
    141             bool inExt = false;
    142             bool outExt = false;
    143 
    144             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
    145                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
    146                     ain = n->mInputs[ct3]->mAlloc.get();
    147                     break;
    148                 }
    149             }
    150             if (ain == NULL) {
    151                 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
    152                     if (mSG->mInputs[ct3]->mKernel == k) {
    153                         ain = mSG->mInputs[ct3]->mAlloc.get();
    154                         inExt = true;
    155                         break;
    156                     }
    157                 }
    158             }
    159 
    160             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
    161                 if (n->mOutputs[ct3]->mSource.get() == k) {
    162                     aout = n->mOutputs[ct3]->mAlloc.get();
    163                     if(n->mOutputs[ct3]->mDstField.get() != NULL) {
    164                         fieldDep = true;
    165                     }
    166                     break;
    167                 }
    168             }
    169             if (aout == NULL) {
    170                 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
    171                     if (mSG->mOutputs[ct3]->mKernel == k) {
    172                         aout = mSG->mOutputs[ct3]->mAlloc.get();
    173                         outExt = true;
    174                         break;
    175                     }
    176                 }
    177             }
    178 
    179             rsAssert((k->mHasKernelOutput == (aout != NULL)) &&
    180                      (k->mHasKernelInput == (ain != NULL)));
    181 
    182             ins.add(ain);
    183             inExts.add(inExt);
    184             outs.add(aout);
    185             outExts.add(outExt);
    186             kernels.add(k);
    187         }
    188 
    189     }
    190 
    191     MTLaunchStruct mtls;
    192 
    193     if(fieldDep) {
    194         for (size_t ct=0; ct < ins.size(); ct++) {
    195             Script *s = kernels[ct]->mScript;
    196             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    197             uint32_t slot = kernels[ct]->mSlot;
    198 
    199             si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
    200             si->forEachKernelSetup(slot, &mtls);
    201             si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
    202             mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
    203             si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
    204         }
    205     } else {
    206         ScriptList sl;
    207         sl.ins = ins.array();
    208         sl.outs = outs.array();
    209         sl.kernels = kernels.array();
    210         sl.count = kernels.size();
    211 
    212         Vector<const void *> usrPtrs;
    213         Vector<const void *> fnPtrs;
    214         Vector<uint32_t> sigs;
    215         for (size_t ct=0; ct < kernels.size(); ct++) {
    216             Script *s = kernels[ct]->mScript;
    217             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    218 
    219             si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
    220             fnPtrs.add((void *)mtls.kernel);
    221             usrPtrs.add(mtls.fep.usr);
    222             sigs.add(mtls.fep.usrLen);
    223             si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
    224         }
    225         sl.sigs = sigs.array();
    226         sl.usrPtrs = usrPtrs.array();
    227         sl.fnPtrs = fnPtrs.array();
    228         sl.inExts = inExts.array();
    229         sl.outExts = outExts.array();
    230 
    231         Script *s = kernels[0]->mScript;
    232         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    233         si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
    234         mtls.script = NULL;
    235         mtls.kernel = (void (*)())&scriptGroupRoot;
    236         mtls.fep.usr = &sl;
    237         mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
    238 
    239         for (size_t ct=0; ct < kernels.size(); ct++) {
    240             Script *s = kernels[ct]->mScript;
    241             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    242             si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
    243         }
    244     }
    245 }
    246 
    247 
    248