Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuCore.h"
     18 #include "rsCpuScript.h"
     19 #include "rsScriptGroup.h"
     20 #include "rsCpuScriptGroup.h"
     21 //#include "rsdBcc.h"
     22 //#include "rsdAllocation.h"
     23 
     24 using namespace android;
     25 using namespace android::renderscript;
     26 
     27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
     28     mCtx = ctx;
     29     mSG = sg;
     30 }
     31 
     32 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
     33 
     34 }
     35 
     36 bool CpuScriptGroupImpl::init() {
     37     return true;
     38 }
     39 
     40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
     41 }
     42 
     43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
     44 }
     45 
     46 
     47 typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
     48                                       uint32_t xstart, uint32_t xend,
     49                                       uint32_t instep, uint32_t outstep);
     50 
     51 void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
     52                                          uint32_t xstart, uint32_t xend,
     53                                          uint32_t instep, uint32_t outstep) {
     54 
     55 
     56     const ScriptList *sl = (const ScriptList *)p->usr;
     57     RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
     58     const void *oldUsr = p->usr;
     59 
     60     for(size_t ct=0; ct < sl->count; ct++) {
     61         ScriptGroupRootFunc_t func;
     62         func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
     63         mp->usr = sl->usrPtrs[ct];
     64 
     65         mp->ptrIn = NULL;
     66         mp->in = NULL;
     67         mp->ptrOut = NULL;
     68         mp->out = NULL;
     69 
     70         if (sl->ins[ct]) {
     71             mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
     72             mp->in = mp->ptrIn;
     73             if (sl->inExts[ct]) {
     74                 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
     75             } else {
     76                 if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
     77                     mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
     78                 }
     79             }
     80         }
     81 
     82         if (sl->outs[ct]) {
     83             mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
     84             mp->out = mp->ptrOut;
     85             if (sl->outExts[ct]) {
     86                 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
     87             } else {
     88                 if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
     89                     mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
     90                 }
     91             }
     92         }
     93 
     94         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
     95         func(p, xstart, xend, instep, outstep);
     96     }
     97     //ALOGE("script group root");
     98 
     99     //ConvolveParams *cp = (ConvolveParams *)p->usr;
    100 
    101     mp->usr = oldUsr;
    102 }
    103 
    104 
    105 
    106 void CpuScriptGroupImpl::execute() {
    107     Vector<Allocation *> ins;
    108     Vector<bool> inExts;
    109     Vector<Allocation *> outs;
    110     Vector<bool> outExts;
    111     Vector<const ScriptKernelID *> kernels;
    112     bool fieldDep = false;
    113 
    114     for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
    115         ScriptGroup::Node *n = mSG->mNodes[ct];
    116         Script *s = n->mKernels[0]->mScript;
    117 
    118         //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
    119 
    120         for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
    121             if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
    122                 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
    123                 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
    124             }
    125         }
    126 
    127         for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
    128             const ScriptKernelID *k = n->mKernels[ct2];
    129             Allocation *ain = NULL;
    130             Allocation *aout = NULL;
    131             bool inExt = false;
    132             bool outExt = false;
    133 
    134             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
    135                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
    136                     ain = n->mInputs[ct3]->mAlloc.get();
    137                     //ALOGE(" link in %p", ain);
    138                 }
    139             }
    140             for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
    141                 if (mSG->mInputs[ct3]->mKernel == k) {
    142                     ain = mSG->mInputs[ct3]->mAlloc.get();
    143                     inExt = true;
    144                     //ALOGE(" io in %p", ain);
    145                 }
    146             }
    147 
    148             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
    149                 if (n->mOutputs[ct3]->mSource.get() == k) {
    150                     aout = n->mOutputs[ct3]->mAlloc.get();
    151                     if(n->mOutputs[ct3]->mDstField.get() != NULL) {
    152                         fieldDep = true;
    153                     }
    154                     //ALOGE(" link out %p", aout);
    155                 }
    156             }
    157             for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
    158                 if (mSG->mOutputs[ct3]->mKernel == k) {
    159                     aout = mSG->mOutputs[ct3]->mAlloc.get();
    160                     outExt = true;
    161                     //ALOGE(" io out %p", aout);
    162                 }
    163             }
    164 
    165             if ((k->mHasKernelOutput == (aout != NULL)) &&
    166                 (k->mHasKernelInput == (ain != NULL))) {
    167                 ins.add(ain);
    168                 inExts.add(inExt);
    169                 outs.add(aout);
    170                 outExts.add(outExt);
    171                 kernels.add(k);
    172             }
    173         }
    174 
    175     }
    176 
    177     MTLaunchStruct mtls;
    178 
    179     if(fieldDep) {
    180         for (size_t ct=0; ct < ins.size(); ct++) {
    181             Script *s = kernels[ct]->mScript;
    182             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    183             uint32_t slot = kernels[ct]->mSlot;
    184 
    185             si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
    186             si->forEachKernelSetup(slot, &mtls);
    187             mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
    188         }
    189     } else {
    190         ScriptList sl;
    191         sl.ins = ins.array();
    192         sl.outs = outs.array();
    193         sl.kernels = kernels.array();
    194         sl.count = kernels.size();
    195 
    196         Vector<const void *> usrPtrs;
    197         Vector<const void *> fnPtrs;
    198         Vector<uint32_t> sigs;
    199         for (size_t ct=0; ct < kernels.size(); ct++) {
    200             Script *s = kernels[ct]->mScript;
    201             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    202 
    203             si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
    204             fnPtrs.add((void *)mtls.kernel);
    205             usrPtrs.add(mtls.fep.usr);
    206             sigs.add(mtls.fep.usrLen);
    207         }
    208         sl.sigs = sigs.array();
    209         sl.usrPtrs = usrPtrs.array();
    210         sl.fnPtrs = fnPtrs.array();
    211         sl.inExts = inExts.array();
    212         sl.outExts = outExts.array();
    213 
    214         Script *s = kernels[0]->mScript;
    215         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    216         si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
    217         mtls.script = NULL;
    218         mtls.kernel = (void (*)())&scriptGroupRoot;
    219         mtls.fep.usr = &sl;
    220         mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
    221     }
    222 }
    223 
    224 
    225