Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuCore.h"
     18 #include "rsCpuScript.h"
     19 #include "rsScriptGroup.h"
     20 #include "rsCpuScriptGroup.h"
     21 //#include "rsdBcc.h"
     22 //#include "rsdAllocation.h"
     23 
     24 using namespace android;
     25 using namespace android::renderscript;
     26 
     27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) {
     28     mCtx = ctx;
     29     mSG = sg;
     30 }
     31 
     32 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
     33 
     34 }
     35 
     36 bool CpuScriptGroupImpl::init() {
     37     return true;
     38 }
     39 
     40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
     41 }
     42 
     43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
     44 }
     45 
     46 
     47 typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p,
     48                                       uint32_t xstart, uint32_t xend,
     49                                       uint32_t instep, uint32_t outstep);
     50 
     51 void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p,
     52                                          uint32_t xstart, uint32_t xend,
     53                                          uint32_t instep, uint32_t outstep) {
     54 
     55 
     56     const ScriptList *sl = (const ScriptList *)p->usr;
     57     RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p;
     58     const void *oldUsr = p->usr;
     59 
     60     for(size_t ct=0; ct < sl->count; ct++) {
     61         ScriptGroupRootFunc_t func;
     62         func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
     63         mp->usr = sl->usrPtrs[ct];
     64 
     65         mp->ptrIn = NULL;
     66         mp->in = NULL;
     67         mp->ptrOut = NULL;
     68         mp->out = NULL;
     69 
     70         uint32_t istep = 0;
     71         uint32_t ostep = 0;
     72 
     73         if (sl->ins[ct]) {
     74             mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
     75             istep = sl->ins[ct]->mHal.state.elementSizeBytes;
     76             mp->in = mp->ptrIn;
     77             if (sl->inExts[ct]) {
     78                 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y;
     79             } else {
     80                 if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) {
     81                     mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid;
     82                 }
     83             }
     84         }
     85 
     86         if (sl->outs[ct]) {
     87             mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
     88             mp->out = mp->ptrOut;
     89             ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
     90             if (sl->outExts[ct]) {
     91                 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y;
     92             } else {
     93                 if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) {
     94                     mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid;
     95                 }
     96             }
     97         }
     98 
     99         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
    100         func(p, xstart, xend, istep, ostep);
    101     }
    102     //ALOGE("script group root");
    103 
    104     //ConvolveParams *cp = (ConvolveParams *)p->usr;
    105 
    106     mp->usr = oldUsr;
    107 }
    108 
    109 
    110 
    111 void CpuScriptGroupImpl::execute() {
    112     Vector<Allocation *> ins;
    113     Vector<bool> inExts;
    114     Vector<Allocation *> outs;
    115     Vector<bool> outExts;
    116     Vector<const ScriptKernelID *> kernels;
    117     bool fieldDep = false;
    118 
    119     for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
    120         ScriptGroup::Node *n = mSG->mNodes[ct];
    121         Script *s = n->mKernels[0]->mScript;
    122         if (s->hasObjectSlots()) {
    123             // Disable the ScriptGroup optimization if we have global RS
    124             // objects that might interfere between kernels.
    125             fieldDep = true;
    126         }
    127 
    128         //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
    129 
    130         for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
    131             if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
    132                 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
    133                 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
    134             }
    135         }
    136 
    137         for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
    138             const ScriptKernelID *k = n->mKernels[ct2];
    139             Allocation *ain = NULL;
    140             Allocation *aout = NULL;
    141             bool inExt = false;
    142             bool outExt = false;
    143 
    144             if (k->mScript->hasObjectSlots()) {
    145                 // Disable the ScriptGroup optimization if we have global RS
    146                 // objects that might interfere between kernels.
    147                 fieldDep = true;
    148             }
    149 
    150             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
    151                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
    152                     ain = n->mInputs[ct3]->mAlloc.get();
    153                     //ALOGE(" link in %p", ain);
    154                 }
    155             }
    156             for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
    157                 if (mSG->mInputs[ct3]->mKernel == k) {
    158                     ain = mSG->mInputs[ct3]->mAlloc.get();
    159                     inExt = true;
    160                     //ALOGE(" io in %p", ain);
    161                 }
    162             }
    163 
    164             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
    165                 if (n->mOutputs[ct3]->mSource.get() == k) {
    166                     aout = n->mOutputs[ct3]->mAlloc.get();
    167                     if(n->mOutputs[ct3]->mDstField.get() != NULL) {
    168                         fieldDep = true;
    169                     }
    170                     //ALOGE(" link out %p", aout);
    171                 }
    172             }
    173             for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
    174                 if (mSG->mOutputs[ct3]->mKernel == k) {
    175                     aout = mSG->mOutputs[ct3]->mAlloc.get();
    176                     outExt = true;
    177                     //ALOGE(" io out %p", aout);
    178                 }
    179             }
    180 
    181             if ((k->mHasKernelOutput == (aout != NULL)) &&
    182                 (k->mHasKernelInput == (ain != NULL))) {
    183                 ins.add(ain);
    184                 inExts.add(inExt);
    185                 outs.add(aout);
    186                 outExts.add(outExt);
    187                 kernels.add(k);
    188             }
    189         }
    190 
    191     }
    192 
    193     MTLaunchStruct mtls;
    194 
    195     if(fieldDep) {
    196         for (size_t ct=0; ct < ins.size(); ct++) {
    197             Script *s = kernels[ct]->mScript;
    198             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    199             uint32_t slot = kernels[ct]->mSlot;
    200 
    201             si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls);
    202             si->forEachKernelSetup(slot, &mtls);
    203             si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
    204             mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls);
    205             si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL);
    206         }
    207     } else {
    208         ScriptList sl;
    209         sl.ins = ins.array();
    210         sl.outs = outs.array();
    211         sl.kernels = kernels.array();
    212         sl.count = kernels.size();
    213 
    214         Vector<const void *> usrPtrs;
    215         Vector<const void *> fnPtrs;
    216         Vector<uint32_t> sigs;
    217         for (size_t ct=0; ct < kernels.size(); ct++) {
    218             Script *s = kernels[ct]->mScript;
    219             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    220 
    221             si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
    222             fnPtrs.add((void *)mtls.kernel);
    223             usrPtrs.add(mtls.fep.usr);
    224             sigs.add(mtls.fep.usrLen);
    225             si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL);
    226         }
    227         sl.sigs = sigs.array();
    228         sl.usrPtrs = usrPtrs.array();
    229         sl.fnPtrs = fnPtrs.array();
    230         sl.inExts = inExts.array();
    231         sl.outExts = outExts.array();
    232 
    233         Script *s = kernels[0]->mScript;
    234         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    235         si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls);
    236         mtls.script = NULL;
    237         mtls.kernel = (void (*)())&scriptGroupRoot;
    238         mtls.fep.usr = &sl;
    239         mCtx->launchThreads(ins[0], outs[0], NULL, &mtls);
    240 
    241         for (size_t ct=0; ct < kernels.size(); ct++) {
    242             Script *s = kernels[ct]->mScript;
    243             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    244             si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL);
    245         }
    246     }
    247 }
    248 
    249 
    250