Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuCore.h"
     18 #include "rsCpuScript.h"
     19 #include "rsScriptGroup.h"
     20 #include "rsCpuScriptGroup.h"
     21 
     22 using namespace android;
     23 using namespace android::renderscript;
     24 
     25 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
     26     mCtx = ctx;
     27     mSG = (ScriptGroup*)sg;
     28 }
     29 
     30 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
     31 
     32 }
     33 
     34 bool CpuScriptGroupImpl::init() {
     35     return true;
     36 }
     37 
     38 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
     39 }
     40 
     41 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
     42 }
     43 
     44 
     45 typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
     46                                       uint32_t xstart, uint32_t xend,
     47                                       uint32_t outstep);
     48 
     49 void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
     50                                          uint32_t xstart, uint32_t xend,
     51                                          uint32_t outstep) {
     52 
     53 
     54     const ScriptList *sl             = (const ScriptList *)kinfo->usr;
     55     RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
     56 
     57     const uint32_t oldInStride = mkinfo->inStride[0];
     58 
     59     for (size_t ct = 0; ct < sl->count; ct++) {
     60         ScriptGroupRootFunc_t func;
     61         func          = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
     62         mkinfo->usr   = sl->usrPtrs[ct];
     63 
     64         if (sl->ins[ct]) {
     65             rsAssert(kinfo->inLen == 1);
     66 
     67             mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
     68 
     69             mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
     70 
     71             if (sl->inExts[ct]) {
     72                 mkinfo->inPtr[0] =
     73                   (mkinfo->inPtr[0] +
     74                    sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
     75 
     76             } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
     77                 mkinfo->inPtr[0] =
     78                   (mkinfo->inPtr[0] +
     79                    sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
     80             }
     81 
     82         } else {
     83             rsAssert(kinfo->inLen == 0);
     84 
     85             mkinfo->inPtr[0]     = nullptr;
     86             mkinfo->inStride[0]  = 0;
     87         }
     88 
     89         uint32_t ostep;
     90         if (sl->outs[ct]) {
     91             rsAssert(kinfo->outLen == 1);
     92 
     93             mkinfo->outPtr[0] =
     94               (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
     95 
     96             ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
     97 
     98             if (sl->outExts[ct]) {
     99                 mkinfo->outPtr[0] =
    100                   mkinfo->outPtr[0] +
    101                   sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
    102 
    103             } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
    104                 mkinfo->outPtr[0] =
    105                   mkinfo->outPtr[0] +
    106                   sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
    107             }
    108         } else {
    109             rsAssert(kinfo->outLen == 0);
    110 
    111             mkinfo->outPtr[0] = nullptr;
    112             ostep             = 0;
    113         }
    114 
    115         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
    116         func(kinfo, xstart, xend, ostep);
    117     }
    118     //ALOGE("script group root");
    119 
    120     mkinfo->inStride[0] = oldInStride;
    121     mkinfo->usr         = sl;
    122 }
    123 
    124 
    125 
    126 void CpuScriptGroupImpl::execute() {
    127     Vector<Allocation *> ins;
    128     Vector<bool> inExts;
    129     Vector<Allocation *> outs;
    130     Vector<bool> outExts;
    131     Vector<const ScriptKernelID *> kernels;
    132     bool fieldDep = false;
    133 
    134     for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
    135         ScriptGroup::Node *n = mSG->mNodes[ct];
    136         Script *s = n->mKernels[0]->mScript;
    137         if (s->hasObjectSlots()) {
    138             // Disable the ScriptGroup optimization if we have global RS
    139             // objects that might interfere between kernels.
    140             fieldDep = true;
    141         }
    142 
    143         //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
    144 
    145         for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
    146             if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
    147                 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
    148                 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
    149             }
    150         }
    151 
    152         for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
    153             const ScriptKernelID *k = n->mKernels[ct2];
    154             Allocation *ain = nullptr;
    155             Allocation *aout = nullptr;
    156             bool inExt = false;
    157             bool outExt = false;
    158 
    159             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
    160                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
    161                     ain = n->mInputs[ct3]->mAlloc.get();
    162                     break;
    163                 }
    164             }
    165             if (ain == nullptr) {
    166                 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
    167                     if (mSG->mInputs[ct3]->mKernel == k) {
    168                         ain = mSG->mInputs[ct3]->mAlloc.get();
    169                         inExt = true;
    170                         break;
    171                     }
    172                 }
    173             }
    174 
    175             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
    176                 if (n->mOutputs[ct3]->mSource.get() == k) {
    177                     aout = n->mOutputs[ct3]->mAlloc.get();
    178                     if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
    179                         fieldDep = true;
    180                     }
    181                     break;
    182                 }
    183             }
    184             if (aout == nullptr) {
    185                 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
    186                     if (mSG->mOutputs[ct3]->mKernel == k) {
    187                         aout = mSG->mOutputs[ct3]->mAlloc.get();
    188                         outExt = true;
    189                         break;
    190                     }
    191                 }
    192             }
    193 
    194             rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
    195                      (k->mHasKernelInput == (ain != nullptr)));
    196 
    197             ins.add(ain);
    198             inExts.add(inExt);
    199             outs.add(aout);
    200             outExts.add(outExt);
    201             kernels.add(k);
    202         }
    203 
    204     }
    205 
    206     MTLaunchStructForEach mtls;
    207 
    208     if (fieldDep) {
    209         for (size_t ct=0; ct < ins.size(); ct++) {
    210             Script *s = kernels[ct]->mScript;
    211             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    212             uint32_t slot = kernels[ct]->mSlot;
    213 
    214             uint32_t inLen;
    215             const Allocation **ains;
    216 
    217             if (ins[ct] == nullptr) {
    218                 inLen = 0;
    219                 ains  = nullptr;
    220 
    221             } else {
    222                 inLen = 1;
    223                 ains  = const_cast<const Allocation**>(&ins[ct]);
    224             }
    225 
    226             bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
    227 
    228             si->forEachKernelSetup(slot, &mtls);
    229             si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
    230                           mtls.fep.usrLen, nullptr);
    231 
    232             if (launchOK) {
    233                 mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls);
    234             }
    235 
    236             si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
    237         }
    238     } else {
    239         ScriptList sl;
    240         sl.ins = ins.array();
    241         sl.outs = outs.array();
    242         sl.kernels = kernels.array();
    243         sl.count = kernels.size();
    244 
    245         uint32_t inLen;
    246         const Allocation **ains;
    247 
    248         if (ins[0] == nullptr) {
    249             inLen = 0;
    250             ains  = nullptr;
    251 
    252         } else {
    253             inLen = 1;
    254             ains  = const_cast<const Allocation**>(&ins[0]);
    255         }
    256 
    257         Vector<const void *> usrPtrs;
    258         Vector<const void *> fnPtrs;
    259         Vector<uint32_t> sigs;
    260         for (size_t ct=0; ct < kernels.size(); ct++) {
    261             Script *s = kernels[ct]->mScript;
    262             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    263 
    264             si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
    265             fnPtrs.add((void *)mtls.kernel);
    266             usrPtrs.add(mtls.fep.usr);
    267             sigs.add(mtls.fep.usrLen);
    268             si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
    269                           mtls.fep.usr, mtls.fep.usrLen, nullptr);
    270         }
    271         sl.sigs = sigs.array();
    272         sl.usrPtrs = usrPtrs.array();
    273         sl.fnPtrs = fnPtrs.array();
    274         sl.inExts = inExts.array();
    275         sl.outExts = outExts.array();
    276 
    277         Script *s = kernels[0]->mScript;
    278         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    279 
    280         if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
    281 
    282             mtls.script = nullptr;
    283             mtls.kernel = &scriptGroupRoot;
    284             mtls.fep.usr = &sl;
    285 
    286             mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls);
    287         }
    288 
    289         for (size_t ct=0; ct < kernels.size(); ct++) {
    290             Script *s = kernels[ct]->mScript;
    291             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    292             si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
    293                            nullptr);
    294         }
    295     }
    296 }
    297