Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2011 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuCore.h"
     18 #include "rsCpuScript.h"
     19 #include "rsScriptGroup.h"
     20 #include "rsCpuScriptGroup.h"
     21 
     22 #include <vector>
     23 
     24 namespace android {
     25 namespace renderscript {
     26 
     27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) {
     28     mCtx = ctx;
     29     mSG = (ScriptGroup*)sg;
     30 }
     31 
     32 CpuScriptGroupImpl::~CpuScriptGroupImpl() {
     33 
     34 }
     35 
     36 bool CpuScriptGroupImpl::init() {
     37     return true;
     38 }
     39 
     40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) {
     41 }
     42 
     43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) {
     44 }
     45 
     46 
     47 typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo,
     48                                       uint32_t xstart, uint32_t xend,
     49                                       uint32_t outstep);
     50 
     51 void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo,
     52                                          uint32_t xstart, uint32_t xend,
     53                                          uint32_t outstep) {
     54 
     55 
     56     const ScriptList *sl             = (const ScriptList *)kinfo->usr;
     57     RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo);
     58 
     59     const uint32_t oldInStride = mkinfo->inStride[0];
     60 
     61     for (size_t ct = 0; ct < sl->count; ct++) {
     62         ScriptGroupRootFunc_t func;
     63         func          = (ScriptGroupRootFunc_t)sl->fnPtrs[ct];
     64         mkinfo->usr   = sl->usrPtrs[ct];
     65 
     66         if (sl->ins[ct]) {
     67             rsAssert(kinfo->inLen == 1);
     68 
     69             mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr;
     70 
     71             mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes;
     72 
     73             if (sl->inExts[ct]) {
     74                 mkinfo->inPtr[0] =
     75                   (mkinfo->inPtr[0] +
     76                    sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y);
     77 
     78             } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
     79                 mkinfo->inPtr[0] =
     80                   (mkinfo->inPtr[0] +
     81                    sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid);
     82             }
     83 
     84         } else {
     85             rsAssert(kinfo->inLen == 0);
     86 
     87             mkinfo->inPtr[0]     = nullptr;
     88             mkinfo->inStride[0]  = 0;
     89         }
     90 
     91         uint32_t ostep;
     92         if (sl->outs[ct]) {
     93             mkinfo->outPtr[0] =
     94               (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr;
     95 
     96             ostep = sl->outs[ct]->mHal.state.elementSizeBytes;
     97 
     98             if (sl->outExts[ct]) {
     99                 mkinfo->outPtr[0] =
    100                   mkinfo->outPtr[0] +
    101                   sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y;
    102 
    103             } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) {
    104                 mkinfo->outPtr[0] =
    105                   mkinfo->outPtr[0] +
    106                   sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid;
    107             }
    108         } else {
    109             mkinfo->outPtr[0] = nullptr;
    110             ostep             = 0;
    111         }
    112 
    113         //ALOGE("kernel %i %p,%p  %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out);
    114         func(kinfo, xstart, xend, ostep);
    115     }
    116     //ALOGE("script group root");
    117 
    118     mkinfo->inStride[0] = oldInStride;
    119     mkinfo->usr         = sl;
    120 }
    121 
    122 
    123 
    124 void CpuScriptGroupImpl::execute() {
    125     std::vector<Allocation *> ins;
    126     std::vector<uint8_t> inExts;
    127     std::vector<Allocation *> outs;
    128     std::vector<uint8_t> outExts;
    129     std::vector<const ScriptKernelID *> kernels;
    130     bool fieldDep = false;
    131 
    132     for (size_t ct=0; ct < mSG->mNodes.size(); ct++) {
    133         ScriptGroup::Node *n = mSG->mNodes[ct];
    134         Script *s = n->mKernels[0]->mScript;
    135         if (s->hasObjectSlots()) {
    136             // Disable the ScriptGroup optimization if we have global RS
    137             // objects that might interfere between kernels.
    138             fieldDep = true;
    139         }
    140 
    141         //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size());
    142 
    143         for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) {
    144             if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) {
    145                 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot);
    146                 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get());
    147             }
    148         }
    149 
    150         for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) {
    151             const ScriptKernelID *k = n->mKernels[ct2];
    152             Allocation *ain = nullptr;
    153             Allocation *aout = nullptr;
    154             bool inExt = false;
    155             bool outExt = false;
    156 
    157             for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) {
    158                 if (n->mInputs[ct3]->mDstKernel.get() == k) {
    159                     ain = n->mInputs[ct3]->mAlloc.get();
    160                     break;
    161                 }
    162             }
    163             if (ain == nullptr) {
    164                 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) {
    165                     if (mSG->mInputs[ct3]->mKernel == k) {
    166                         ain = mSG->mInputs[ct3]->mAlloc.get();
    167                         inExt = true;
    168                         break;
    169                     }
    170                 }
    171             }
    172 
    173             for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) {
    174                 if (n->mOutputs[ct3]->mSource.get() == k) {
    175                     aout = n->mOutputs[ct3]->mAlloc.get();
    176                     if(n->mOutputs[ct3]->mDstField.get() != nullptr) {
    177                         fieldDep = true;
    178                     }
    179                     break;
    180                 }
    181             }
    182             if (aout == nullptr) {
    183                 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) {
    184                     if (mSG->mOutputs[ct3]->mKernel == k) {
    185                         aout = mSG->mOutputs[ct3]->mAlloc.get();
    186                         outExt = true;
    187                         break;
    188                     }
    189                 }
    190             }
    191 
    192             rsAssert((k->mHasKernelOutput == (aout != nullptr)) &&
    193                      (k->mHasKernelInput == (ain != nullptr)));
    194 
    195             ins.push_back(ain);
    196             inExts.push_back(inExt);
    197             outs.push_back(aout);
    198             outExts.push_back(outExt);
    199             kernels.push_back(k);
    200         }
    201 
    202     }
    203 
    204     MTLaunchStructForEach mtls;
    205 
    206     if (fieldDep) {
    207         for (size_t ct=0; ct < ins.size(); ct++) {
    208             Script *s = kernels[ct]->mScript;
    209             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    210             uint32_t slot = kernels[ct]->mSlot;
    211 
    212             uint32_t inLen;
    213             const Allocation **ains;
    214 
    215             if (ins[ct] == nullptr) {
    216                 inLen = 0;
    217                 ains  = nullptr;
    218 
    219             } else {
    220                 inLen = 1;
    221                 ains  = const_cast<const Allocation**>(&ins[ct]);
    222             }
    223 
    224             bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls);
    225 
    226             si->forEachKernelSetup(slot, &mtls);
    227             si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr,
    228                           mtls.fep.usrLen, nullptr);
    229 
    230             if (launchOK) {
    231                 mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls);
    232             }
    233 
    234             si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr);
    235         }
    236     } else {
    237         ScriptList sl;
    238         sl.ins = ins.data();
    239         sl.outs = outs.data();
    240         sl.kernels = kernels.data();
    241         sl.count = kernels.size();
    242 
    243         uint32_t inLen;
    244         const Allocation **ains;
    245 
    246         if (ins[0] == nullptr) {
    247             inLen = 0;
    248             ains  = nullptr;
    249 
    250         } else {
    251             inLen = 1;
    252             ains  = const_cast<const Allocation**>(&ins[0]);
    253         }
    254 
    255         std::vector<const void *> usrPtrs;
    256         std::vector<const void *> fnPtrs;
    257         std::vector<uint32_t> sigs;
    258         for (size_t ct=0; ct < kernels.size(); ct++) {
    259             Script *s = kernels[ct]->mScript;
    260             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    261 
    262             si->forEachKernelSetup(kernels[ct]->mSlot, &mtls);
    263             fnPtrs.push_back((void *)mtls.kernel);
    264             usrPtrs.push_back(mtls.fep.usr);
    265             sigs.push_back(mtls.fep.usrLen);
    266             si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct],
    267                           mtls.fep.usr, mtls.fep.usrLen, nullptr);
    268         }
    269         sl.sigs = sigs.data();
    270         sl.usrPtrs = usrPtrs.data();
    271         sl.fnPtrs = fnPtrs.data();
    272         sl.inExts = inExts.data();
    273         sl.outExts = outExts.data();
    274 
    275         Script *s = kernels[0]->mScript;
    276         RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    277 
    278         if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) {
    279 
    280             mtls.script = nullptr;
    281             mtls.kernel = &scriptGroupRoot;
    282             mtls.fep.usr = &sl;
    283 
    284             mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls);
    285         }
    286 
    287         for (size_t ct=0; ct < kernels.size(); ct++) {
    288             Script *s = kernels[ct]->mScript;
    289             RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s);
    290             si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0,
    291                            nullptr);
    292         }
    293     }
    294 }
    295 
    296 } // namespace renderscript
    297 } // namespace android
    298