1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "rsCpuCore.h" 18 #include "rsCpuScript.h" 19 #include "rsScriptGroup.h" 20 #include "rsCpuScriptGroup.h" 21 //#include "rsdBcc.h" 22 //#include "rsdAllocation.h" 23 24 using namespace android; 25 using namespace android::renderscript; 26 27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) { 28 mCtx = ctx; 29 mSG = sg; 30 } 31 32 CpuScriptGroupImpl::~CpuScriptGroupImpl() { 33 34 } 35 36 bool CpuScriptGroupImpl::init() { 37 return true; 38 } 39 40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) { 41 } 42 43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) { 44 } 45 46 47 typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p, 48 uint32_t xstart, uint32_t xend, 49 uint32_t instep, uint32_t outstep); 50 51 void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p, 52 uint32_t xstart, uint32_t xend, 53 uint32_t instep, uint32_t outstep) { 54 55 56 const ScriptList *sl = (const ScriptList *)p->usr; 57 RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p; 58 const void *oldUsr = p->usr; 59 60 for(size_t ct=0; ct < sl->count; ct++) { 61 ScriptGroupRootFunc_t func; 62 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; 63 mp->usr = sl->usrPtrs[ct]; 64 65 mp->ptrIn = NULL; 66 mp->in = NULL; 67 mp->ptrOut = NULL; 68 mp->out = NULL; 69 70 if (sl->ins[ct]) { 71 mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; 72 mp->in = mp->ptrIn; 73 if (sl->inExts[ct]) { 74 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y; 75 } else { 76 if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) { 77 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid; 78 } 79 } 80 } 81 82 if (sl->outs[ct]) { 83 mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; 84 mp->out = mp->ptrOut; 85 if (sl->outExts[ct]) { 86 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y; 87 } else { 88 if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) { 89 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid; 90 } 91 } 92 } 93 94 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); 95 func(p, xstart, xend, instep, outstep); 96 } 97 //ALOGE("script group root"); 98 99 //ConvolveParams *cp = (ConvolveParams *)p->usr; 100 101 mp->usr = oldUsr; 102 } 103 104 105 106 void CpuScriptGroupImpl::execute() { 107 Vector<Allocation *> ins; 108 Vector<bool> inExts; 109 Vector<Allocation *> outs; 110 Vector<bool> outExts; 111 Vector<const ScriptKernelID *> kernels; 112 bool fieldDep = false; 113 114 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) { 115 ScriptGroup::Node *n = mSG->mNodes[ct]; 116 Script *s = n->mKernels[0]->mScript; 117 118 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); 119 120 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { 121 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { 122 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); 123 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); 124 } 125 } 126 127 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { 128 const ScriptKernelID *k = n->mKernels[ct2]; 129 Allocation *ain = NULL; 130 Allocation *aout = NULL; 131 bool inExt = false; 132 bool outExt = false; 133 134 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { 135 if (n->mInputs[ct3]->mDstKernel.get() == k) { 136 ain = n->mInputs[ct3]->mAlloc.get(); 137 //ALOGE(" link in %p", ain); 138 } 139 } 140 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) { 141 if (mSG->mInputs[ct3]->mKernel == k) { 142 ain = mSG->mInputs[ct3]->mAlloc.get(); 143 inExt = true; 144 //ALOGE(" io in %p", ain); 145 } 146 } 147 148 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { 149 if (n->mOutputs[ct3]->mSource.get() == k) { 150 aout = n->mOutputs[ct3]->mAlloc.get(); 151 if(n->mOutputs[ct3]->mDstField.get() != NULL) { 152 fieldDep = true; 153 } 154 //ALOGE(" link out %p", aout); 155 } 156 } 157 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) { 158 if (mSG->mOutputs[ct3]->mKernel == k) { 159 aout = mSG->mOutputs[ct3]->mAlloc.get(); 160 outExt = true; 161 //ALOGE(" io out %p", aout); 162 } 163 } 164 165 if ((k->mHasKernelOutput == (aout != NULL)) && 166 (k->mHasKernelInput == (ain != NULL))) { 167 ins.add(ain); 168 inExts.add(inExt); 169 outs.add(aout); 170 outExts.add(outExt); 171 kernels.add(k); 172 } 173 } 174 175 } 176 177 MTLaunchStruct mtls; 178 179 if(fieldDep) { 180 for (size_t ct=0; ct < ins.size(); ct++) { 181 Script *s = kernels[ct]->mScript; 182 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 183 uint32_t slot = kernels[ct]->mSlot; 184 185 si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls); 186 si->forEachKernelSetup(slot, &mtls); 187 mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls); 188 } 189 } else { 190 ScriptList sl; 191 sl.ins = ins.array(); 192 sl.outs = outs.array(); 193 sl.kernels = kernels.array(); 194 sl.count = kernels.size(); 195 196 Vector<const void *> usrPtrs; 197 Vector<const void *> fnPtrs; 198 Vector<uint32_t> sigs; 199 for (size_t ct=0; ct < kernels.size(); ct++) { 200 Script *s = kernels[ct]->mScript; 201 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 202 203 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls); 204 fnPtrs.add((void *)mtls.kernel); 205 usrPtrs.add(mtls.fep.usr); 206 sigs.add(mtls.fep.usrLen); 207 } 208 sl.sigs = sigs.array(); 209 sl.usrPtrs = usrPtrs.array(); 210 sl.fnPtrs = fnPtrs.array(); 211 sl.inExts = inExts.array(); 212 sl.outExts = outExts.array(); 213 214 Script *s = kernels[0]->mScript; 215 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 216 si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls); 217 mtls.script = NULL; 218 mtls.kernel = (void (*)())&scriptGroupRoot; 219 mtls.fep.usr = &sl; 220 mCtx->launchThreads(ins[0], outs[0], NULL, &mtls); 221 } 222 } 223 224 225