1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "rsCpuCore.h" 18 #include "rsCpuScript.h" 19 #include "rsScriptGroup.h" 20 #include "rsCpuScriptGroup.h" 21 //#include "rsdBcc.h" 22 //#include "rsdAllocation.h" 23 24 using namespace android; 25 using namespace android::renderscript; 26 27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroup *sg) { 28 mCtx = ctx; 29 mSG = sg; 30 } 31 32 CpuScriptGroupImpl::~CpuScriptGroupImpl() { 33 34 } 35 36 bool CpuScriptGroupImpl::init() { 37 return true; 38 } 39 40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) { 41 } 42 43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) { 44 } 45 46 47 typedef void (*ScriptGroupRootFunc_t)(const RsForEachStubParamStruct *p, 48 uint32_t xstart, uint32_t xend, 49 uint32_t instep, uint32_t outstep); 50 51 void CpuScriptGroupImpl::scriptGroupRoot(const RsForEachStubParamStruct *p, 52 uint32_t xstart, uint32_t xend, 53 uint32_t instep, uint32_t outstep) { 54 55 56 const ScriptList *sl = (const ScriptList *)p->usr; 57 RsForEachStubParamStruct *mp = (RsForEachStubParamStruct *)p; 58 const void *oldUsr = p->usr; 59 60 for(size_t ct=0; ct < sl->count; ct++) { 61 ScriptGroupRootFunc_t func; 62 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; 63 mp->usr = sl->usrPtrs[ct]; 64 65 mp->ptrIn = NULL; 66 mp->in = NULL; 67 mp->ptrOut = NULL; 68 mp->out = NULL; 69 70 uint32_t istep = 0; 71 uint32_t ostep = 0; 72 73 if (sl->ins[ct]) { 74 mp->ptrIn = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; 75 istep = sl->ins[ct]->mHal.state.elementSizeBytes; 76 mp->in = mp->ptrIn; 77 if (sl->inExts[ct]) { 78 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->y; 79 } else { 80 if (sl->ins[ct]->mHal.drvState.lod[0].dimY > p->lid) { 81 mp->in = mp->ptrIn + sl->ins[ct]->mHal.drvState.lod[0].stride * p->lid; 82 } 83 } 84 } 85 86 if (sl->outs[ct]) { 87 mp->ptrOut = (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; 88 mp->out = mp->ptrOut; 89 ostep = sl->outs[ct]->mHal.state.elementSizeBytes; 90 if (sl->outExts[ct]) { 91 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->y; 92 } else { 93 if (sl->outs[ct]->mHal.drvState.lod[0].dimY > p->lid) { 94 mp->out = mp->ptrOut + sl->outs[ct]->mHal.drvState.lod[0].stride * p->lid; 95 } 96 } 97 } 98 99 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); 100 func(p, xstart, xend, istep, ostep); 101 } 102 //ALOGE("script group root"); 103 104 //ConvolveParams *cp = (ConvolveParams *)p->usr; 105 106 mp->usr = oldUsr; 107 } 108 109 110 111 void CpuScriptGroupImpl::execute() { 112 Vector<Allocation *> ins; 113 Vector<bool> inExts; 114 Vector<Allocation *> outs; 115 Vector<bool> outExts; 116 Vector<const ScriptKernelID *> kernels; 117 bool fieldDep = false; 118 119 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) { 120 ScriptGroup::Node *n = mSG->mNodes[ct]; 121 Script *s = n->mKernels[0]->mScript; 122 if (s->hasObjectSlots()) { 123 // Disable the ScriptGroup optimization if we have global RS 124 // objects that might interfere between kernels. 125 fieldDep = true; 126 } 127 128 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); 129 130 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { 131 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { 132 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); 133 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); 134 } 135 } 136 137 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { 138 const ScriptKernelID *k = n->mKernels[ct2]; 139 Allocation *ain = NULL; 140 Allocation *aout = NULL; 141 bool inExt = false; 142 bool outExt = false; 143 144 if (k->mScript->hasObjectSlots()) { 145 // Disable the ScriptGroup optimization if we have global RS 146 // objects that might interfere between kernels. 147 fieldDep = true; 148 } 149 150 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { 151 if (n->mInputs[ct3]->mDstKernel.get() == k) { 152 ain = n->mInputs[ct3]->mAlloc.get(); 153 //ALOGE(" link in %p", ain); 154 } 155 } 156 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) { 157 if (mSG->mInputs[ct3]->mKernel == k) { 158 ain = mSG->mInputs[ct3]->mAlloc.get(); 159 inExt = true; 160 //ALOGE(" io in %p", ain); 161 } 162 } 163 164 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { 165 if (n->mOutputs[ct3]->mSource.get() == k) { 166 aout = n->mOutputs[ct3]->mAlloc.get(); 167 if(n->mOutputs[ct3]->mDstField.get() != NULL) { 168 fieldDep = true; 169 } 170 //ALOGE(" link out %p", aout); 171 } 172 } 173 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) { 174 if (mSG->mOutputs[ct3]->mKernel == k) { 175 aout = mSG->mOutputs[ct3]->mAlloc.get(); 176 outExt = true; 177 //ALOGE(" io out %p", aout); 178 } 179 } 180 181 if ((k->mHasKernelOutput == (aout != NULL)) && 182 (k->mHasKernelInput == (ain != NULL))) { 183 ins.add(ain); 184 inExts.add(inExt); 185 outs.add(aout); 186 outExts.add(outExt); 187 kernels.add(k); 188 } 189 } 190 191 } 192 193 MTLaunchStruct mtls; 194 195 if(fieldDep) { 196 for (size_t ct=0; ct < ins.size(); ct++) { 197 Script *s = kernels[ct]->mScript; 198 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 199 uint32_t slot = kernels[ct]->mSlot; 200 201 si->forEachMtlsSetup(ins[ct], outs[ct], NULL, 0, NULL, &mtls); 202 si->forEachKernelSetup(slot, &mtls); 203 si->preLaunch(slot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); 204 mCtx->launchThreads(ins[ct], outs[ct], NULL, &mtls); 205 si->postLaunch(slot, ins[ct], outs[ct], NULL, 0, NULL); 206 } 207 } else { 208 ScriptList sl; 209 sl.ins = ins.array(); 210 sl.outs = outs.array(); 211 sl.kernels = kernels.array(); 212 sl.count = kernels.size(); 213 214 Vector<const void *> usrPtrs; 215 Vector<const void *> fnPtrs; 216 Vector<uint32_t> sigs; 217 for (size_t ct=0; ct < kernels.size(); ct++) { 218 Script *s = kernels[ct]->mScript; 219 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 220 221 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls); 222 fnPtrs.add((void *)mtls.kernel); 223 usrPtrs.add(mtls.fep.usr); 224 sigs.add(mtls.fep.usrLen); 225 si->preLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], mtls.fep.usr, mtls.fep.usrLen, NULL); 226 } 227 sl.sigs = sigs.array(); 228 sl.usrPtrs = usrPtrs.array(); 229 sl.fnPtrs = fnPtrs.array(); 230 sl.inExts = inExts.array(); 231 sl.outExts = outExts.array(); 232 233 Script *s = kernels[0]->mScript; 234 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 235 si->forEachMtlsSetup(ins[0], outs[0], NULL, 0, NULL, &mtls); 236 mtls.script = NULL; 237 mtls.kernel = (void (*)())&scriptGroupRoot; 238 mtls.fep.usr = &sl; 239 mCtx->launchThreads(ins[0], outs[0], NULL, &mtls); 240 241 for (size_t ct=0; ct < kernels.size(); ct++) { 242 Script *s = kernels[ct]->mScript; 243 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 244 si->postLaunch(kernels[ct]->mSlot, ins[ct], outs[ct], NULL, 0, NULL); 245 } 246 } 247 } 248 249 250