1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "rsCpuCore.h" 18 #include "rsCpuScript.h" 19 #include "rsScriptGroup.h" 20 #include "rsCpuScriptGroup.h" 21 22 using namespace android; 23 using namespace android::renderscript; 24 25 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) { 26 mCtx = ctx; 27 mSG = (ScriptGroup*)sg; 28 } 29 30 CpuScriptGroupImpl::~CpuScriptGroupImpl() { 31 32 } 33 34 bool CpuScriptGroupImpl::init() { 35 return true; 36 } 37 38 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) { 39 } 40 41 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) { 42 } 43 44 45 typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo, 46 uint32_t xstart, uint32_t xend, 47 uint32_t outstep); 48 49 void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo, 50 uint32_t xstart, uint32_t xend, 51 uint32_t outstep) { 52 53 54 const ScriptList *sl = (const ScriptList *)kinfo->usr; 55 RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 56 57 const uint32_t oldInStride = mkinfo->inStride[0]; 58 59 for (size_t ct = 0; ct < sl->count; ct++) { 60 ScriptGroupRootFunc_t func; 61 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; 62 mkinfo->usr = sl->usrPtrs[ct]; 63 64 if (sl->ins[ct]) { 65 rsAssert(kinfo->inLen == 1); 66 67 mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; 68 69 mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes; 70 71 if (sl->inExts[ct]) { 72 mkinfo->inPtr[0] = 73 (mkinfo->inPtr[0] + 74 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y); 75 76 } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) { 77 mkinfo->inPtr[0] = 78 (mkinfo->inPtr[0] + 79 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid); 80 } 81 82 } else { 83 rsAssert(kinfo->inLen == 0); 84 85 mkinfo->inPtr[0] = nullptr; 86 mkinfo->inStride[0] = 0; 87 } 88 89 uint32_t ostep; 90 if (sl->outs[ct]) { 91 rsAssert(kinfo->outLen == 1); 92 93 mkinfo->outPtr[0] = 94 (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; 95 96 ostep = sl->outs[ct]->mHal.state.elementSizeBytes; 97 98 if (sl->outExts[ct]) { 99 mkinfo->outPtr[0] = 100 mkinfo->outPtr[0] + 101 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y; 102 103 } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) { 104 mkinfo->outPtr[0] = 105 mkinfo->outPtr[0] + 106 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid; 107 } 108 } else { 109 rsAssert(kinfo->outLen == 0); 110 111 mkinfo->outPtr[0] = nullptr; 112 ostep = 0; 113 } 114 115 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); 116 func(kinfo, xstart, xend, ostep); 117 } 118 //ALOGE("script group root"); 119 120 mkinfo->inStride[0] = oldInStride; 121 mkinfo->usr = sl; 122 } 123 124 125 126 void CpuScriptGroupImpl::execute() { 127 Vector<Allocation *> ins; 128 Vector<bool> inExts; 129 Vector<Allocation *> outs; 130 Vector<bool> outExts; 131 Vector<const ScriptKernelID *> kernels; 132 bool fieldDep = false; 133 134 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) { 135 ScriptGroup::Node *n = mSG->mNodes[ct]; 136 Script *s = n->mKernels[0]->mScript; 137 if (s->hasObjectSlots()) { 138 // Disable the ScriptGroup optimization if we have global RS 139 // objects that might interfere between kernels. 140 fieldDep = true; 141 } 142 143 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); 144 145 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { 146 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { 147 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); 148 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); 149 } 150 } 151 152 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { 153 const ScriptKernelID *k = n->mKernels[ct2]; 154 Allocation *ain = nullptr; 155 Allocation *aout = nullptr; 156 bool inExt = false; 157 bool outExt = false; 158 159 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { 160 if (n->mInputs[ct3]->mDstKernel.get() == k) { 161 ain = n->mInputs[ct3]->mAlloc.get(); 162 break; 163 } 164 } 165 if (ain == nullptr) { 166 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) { 167 if (mSG->mInputs[ct3]->mKernel == k) { 168 ain = mSG->mInputs[ct3]->mAlloc.get(); 169 inExt = true; 170 break; 171 } 172 } 173 } 174 175 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { 176 if (n->mOutputs[ct3]->mSource.get() == k) { 177 aout = n->mOutputs[ct3]->mAlloc.get(); 178 if(n->mOutputs[ct3]->mDstField.get() != nullptr) { 179 fieldDep = true; 180 } 181 break; 182 } 183 } 184 if (aout == nullptr) { 185 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) { 186 if (mSG->mOutputs[ct3]->mKernel == k) { 187 aout = mSG->mOutputs[ct3]->mAlloc.get(); 188 outExt = true; 189 break; 190 } 191 } 192 } 193 194 rsAssert((k->mHasKernelOutput == (aout != nullptr)) && 195 (k->mHasKernelInput == (ain != nullptr))); 196 197 ins.add(ain); 198 inExts.add(inExt); 199 outs.add(aout); 200 outExts.add(outExt); 201 kernels.add(k); 202 } 203 204 } 205 206 MTLaunchStructForEach mtls; 207 208 if (fieldDep) { 209 for (size_t ct=0; ct < ins.size(); ct++) { 210 Script *s = kernels[ct]->mScript; 211 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 212 uint32_t slot = kernels[ct]->mSlot; 213 214 uint32_t inLen; 215 const Allocation **ains; 216 217 if (ins[ct] == nullptr) { 218 inLen = 0; 219 ains = nullptr; 220 221 } else { 222 inLen = 1; 223 ains = const_cast<const Allocation**>(&ins[ct]); 224 } 225 226 bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls); 227 228 si->forEachKernelSetup(slot, &mtls); 229 si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr, 230 mtls.fep.usrLen, nullptr); 231 232 if (launchOK) { 233 mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls); 234 } 235 236 si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr); 237 } 238 } else { 239 ScriptList sl; 240 sl.ins = ins.array(); 241 sl.outs = outs.array(); 242 sl.kernels = kernels.array(); 243 sl.count = kernels.size(); 244 245 uint32_t inLen; 246 const Allocation **ains; 247 248 if (ins[0] == nullptr) { 249 inLen = 0; 250 ains = nullptr; 251 252 } else { 253 inLen = 1; 254 ains = const_cast<const Allocation**>(&ins[0]); 255 } 256 257 Vector<const void *> usrPtrs; 258 Vector<const void *> fnPtrs; 259 Vector<uint32_t> sigs; 260 for (size_t ct=0; ct < kernels.size(); ct++) { 261 Script *s = kernels[ct]->mScript; 262 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 263 264 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls); 265 fnPtrs.add((void *)mtls.kernel); 266 usrPtrs.add(mtls.fep.usr); 267 sigs.add(mtls.fep.usrLen); 268 si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], 269 mtls.fep.usr, mtls.fep.usrLen, nullptr); 270 } 271 sl.sigs = sigs.array(); 272 sl.usrPtrs = usrPtrs.array(); 273 sl.fnPtrs = fnPtrs.array(); 274 sl.inExts = inExts.array(); 275 sl.outExts = outExts.array(); 276 277 Script *s = kernels[0]->mScript; 278 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 279 280 if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) { 281 282 mtls.script = nullptr; 283 mtls.kernel = &scriptGroupRoot; 284 mtls.fep.usr = &sl; 285 286 mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls); 287 } 288 289 for (size_t ct=0; ct < kernels.size(); ct++) { 290 Script *s = kernels[ct]->mScript; 291 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 292 si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0, 293 nullptr); 294 } 295 } 296 } 297