1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "rsCpuCore.h" 18 #include "rsCpuScript.h" 19 #include "rsScriptGroup.h" 20 #include "rsCpuScriptGroup.h" 21 22 #include <vector> 23 24 namespace android { 25 namespace renderscript { 26 27 CpuScriptGroupImpl::CpuScriptGroupImpl(RsdCpuReferenceImpl *ctx, const ScriptGroupBase *sg) { 28 mCtx = ctx; 29 mSG = (ScriptGroup*)sg; 30 } 31 32 CpuScriptGroupImpl::~CpuScriptGroupImpl() { 33 34 } 35 36 bool CpuScriptGroupImpl::init() { 37 return true; 38 } 39 40 void CpuScriptGroupImpl::setInput(const ScriptKernelID *kid, Allocation *a) { 41 } 42 43 void CpuScriptGroupImpl::setOutput(const ScriptKernelID *kid, Allocation *a) { 44 } 45 46 47 typedef void (*ScriptGroupRootFunc_t)(const RsExpandKernelDriverInfo *kinfo, 48 uint32_t xstart, uint32_t xend, 49 uint32_t outstep); 50 51 void CpuScriptGroupImpl::scriptGroupRoot(const RsExpandKernelDriverInfo *kinfo, 52 uint32_t xstart, uint32_t xend, 53 uint32_t outstep) { 54 55 56 const ScriptList *sl = (const ScriptList *)kinfo->usr; 57 RsExpandKernelDriverInfo *mkinfo = const_cast<RsExpandKernelDriverInfo *>(kinfo); 58 59 const uint32_t oldInStride = mkinfo->inStride[0]; 60 61 for (size_t ct = 0; ct < sl->count; ct++) { 62 ScriptGroupRootFunc_t func; 63 func = (ScriptGroupRootFunc_t)sl->fnPtrs[ct]; 64 mkinfo->usr = sl->usrPtrs[ct]; 65 66 if (sl->ins[ct]) { 67 rsAssert(kinfo->inLen == 1); 68 69 mkinfo->inPtr[0] = (const uint8_t *)sl->ins[ct]->mHal.drvState.lod[0].mallocPtr; 70 71 mkinfo->inStride[0] = sl->ins[ct]->mHal.state.elementSizeBytes; 72 73 if (sl->inExts[ct]) { 74 mkinfo->inPtr[0] = 75 (mkinfo->inPtr[0] + 76 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->current.y); 77 78 } else if (sl->ins[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) { 79 mkinfo->inPtr[0] = 80 (mkinfo->inPtr[0] + 81 sl->ins[ct]->mHal.drvState.lod[0].stride * kinfo->lid); 82 } 83 84 } else { 85 rsAssert(kinfo->inLen == 0); 86 87 mkinfo->inPtr[0] = nullptr; 88 mkinfo->inStride[0] = 0; 89 } 90 91 uint32_t ostep; 92 if (sl->outs[ct]) { 93 mkinfo->outPtr[0] = 94 (uint8_t *)sl->outs[ct]->mHal.drvState.lod[0].mallocPtr; 95 96 ostep = sl->outs[ct]->mHal.state.elementSizeBytes; 97 98 if (sl->outExts[ct]) { 99 mkinfo->outPtr[0] = 100 mkinfo->outPtr[0] + 101 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->current.y; 102 103 } else if (sl->outs[ct]->mHal.drvState.lod[0].dimY > kinfo->lid) { 104 mkinfo->outPtr[0] = 105 mkinfo->outPtr[0] + 106 sl->outs[ct]->mHal.drvState.lod[0].stride * kinfo->lid; 107 } 108 } else { 109 mkinfo->outPtr[0] = nullptr; 110 ostep = 0; 111 } 112 113 //ALOGE("kernel %i %p,%p %p,%p", ct, mp->ptrIn, mp->in, mp->ptrOut, mp->out); 114 func(kinfo, xstart, xend, ostep); 115 } 116 //ALOGE("script group root"); 117 118 mkinfo->inStride[0] = oldInStride; 119 mkinfo->usr = sl; 120 } 121 122 123 124 void CpuScriptGroupImpl::execute() { 125 std::vector<Allocation *> ins; 126 std::vector<uint8_t> inExts; 127 std::vector<Allocation *> outs; 128 std::vector<uint8_t> outExts; 129 std::vector<const ScriptKernelID *> kernels; 130 bool fieldDep = false; 131 132 for (size_t ct=0; ct < mSG->mNodes.size(); ct++) { 133 ScriptGroup::Node *n = mSG->mNodes[ct]; 134 Script *s = n->mKernels[0]->mScript; 135 if (s->hasObjectSlots()) { 136 // Disable the ScriptGroup optimization if we have global RS 137 // objects that might interfere between kernels. 138 fieldDep = true; 139 } 140 141 //ALOGE("node %i, order %i, in %i out %i", (int)ct, n->mOrder, (int)n->mInputs.size(), (int)n->mOutputs.size()); 142 143 for (size_t ct2=0; ct2 < n->mInputs.size(); ct2++) { 144 if (n->mInputs[ct2]->mDstField.get() && n->mInputs[ct2]->mDstField->mScript) { 145 //ALOGE("field %p %zu", n->mInputs[ct2]->mDstField->mScript, n->mInputs[ct2]->mDstField->mSlot); 146 s->setVarObj(n->mInputs[ct2]->mDstField->mSlot, n->mInputs[ct2]->mAlloc.get()); 147 } 148 } 149 150 for (size_t ct2=0; ct2 < n->mKernels.size(); ct2++) { 151 const ScriptKernelID *k = n->mKernels[ct2]; 152 Allocation *ain = nullptr; 153 Allocation *aout = nullptr; 154 bool inExt = false; 155 bool outExt = false; 156 157 for (size_t ct3=0; ct3 < n->mInputs.size(); ct3++) { 158 if (n->mInputs[ct3]->mDstKernel.get() == k) { 159 ain = n->mInputs[ct3]->mAlloc.get(); 160 break; 161 } 162 } 163 if (ain == nullptr) { 164 for (size_t ct3=0; ct3 < mSG->mInputs.size(); ct3++) { 165 if (mSG->mInputs[ct3]->mKernel == k) { 166 ain = mSG->mInputs[ct3]->mAlloc.get(); 167 inExt = true; 168 break; 169 } 170 } 171 } 172 173 for (size_t ct3=0; ct3 < n->mOutputs.size(); ct3++) { 174 if (n->mOutputs[ct3]->mSource.get() == k) { 175 aout = n->mOutputs[ct3]->mAlloc.get(); 176 if(n->mOutputs[ct3]->mDstField.get() != nullptr) { 177 fieldDep = true; 178 } 179 break; 180 } 181 } 182 if (aout == nullptr) { 183 for (size_t ct3=0; ct3 < mSG->mOutputs.size(); ct3++) { 184 if (mSG->mOutputs[ct3]->mKernel == k) { 185 aout = mSG->mOutputs[ct3]->mAlloc.get(); 186 outExt = true; 187 break; 188 } 189 } 190 } 191 192 rsAssert((k->mHasKernelOutput == (aout != nullptr)) && 193 (k->mHasKernelInput == (ain != nullptr))); 194 195 ins.push_back(ain); 196 inExts.push_back(inExt); 197 outs.push_back(aout); 198 outExts.push_back(outExt); 199 kernels.push_back(k); 200 } 201 202 } 203 204 MTLaunchStructForEach mtls; 205 206 if (fieldDep) { 207 for (size_t ct=0; ct < ins.size(); ct++) { 208 Script *s = kernels[ct]->mScript; 209 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 210 uint32_t slot = kernels[ct]->mSlot; 211 212 uint32_t inLen; 213 const Allocation **ains; 214 215 if (ins[ct] == nullptr) { 216 inLen = 0; 217 ains = nullptr; 218 219 } else { 220 inLen = 1; 221 ains = const_cast<const Allocation**>(&ins[ct]); 222 } 223 224 bool launchOK = si->forEachMtlsSetup(ains, inLen, outs[ct], nullptr, 0, nullptr, &mtls); 225 226 si->forEachKernelSetup(slot, &mtls); 227 si->preLaunch(slot, ains, inLen, outs[ct], mtls.fep.usr, 228 mtls.fep.usrLen, nullptr); 229 230 if (launchOK) { 231 mCtx->launchForEach(ains, inLen, outs[ct], nullptr, &mtls); 232 } 233 234 si->postLaunch(slot, ains, inLen, outs[ct], nullptr, 0, nullptr); 235 } 236 } else { 237 ScriptList sl; 238 sl.ins = ins.data(); 239 sl.outs = outs.data(); 240 sl.kernels = kernels.data(); 241 sl.count = kernels.size(); 242 243 uint32_t inLen; 244 const Allocation **ains; 245 246 if (ins[0] == nullptr) { 247 inLen = 0; 248 ains = nullptr; 249 250 } else { 251 inLen = 1; 252 ains = const_cast<const Allocation**>(&ins[0]); 253 } 254 255 std::vector<const void *> usrPtrs; 256 std::vector<const void *> fnPtrs; 257 std::vector<uint32_t> sigs; 258 for (size_t ct=0; ct < kernels.size(); ct++) { 259 Script *s = kernels[ct]->mScript; 260 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 261 262 si->forEachKernelSetup(kernels[ct]->mSlot, &mtls); 263 fnPtrs.push_back((void *)mtls.kernel); 264 usrPtrs.push_back(mtls.fep.usr); 265 sigs.push_back(mtls.fep.usrLen); 266 si->preLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], 267 mtls.fep.usr, mtls.fep.usrLen, nullptr); 268 } 269 sl.sigs = sigs.data(); 270 sl.usrPtrs = usrPtrs.data(); 271 sl.fnPtrs = fnPtrs.data(); 272 sl.inExts = inExts.data(); 273 sl.outExts = outExts.data(); 274 275 Script *s = kernels[0]->mScript; 276 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 277 278 if (si->forEachMtlsSetup(ains, inLen, outs[0], nullptr, 0, nullptr, &mtls)) { 279 280 mtls.script = nullptr; 281 mtls.kernel = &scriptGroupRoot; 282 mtls.fep.usr = &sl; 283 284 mCtx->launchForEach(ains, inLen, outs[0], nullptr, &mtls); 285 } 286 287 for (size_t ct=0; ct < kernels.size(); ct++) { 288 Script *s = kernels[ct]->mScript; 289 RsdCpuScriptImpl *si = (RsdCpuScriptImpl *)mCtx->lookupScript(s); 290 si->postLaunch(kernels[ct]->mSlot, ains, inLen, outs[ct], nullptr, 0, 291 nullptr); 292 } 293 } 294 } 295 296 } // namespace renderscript 297 } // namespace android 298