Home | History | Annotate | Download | only in cpu_ref

Lines Matching full:mtls

161     MTLaunchStruct *mtls = (MTLaunchStruct *)data;
162 if (mtls && mtls->fep.dimY <= 1 && mtls->xEnd <= mtls->xStart + mtls->mSliceSize) {
304 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
306 memcpy(&p, &mtls->fep, sizeof(p));
308 uint32_t sig = mtls->sig;
310 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
312 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
313 uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
314 uint32_t yEnd = yStart + mtls->mSliceSize;
315 yEnd = rsMin(yEnd, mtls->yEnd);
320 //ALOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
321 //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
324 p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * p.y) +
325 (mtls->fep.eStrideOut * mtls->xStart);
326 p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * p.y) +
327 (mtls->fep.eStrideIn * mtls->xStart);
328 fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
334 MTLaunchStruct *mtls = (MTLaunchStruct *)usr;
336 memcpy(&p, &mtls->fep, sizeof(p));
338 uint32_t sig = mtls->sig;
340 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
342 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1);
343 uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
344 uint32_t xEnd = xStart + mtls->mSliceSize;
345 xEnd = rsMin(xEnd, mtls->xEnd);
351 //ALOGE("usr ptr in %p, out %p", mtls->fep.ptrIn, mtls->fep.ptrOut);
353 p.out = mtls->fep.ptrOut + (mtls->fep.eStrideOut * xStart);
354 p.in = mtls->fep.ptrIn + (mtls->fep.eStrideIn * xStart);
355 fn(&p, xStart, xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);
360 const RsScriptCall *sc, MTLaunchStruct *mtls) {
364 if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) {
367 if (mtls->fep.dimY > 1) {
368 uint32_t s1 = mtls->fep.dimY / ((mWorkers.mCount + 1) * 4);
373 if (mtls->fep.yStrideOut) {
374 s2 = targetByteChunk / mtls->fep.yStrideOut;
376 s2 = targetByteChunk / mtls->fep.yStrideIn;
378 mtls->mSliceSize = rsMin(s1, s2);
380 if(mtls->mSliceSize < 1) {
381 mtls->mSliceSize = 1;
384 // mtls->mSliceSize = 2;
385 launchThreads(wc_xy, mtls);
387 uint32_t s1 = mtls->fep.dimX / ((mWorkers.mCount + 1) * 4);
392 if (mtls->fep.eStrideOut) {
393 s2 = targetByteChunk / mtls->fep.eStrideOut;
395 s2 = targetByteChunk / mtls->fep.eStrideIn;
397 mtls->mSliceSize = rsMin(s1, s2);
399 if(mtls->mSliceSize < 1) {
400 mtls->mSliceSize = 1;
403 launchThreads(wc_x, mtls);
410 memcpy(&p, &mtls->fep, sizeof(p));
411 uint32_t sig = mtls->sig;
414 outer_foreach_t fn = (outer_foreach_t) mtls->kernel;
415 for (p.ar[0] = mtls->arrayStart; p.ar[0] < mtls->arrayEnd; p.ar[0]++) {
416 for (p.z = mtls->zStart; p.z < mtls->zEnd; p.z++) {
417 for (p.y = mtls->yStart; p.y < mtls->yEnd; p.y++) {
418 uint32_t offset = mtls->fep.dimY * mtls->fep.dimZ * p.ar[0] +
419 mtls->fep.dimY * p.z + p.y;
420 p.out = mtls->fep.ptrOut + (mtls->fep.yStrideOut * offset) +
421 (mtls->fep.eStrideOut * mtls->xStart);
422 p.in = mtls->fep.ptrIn + (mtls->fep.yStrideIn * offset) +
423 (mtls->fep.eStrideIn * mtls->xStart);
424 fn(&p, mtls->xStart, mtls->xEnd, mtls->fep.eStrideIn, mtls->fep.eStrideOut);