1 /* 2 * Copyright (C) 2012 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "rsCpuCore.h" 18 #include "rsCpuScript.h" 19 #include "rsCpuScriptGroup.h" 20 #include "rsCpuScriptGroup2.h" 21 22 #include <malloc.h> 23 #include "rsContext.h" 24 25 #include <sys/types.h> 26 #include <sys/resource.h> 27 #include <sched.h> 28 #include <sys/syscall.h> 29 #include <stdio.h> 30 #include <string.h> 31 #include <unistd.h> 32 33 #if !defined(RS_SERVER) && !defined(RS_COMPATIBILITY_LIB) 34 #include <cutils/properties.h> 35 #include "utils/StopWatch.h" 36 #endif 37 38 #ifdef RS_SERVER 39 // Android exposes gettid(), standard Linux does not 40 static pid_t gettid() { 41 return syscall(SYS_gettid); 42 } 43 #endif 44 45 using namespace android; 46 using namespace android::renderscript; 47 48 typedef void (*outer_foreach_t)( 49 const RsExpandKernelDriverInfo *, 50 uint32_t x1, uint32_t x2, uint32_t outstep); 51 52 53 static pthread_key_t gThreadTLSKey = 0; 54 static uint32_t gThreadTLSKeyCount = 0; 55 static pthread_mutex_t gInitMutex = PTHREAD_MUTEX_INITIALIZER; 56 57 bool android::renderscript::gArchUseSIMD = false; 58 59 RsdCpuReference::~RsdCpuReference() { 60 } 61 62 RsdCpuReference * RsdCpuReference::create(Context *rsc, uint32_t version_major, 63 uint32_t version_minor, sym_lookup_t lfn, script_lookup_t slfn 64 , bcc::RSLinkRuntimeCallback pLinkRuntimeCallback, 65 RSSelectRTCallback pSelectRTCallback, 66 const char *pBccPluginName 67 ) { 68 69 RsdCpuReferenceImpl *cpu = new RsdCpuReferenceImpl(rsc); 70 if (!cpu) { 71 return nullptr; 72 } 73 if (!cpu->init(version_major, version_minor, lfn, slfn)) { 74 delete cpu; 75 return nullptr; 76 } 77 78 cpu->setLinkRuntimeCallback(pLinkRuntimeCallback); 79 cpu->setSelectRTCallback(pSelectRTCallback); 80 if (pBccPluginName) { 81 cpu->setBccPluginName(pBccPluginName); 82 } 83 84 return cpu; 85 } 86 87 88 Context * RsdCpuReference::getTlsContext() { 89 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey); 90 return tls->mContext; 91 } 92 93 const Script * RsdCpuReference::getTlsScript() { 94 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey); 95 return tls->mScript; 96 } 97 98 pthread_key_t RsdCpuReference::getThreadTLSKey(){ return gThreadTLSKey; } 99 100 //////////////////////////////////////////////////////////// 101 /// 102 103 RsdCpuReferenceImpl::RsdCpuReferenceImpl(Context *rsc) { 104 mRSC = rsc; 105 106 version_major = 0; 107 version_minor = 0; 108 mInForEach = false; 109 memset(&mWorkers, 0, sizeof(mWorkers)); 110 memset(&mTlsStruct, 0, sizeof(mTlsStruct)); 111 mExit = false; 112 mLinkRuntimeCallback = nullptr; 113 mSelectRTCallback = nullptr; 114 mSetupCompilerCallback = nullptr; 115 mEmbedGlobalInfo = true; 116 mEmbedGlobalInfoSkipConstant = true; 117 } 118 119 120 void * RsdCpuReferenceImpl::helperThreadProc(void *vrsc) { 121 RsdCpuReferenceImpl *dc = (RsdCpuReferenceImpl *)vrsc; 122 123 uint32_t idx = __sync_fetch_and_add(&dc->mWorkers.mLaunchCount, 1); 124 125 //ALOGV("RS helperThread starting %p idx=%i", dc, idx); 126 127 dc->mWorkers.mLaunchSignals[idx].init(); 128 dc->mWorkers.mNativeThreadId[idx] = gettid(); 129 130 memset(&dc->mTlsStruct, 0, sizeof(dc->mTlsStruct)); 131 int status = pthread_setspecific(gThreadTLSKey, &dc->mTlsStruct); 132 if (status) { 133 ALOGE("pthread_setspecific %i", status); 134 } 135 136 #if 0 137 typedef struct {uint64_t bits[1024 / 64]; } cpu_set_t; 138 cpu_set_t cpuset; 139 memset(&cpuset, 0, sizeof(cpuset)); 140 cpuset.bits[idx / 64] |= 1ULL << (idx % 64); 141 int ret = syscall(241, rsc->mWorkers.mNativeThreadId[idx], 142 sizeof(cpuset), &cpuset); 143 ALOGE("SETAFFINITY ret = %i %s", ret, EGLUtils::strerror(ret)); 144 #endif 145 146 while (!dc->mExit) { 147 dc->mWorkers.mLaunchSignals[idx].wait(); 148 if (dc->mWorkers.mLaunchCallback) { 149 // idx +1 is used because the calling thread is always worker 0. 150 dc->mWorkers.mLaunchCallback(dc->mWorkers.mLaunchData, idx+1); 151 } 152 __sync_fetch_and_sub(&dc->mWorkers.mRunningCount, 1); 153 dc->mWorkers.mCompleteSignal.set(); 154 } 155 156 //ALOGV("RS helperThread exited %p idx=%i", dc, idx); 157 return nullptr; 158 } 159 160 void RsdCpuReferenceImpl::launchThreads(WorkerCallback_t cbk, void *data) { 161 mWorkers.mLaunchData = data; 162 mWorkers.mLaunchCallback = cbk; 163 164 // fast path for very small launches 165 MTLaunchStruct *mtls = (MTLaunchStruct *)data; 166 if (mtls && mtls->fep.dim.y <= 1 && mtls->end.x <= mtls->start.x + mtls->mSliceSize) { 167 if (mWorkers.mLaunchCallback) { 168 mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0); 169 } 170 return; 171 } 172 173 mWorkers.mRunningCount = mWorkers.mCount; 174 __sync_synchronize(); 175 176 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { 177 mWorkers.mLaunchSignals[ct].set(); 178 } 179 180 // We use the calling thread as one of the workers so we can start without 181 // the delay of the thread wakeup. 182 if (mWorkers.mLaunchCallback) { 183 mWorkers.mLaunchCallback(mWorkers.mLaunchData, 0); 184 } 185 186 while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) { 187 mWorkers.mCompleteSignal.wait(); 188 } 189 } 190 191 192 void RsdCpuReferenceImpl::lockMutex() { 193 pthread_mutex_lock(&gInitMutex); 194 } 195 196 void RsdCpuReferenceImpl::unlockMutex() { 197 pthread_mutex_unlock(&gInitMutex); 198 } 199 200 // Determine if the CPU we're running on supports SIMD instructions. 201 static void GetCpuInfo() { 202 // Read the CPU flags from /proc/cpuinfo. 203 FILE *cpuinfo = fopen("/proc/cpuinfo", "r"); 204 205 if (!cpuinfo) { 206 return; 207 } 208 209 char cpuinfostr[4096]; 210 // fgets() ends with newline or EOF, need to check the whole 211 // "cpuinfo" file to make sure we can use SIMD or not. 212 while (fgets(cpuinfostr, sizeof(cpuinfostr), cpuinfo)) { 213 #if defined(ARCH_ARM_HAVE_VFP) || defined(ARCH_ARM_USE_INTRINSICS) 214 gArchUseSIMD = strstr(cpuinfostr, " neon") || strstr(cpuinfostr, " asimd"); 215 #elif defined(ARCH_X86_HAVE_SSSE3) 216 gArchUseSIMD = strstr(cpuinfostr, " ssse3"); 217 #endif 218 if (gArchUseSIMD) { 219 break; 220 } 221 } 222 fclose(cpuinfo); 223 } 224 225 bool RsdCpuReferenceImpl::init(uint32_t version_major, uint32_t version_minor, 226 sym_lookup_t lfn, script_lookup_t slfn) { 227 228 mSymLookupFn = lfn; 229 mScriptLookupFn = slfn; 230 231 lockMutex(); 232 if (!gThreadTLSKeyCount) { 233 int status = pthread_key_create(&gThreadTLSKey, nullptr); 234 if (status) { 235 ALOGE("Failed to init thread tls key."); 236 unlockMutex(); 237 return false; 238 } 239 } 240 gThreadTLSKeyCount++; 241 unlockMutex(); 242 243 mTlsStruct.mContext = mRSC; 244 mTlsStruct.mScript = nullptr; 245 int status = pthread_setspecific(gThreadTLSKey, &mTlsStruct); 246 if (status) { 247 ALOGE("pthread_setspecific %i", status); 248 } 249 250 GetCpuInfo(); 251 252 int cpu = sysconf(_SC_NPROCESSORS_CONF); 253 if(mRSC->props.mDebugMaxThreads) { 254 cpu = mRSC->props.mDebugMaxThreads; 255 } 256 if (cpu < 2) { 257 mWorkers.mCount = 0; 258 return true; 259 } 260 261 // Subtract one from the cpu count because we also use the command thread as a worker. 262 mWorkers.mCount = (uint32_t)(cpu - 1); 263 264 ALOGV("%p Launching thread(s), CPUs %i", mRSC, mWorkers.mCount + 1); 265 266 mWorkers.mThreadId = (pthread_t *) calloc(mWorkers.mCount, sizeof(pthread_t)); 267 mWorkers.mNativeThreadId = (pid_t *) calloc(mWorkers.mCount, sizeof(pid_t)); 268 mWorkers.mLaunchSignals = new Signal[mWorkers.mCount]; 269 mWorkers.mLaunchCallback = nullptr; 270 271 mWorkers.mCompleteSignal.init(); 272 273 mWorkers.mRunningCount = mWorkers.mCount; 274 mWorkers.mLaunchCount = 0; 275 __sync_synchronize(); 276 277 pthread_attr_t threadAttr; 278 status = pthread_attr_init(&threadAttr); 279 if (status) { 280 ALOGE("Failed to init thread attribute."); 281 return false; 282 } 283 284 for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { 285 status = pthread_create(&mWorkers.mThreadId[ct], &threadAttr, helperThreadProc, this); 286 if (status) { 287 mWorkers.mCount = ct; 288 ALOGE("Created fewer than expected number of RS threads."); 289 break; 290 } 291 } 292 while (__sync_fetch_and_or(&mWorkers.mRunningCount, 0) != 0) { 293 usleep(100); 294 } 295 296 pthread_attr_destroy(&threadAttr); 297 return true; 298 } 299 300 301 void RsdCpuReferenceImpl::setPriority(int32_t priority) { 302 for (uint32_t ct=0; ct < mWorkers.mCount; ct++) { 303 setpriority(PRIO_PROCESS, mWorkers.mNativeThreadId[ct], priority); 304 } 305 } 306 307 RsdCpuReferenceImpl::~RsdCpuReferenceImpl() { 308 mExit = true; 309 mWorkers.mLaunchData = nullptr; 310 mWorkers.mLaunchCallback = nullptr; 311 mWorkers.mRunningCount = mWorkers.mCount; 312 __sync_synchronize(); 313 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { 314 mWorkers.mLaunchSignals[ct].set(); 315 } 316 void *res; 317 for (uint32_t ct = 0; ct < mWorkers.mCount; ct++) { 318 pthread_join(mWorkers.mThreadId[ct], &res); 319 } 320 rsAssert(__sync_fetch_and_or(&mWorkers.mRunningCount, 0) == 0); 321 free(mWorkers.mThreadId); 322 free(mWorkers.mNativeThreadId); 323 delete[] mWorkers.mLaunchSignals; 324 325 // Global structure cleanup. 326 lockMutex(); 327 --gThreadTLSKeyCount; 328 if (!gThreadTLSKeyCount) { 329 pthread_key_delete(gThreadTLSKey); 330 } 331 unlockMutex(); 332 333 } 334 335 static inline void FepPtrSetup(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo *fep, 336 uint32_t x, uint32_t y, 337 uint32_t z = 0, uint32_t lod = 0, 338 RsAllocationCubemapFace face = RS_ALLOCATION_CUBEMAP_FACE_POSITIVE_X, 339 uint32_t a1 = 0, uint32_t a2 = 0, uint32_t a3 = 0, uint32_t a4 = 0) { 340 341 for (uint32_t i = 0; i < fep->inLen; i++) { 342 fep->inPtr[i] = (const uint8_t *)mtls->ains[i]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4); 343 } 344 345 if (mtls->aout[0] != nullptr) { 346 fep->outPtr[0] = (uint8_t *)mtls->aout[0]->getPointerUnchecked(x, y, z, lod, face, a1, a2, a3, a4); 347 } 348 } 349 350 static uint32_t sliceInt(uint32_t *p, uint32_t val, uint32_t start, uint32_t end) { 351 if (start >= end) { 352 *p = start; 353 return val; 354 } 355 356 uint32_t div = end - start; 357 358 uint32_t n = val / div; 359 *p = (val - (n * div)) + start; 360 return n; 361 } 362 363 static bool SelectOuterSlice(const MTLaunchStruct *mtls, RsExpandKernelDriverInfo* fep, uint32_t sliceNum) { 364 365 uint32_t r = sliceNum; 366 r = sliceInt(&fep->current.z, r, mtls->start.z, mtls->end.z); 367 r = sliceInt(&fep->current.lod, r, mtls->start.lod, mtls->end.lod); 368 r = sliceInt(&fep->current.face, r, mtls->start.face, mtls->end.face); 369 r = sliceInt(&fep->current.array[0], r, mtls->start.array[0], mtls->end.array[0]); 370 r = sliceInt(&fep->current.array[1], r, mtls->start.array[1], mtls->end.array[1]); 371 r = sliceInt(&fep->current.array[2], r, mtls->start.array[2], mtls->end.array[2]); 372 r = sliceInt(&fep->current.array[3], r, mtls->start.array[3], mtls->end.array[3]); 373 return r == 0; 374 } 375 376 377 static void walk_general(void *usr, uint32_t idx) { 378 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 379 RsExpandKernelDriverInfo fep = mtls->fep; 380 fep.lid = idx; 381 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 382 383 384 while(1) { 385 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); 386 387 if (!SelectOuterSlice(mtls, &fep, slice)) { 388 return; 389 } 390 391 for (fep.current.y = mtls->start.y; fep.current.y < mtls->end.y; 392 fep.current.y++) { 393 394 FepPtrSetup(mtls, &fep, mtls->start.x, 395 fep.current.y, fep.current.z, fep.current.lod, 396 (RsAllocationCubemapFace)fep.current.face, 397 fep.current.array[0], fep.current.array[1], 398 fep.current.array[2], fep.current.array[3]); 399 400 fn(&fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]); 401 } 402 } 403 404 } 405 406 static void walk_2d(void *usr, uint32_t idx) { 407 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 408 RsExpandKernelDriverInfo fep = mtls->fep; 409 fep.lid = idx; 410 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 411 412 while (1) { 413 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); 414 uint32_t yStart = mtls->start.y + slice * mtls->mSliceSize; 415 uint32_t yEnd = yStart + mtls->mSliceSize; 416 417 yEnd = rsMin(yEnd, mtls->end.y); 418 419 if (yEnd <= yStart) { 420 return; 421 } 422 423 for (fep.current.y = yStart; fep.current.y < yEnd; fep.current.y++) { 424 FepPtrSetup(mtls, &fep, mtls->start.x, fep.current.y); 425 426 fn(&fep, mtls->start.x, mtls->end.x, fep.outStride[0]); 427 } 428 } 429 } 430 431 static void walk_1d(void *usr, uint32_t idx) { 432 MTLaunchStruct *mtls = (MTLaunchStruct *)usr; 433 RsExpandKernelDriverInfo fep = mtls->fep; 434 fep.lid = idx; 435 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 436 437 while (1) { 438 uint32_t slice = (uint32_t)__sync_fetch_and_add(&mtls->mSliceNum, 1); 439 uint32_t xStart = mtls->start.x + slice * mtls->mSliceSize; 440 uint32_t xEnd = xStart + mtls->mSliceSize; 441 442 xEnd = rsMin(xEnd, mtls->end.x); 443 444 if (xEnd <= xStart) { 445 return; 446 } 447 448 FepPtrSetup(mtls, &fep, xStart, 0); 449 450 fn(&fep, xStart, xEnd, fep.outStride[0]); 451 } 452 } 453 454 void RsdCpuReferenceImpl::launchThreads(const Allocation ** ains, 455 uint32_t inLen, 456 Allocation* aout, 457 const RsScriptCall* sc, 458 MTLaunchStruct* mtls) { 459 460 //android::StopWatch kernel_time("kernel time"); 461 462 bool outerDims = (mtls->start.z != mtls->end.z) || 463 (mtls->start.face != mtls->end.face) || 464 (mtls->start.lod != mtls->end.lod) || 465 (mtls->start.array[0] != mtls->end.array[0]) || 466 (mtls->start.array[1] != mtls->end.array[1]) || 467 (mtls->start.array[2] != mtls->end.array[2]) || 468 (mtls->start.array[3] != mtls->end.array[3]); 469 470 if ((mWorkers.mCount >= 1) && mtls->isThreadable && !mInForEach) { 471 const size_t targetByteChunk = 16 * 1024; 472 mInForEach = true; 473 474 if (outerDims) { 475 // No fancy logic for chunk size 476 mtls->mSliceSize = 1; 477 launchThreads(walk_general, mtls); 478 } else if (mtls->fep.dim.y > 1) { 479 uint32_t s1 = mtls->fep.dim.y / ((mWorkers.mCount + 1) * 4); 480 uint32_t s2 = 0; 481 482 // This chooses our slice size to rate limit atomic ops to 483 // one per 16k bytes of reads/writes. 484 if ((mtls->aout[0] != nullptr) && mtls->aout[0]->mHal.drvState.lod[0].stride) { 485 s2 = targetByteChunk / mtls->aout[0]->mHal.drvState.lod[0].stride; 486 } else if (mtls->ains[0]) { 487 s2 = targetByteChunk / mtls->ains[0]->mHal.drvState.lod[0].stride; 488 } else { 489 // Launch option only case 490 // Use s1 based only on the dimensions 491 s2 = s1; 492 } 493 mtls->mSliceSize = rsMin(s1, s2); 494 495 if(mtls->mSliceSize < 1) { 496 mtls->mSliceSize = 1; 497 } 498 499 launchThreads(walk_2d, mtls); 500 } else { 501 uint32_t s1 = mtls->fep.dim.x / ((mWorkers.mCount + 1) * 4); 502 uint32_t s2 = 0; 503 504 // This chooses our slice size to rate limit atomic ops to 505 // one per 16k bytes of reads/writes. 506 if ((mtls->aout[0] != nullptr) && mtls->aout[0]->getType()->getElementSizeBytes()) { 507 s2 = targetByteChunk / mtls->aout[0]->getType()->getElementSizeBytes(); 508 } else if (mtls->ains[0]) { 509 s2 = targetByteChunk / mtls->ains[0]->getType()->getElementSizeBytes(); 510 } else { 511 // Launch option only case 512 // Use s1 based only on the dimensions 513 s2 = s1; 514 } 515 mtls->mSliceSize = rsMin(s1, s2); 516 517 if (mtls->mSliceSize < 1) { 518 mtls->mSliceSize = 1; 519 } 520 521 launchThreads(walk_1d, mtls); 522 } 523 mInForEach = false; 524 525 } else { 526 outer_foreach_t fn = (outer_foreach_t) mtls->kernel; 527 uint32_t slice = 0; 528 529 530 while(SelectOuterSlice(mtls, &mtls->fep, slice++)) { 531 for (mtls->fep.current.y = mtls->start.y; 532 mtls->fep.current.y < mtls->end.y; 533 mtls->fep.current.y++) { 534 535 FepPtrSetup(mtls, &mtls->fep, mtls->start.x, 536 mtls->fep.current.y, mtls->fep.current.z, mtls->fep.current.lod, 537 (RsAllocationCubemapFace) mtls->fep.current.face, 538 mtls->fep.current.array[0], mtls->fep.current.array[1], 539 mtls->fep.current.array[2], mtls->fep.current.array[3]); 540 541 fn(&mtls->fep, mtls->start.x, mtls->end.x, mtls->fep.outStride[0]); 542 } 543 } 544 } 545 } 546 547 RsdCpuScriptImpl * RsdCpuReferenceImpl::setTLS(RsdCpuScriptImpl *sc) { 548 //ALOGE("setTls %p", sc); 549 ScriptTLSStruct * tls = (ScriptTLSStruct *)pthread_getspecific(gThreadTLSKey); 550 rsAssert(tls); 551 RsdCpuScriptImpl *old = tls->mImpl; 552 tls->mImpl = sc; 553 tls->mContext = mRSC; 554 if (sc) { 555 tls->mScript = sc->getScript(); 556 } else { 557 tls->mScript = nullptr; 558 } 559 return old; 560 } 561 562 const RsdCpuReference::CpuSymbol * RsdCpuReferenceImpl::symLookup(const char *name) { 563 return mSymLookupFn(mRSC, name); 564 } 565 566 567 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createScript(const ScriptC *s, 568 char const *resName, char const *cacheDir, 569 uint8_t const *bitcode, size_t bitcodeSize, 570 uint32_t flags) { 571 572 RsdCpuScriptImpl *i = new RsdCpuScriptImpl(this, s); 573 if (!i->init(resName, cacheDir, bitcode, bitcodeSize, flags 574 , getBccPluginName() 575 )) { 576 delete i; 577 return nullptr; 578 } 579 return i; 580 } 581 582 extern RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx, 583 const Script *s, const Element *e); 584 extern RsdCpuScriptImpl * rsdIntrinsic_Convolve3x3(RsdCpuReferenceImpl *ctx, 585 const Script *s, const Element *e); 586 extern RsdCpuScriptImpl * rsdIntrinsic_ColorMatrix(RsdCpuReferenceImpl *ctx, 587 const Script *s, const Element *e); 588 extern RsdCpuScriptImpl * rsdIntrinsic_LUT(RsdCpuReferenceImpl *ctx, 589 const Script *s, const Element *e); 590 extern RsdCpuScriptImpl * rsdIntrinsic_Convolve5x5(RsdCpuReferenceImpl *ctx, 591 const Script *s, const Element *e); 592 extern RsdCpuScriptImpl * rsdIntrinsic_Blur(RsdCpuReferenceImpl *ctx, 593 const Script *s, const Element *e); 594 extern RsdCpuScriptImpl * rsdIntrinsic_YuvToRGB(RsdCpuReferenceImpl *ctx, 595 const Script *s, const Element *e); 596 extern RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx, 597 const Script *s, const Element *e); 598 extern RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, 599 const Script *s, const Element *e); 600 extern RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, 601 const Script *s, const Element *e); 602 extern RsdCpuScriptImpl * rsdIntrinsic_BLAS(RsdCpuReferenceImpl *ctx, 603 const Script *s, const Element *e); 604 605 RsdCpuReference::CpuScript * RsdCpuReferenceImpl::createIntrinsic(const Script *s, 606 RsScriptIntrinsicID iid, Element *e) { 607 608 RsdCpuScriptImpl *i = nullptr; 609 switch (iid) { 610 case RS_SCRIPT_INTRINSIC_ID_3DLUT: 611 i = rsdIntrinsic_3DLUT(this, s, e); 612 break; 613 case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_3x3: 614 i = rsdIntrinsic_Convolve3x3(this, s, e); 615 break; 616 case RS_SCRIPT_INTRINSIC_ID_COLOR_MATRIX: 617 i = rsdIntrinsic_ColorMatrix(this, s, e); 618 break; 619 case RS_SCRIPT_INTRINSIC_ID_LUT: 620 i = rsdIntrinsic_LUT(this, s, e); 621 break; 622 case RS_SCRIPT_INTRINSIC_ID_CONVOLVE_5x5: 623 i = rsdIntrinsic_Convolve5x5(this, s, e); 624 break; 625 case RS_SCRIPT_INTRINSIC_ID_BLUR: 626 i = rsdIntrinsic_Blur(this, s, e); 627 break; 628 case RS_SCRIPT_INTRINSIC_ID_YUV_TO_RGB: 629 i = rsdIntrinsic_YuvToRGB(this, s, e); 630 break; 631 case RS_SCRIPT_INTRINSIC_ID_BLEND: 632 i = rsdIntrinsic_Blend(this, s, e); 633 break; 634 case RS_SCRIPT_INTRINSIC_ID_HISTOGRAM: 635 i = rsdIntrinsic_Histogram(this, s, e); 636 break; 637 case RS_SCRIPT_INTRINSIC_ID_RESIZE: 638 i = rsdIntrinsic_Resize(this, s, e); 639 break; 640 case RS_SCRIPT_INTRINSIC_ID_BLAS: 641 i = rsdIntrinsic_BLAS(this, s, e); 642 break; 643 644 default: 645 rsAssert(0); 646 } 647 648 return i; 649 } 650 651 void* RsdCpuReferenceImpl::createScriptGroup(const ScriptGroupBase *sg) { 652 switch (sg->getApiVersion()) { 653 case ScriptGroupBase::SG_V1: { 654 CpuScriptGroupImpl *sgi = new CpuScriptGroupImpl(this, sg); 655 if (!sgi->init()) { 656 delete sgi; 657 return nullptr; 658 } 659 return sgi; 660 } 661 case ScriptGroupBase::SG_V2: { 662 return new CpuScriptGroup2Impl(this, sg); 663 } 664 } 665 return nullptr; 666 } 667