1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18 #include "rsCpuIntrinsic.h" 19 #include "rsCpuIntrinsicInlines.h" 20 21 using namespace android; 22 using namespace android::renderscript; 23 24 namespace android { 25 namespace renderscript { 26 27 28 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic { 29 public: 30 void populateScript(Script *) override; 31 void invokeFreeChildren() override; 32 33 void setGlobalObj(uint32_t slot, ObjectBase *data) override; 34 35 ~RsdCpuScriptIntrinsicResize() override; 36 RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *); 37 38 void preLaunch(uint32_t slot, const Allocation ** ains, 39 uint32_t inLen, Allocation * aout, const void * usr, 40 uint32_t usrLen, const RsScriptCall *sc) override; 41 42 float scaleX; 43 float scaleY; 44 45 protected: 46 ObjectBaseRef<const Allocation> mAlloc; 47 ObjectBaseRef<const Element> mElement; 48 49 static void kernelU1(const RsExpandKernelDriverInfo *info, 50 uint32_t xstart, uint32_t xend, 51 uint32_t outstep); 52 static void kernelU2(const RsExpandKernelDriverInfo *info, 53 uint32_t xstart, uint32_t xend, 54 uint32_t outstep); 55 static void kernelU4(const RsExpandKernelDriverInfo *info, 56 uint32_t xstart, uint32_t xend, 57 uint32_t outstep); 58 static void kernelF1(const RsExpandKernelDriverInfo *info, 59 uint32_t xstart, uint32_t xend, 60 uint32_t outstep); 61 static void kernelF2(const RsExpandKernelDriverInfo *info, 62 uint32_t xstart, uint32_t xend, 63 uint32_t outstep); 64 static void kernelF4(const RsExpandKernelDriverInfo *info, 65 uint32_t xstart, uint32_t xend, 66 uint32_t outstep); 67 }; 68 69 } 70 } 71 72 73 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) { 74 rsAssert(slot == 0); 75 mAlloc.set(static_cast<Allocation *>(data)); 76 } 77 78 static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) { 79 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 80 + x * (3.f * (p1 - p2) + p3 - p0))); 81 } 82 83 static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) { 84 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 85 + x * (3.f * (p1 - p2) + p3 - p0))); 86 } 87 88 static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) { 89 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 90 + x * (3.f * (p1 - p2) + p3 - p0))); 91 } 92 93 static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3, 94 float xf, float yf, int width) { 95 int startx = (int) floor(xf - 1); 96 xf = xf - floor(xf); 97 int maxx = width - 1; 98 int xs0 = rsMax(0, startx + 0); 99 int xs1 = rsMax(0, startx + 1); 100 int xs2 = rsMin(maxx, startx + 2); 101 int xs3 = rsMin(maxx, startx + 3); 102 103 float4 p0 = cubicInterpolate(convert_float4(yp0[xs0]), 104 convert_float4(yp0[xs1]), 105 convert_float4(yp0[xs2]), 106 convert_float4(yp0[xs3]), xf); 107 108 float4 p1 = cubicInterpolate(convert_float4(yp1[xs0]), 109 convert_float4(yp1[xs1]), 110 convert_float4(yp1[xs2]), 111 convert_float4(yp1[xs3]), xf); 112 113 float4 p2 = cubicInterpolate(convert_float4(yp2[xs0]), 114 convert_float4(yp2[xs1]), 115 convert_float4(yp2[xs2]), 116 convert_float4(yp2[xs3]), xf); 117 118 float4 p3 = cubicInterpolate(convert_float4(yp3[xs0]), 119 convert_float4(yp3[xs1]), 120 convert_float4(yp3[xs2]), 121 convert_float4(yp3[xs3]), xf); 122 123 float4 p = cubicInterpolate(p0, p1, p2, p3, yf); 124 p = clamp(p + 0.5f, 0.f, 255.f); 125 return convert_uchar4(p); 126 } 127 128 static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3, 129 float xf, float yf, int width) { 130 int startx = (int) floor(xf - 1); 131 xf = xf - floor(xf); 132 int maxx = width - 1; 133 int xs0 = rsMax(0, startx + 0); 134 int xs1 = rsMax(0, startx + 1); 135 int xs2 = rsMin(maxx, startx + 2); 136 int xs3 = rsMin(maxx, startx + 3); 137 138 float2 p0 = cubicInterpolate(convert_float2(yp0[xs0]), 139 convert_float2(yp0[xs1]), 140 convert_float2(yp0[xs2]), 141 convert_float2(yp0[xs3]), xf); 142 143 float2 p1 = cubicInterpolate(convert_float2(yp1[xs0]), 144 convert_float2(yp1[xs1]), 145 convert_float2(yp1[xs2]), 146 convert_float2(yp1[xs3]), xf); 147 148 float2 p2 = cubicInterpolate(convert_float2(yp2[xs0]), 149 convert_float2(yp2[xs1]), 150 convert_float2(yp2[xs2]), 151 convert_float2(yp2[xs3]), xf); 152 153 float2 p3 = cubicInterpolate(convert_float2(yp3[xs0]), 154 convert_float2(yp3[xs1]), 155 convert_float2(yp3[xs2]), 156 convert_float2(yp3[xs3]), xf); 157 158 float2 p = cubicInterpolate(p0, p1, p2, p3, yf); 159 p = clamp(p + 0.5f, 0.f, 255.f); 160 return convert_uchar2(p); 161 } 162 163 static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3, 164 float xf, float yf, int width) { 165 int startx = (int) floor(xf - 1); 166 xf = xf - floor(xf); 167 int maxx = width - 1; 168 int xs0 = rsMax(0, startx + 0); 169 int xs1 = rsMax(0, startx + 1); 170 int xs2 = rsMin(maxx, startx + 2); 171 int xs3 = rsMin(maxx, startx + 3); 172 173 float p0 = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1], 174 (float)yp0[xs2], (float)yp0[xs3], xf); 175 float p1 = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1], 176 (float)yp1[xs2], (float)yp1[xs3], xf); 177 float p2 = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1], 178 (float)yp2[xs2], (float)yp2[xs3], xf); 179 float p3 = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1], 180 (float)yp3[xs2], (float)yp3[xs3], xf); 181 182 float p = cubicInterpolate(p0, p1, p2, p3, yf); 183 p = clamp(p + 0.5f, 0.f, 255.f); 184 return (uchar)p; 185 } 186 187 extern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc); 188 189 extern "C" void rsdIntrinsicResizeB4_K( 190 uchar4 *dst, 191 size_t count, 192 uint32_t xf, 193 uint32_t xinc, 194 uchar4 const *srcn, 195 uchar4 const *src0, 196 uchar4 const *src1, 197 uchar4 const *src2, 198 size_t xclip, 199 size_t avail, 200 uint64_t osc_ctl, 201 int32_t const *yr); 202 203 extern "C" void rsdIntrinsicResizeB2_K( 204 uchar2 *dst, 205 size_t count, 206 uint32_t xf, 207 uint32_t xinc, 208 uchar2 const *srcn, 209 uchar2 const *src0, 210 uchar2 const *src1, 211 uchar2 const *src2, 212 size_t xclip, 213 size_t avail, 214 uint64_t osc_ctl, 215 int32_t const *yr); 216 217 extern "C" void rsdIntrinsicResizeB1_K( 218 uchar *dst, 219 size_t count, 220 uint32_t xf, 221 uint32_t xinc, 222 uchar const *srcn, 223 uchar const *src0, 224 uchar const *src1, 225 uchar const *src2, 226 size_t xclip, 227 size_t avail, 228 uint64_t osc_ctl, 229 int32_t const *yr); 230 231 #if defined(ARCH_ARM_USE_INTRINSICS) 232 static void mkYCoeff(int32_t *yr, float yf) { 233 int32_t yf1 = rint(yf * 0x10000); 234 int32_t yf2 = rint(yf * yf * 0x10000); 235 int32_t yf3 = rint(yf * yf * yf * 0x10000); 236 237 yr[0] = -(2 * yf2 - yf3 - yf1) >> 1; 238 yr[1] = (3 * yf3 - 5 * yf2 + 0x20000) >> 1; 239 yr[2] = (-3 * yf3 + 4 * yf2 + yf1) >> 1; 240 yr[3] = -(yf3 - yf2) >> 1; 241 } 242 #endif 243 244 static float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3, 245 float xf, float yf, int width) { 246 int startx = (int) floor(xf - 1); 247 xf = xf - floor(xf); 248 int maxx = width - 1; 249 int xs0 = rsMax(0, startx + 0); 250 int xs1 = rsMax(0, startx + 1); 251 int xs2 = rsMin(maxx, startx + 2); 252 int xs3 = rsMin(maxx, startx + 3); 253 254 float4 p0 = cubicInterpolate(yp0[xs0], yp0[xs1], 255 yp0[xs2], yp0[xs3], xf); 256 float4 p1 = cubicInterpolate(yp1[xs0], yp1[xs1], 257 yp1[xs2], yp1[xs3], xf); 258 float4 p2 = cubicInterpolate(yp2[xs0], yp2[xs1], 259 yp2[xs2], yp2[xs3], xf); 260 float4 p3 = cubicInterpolate(yp3[xs0], yp3[xs1], 261 yp3[xs2], yp3[xs3], xf); 262 263 float4 p = cubicInterpolate(p0, p1, p2, p3, yf); 264 return p; 265 } 266 267 static float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3, 268 float xf, float yf, int width) { 269 int startx = (int) floor(xf - 1); 270 xf = xf - floor(xf); 271 int maxx = width - 1; 272 int xs0 = rsMax(0, startx + 0); 273 int xs1 = rsMax(0, startx + 1); 274 int xs2 = rsMin(maxx, startx + 2); 275 int xs3 = rsMin(maxx, startx + 3); 276 277 float2 p0 = cubicInterpolate(yp0[xs0], yp0[xs1], 278 yp0[xs2], yp0[xs3], xf); 279 float2 p1 = cubicInterpolate(yp1[xs0], yp1[xs1], 280 yp1[xs2], yp1[xs3], xf); 281 float2 p2 = cubicInterpolate(yp2[xs0], yp2[xs1], 282 yp2[xs2], yp2[xs3], xf); 283 float2 p3 = cubicInterpolate(yp3[xs0], yp3[xs1], 284 yp3[xs2], yp3[xs3], xf); 285 286 float2 p = cubicInterpolate(p0, p1, p2, p3, yf); 287 return p; 288 } 289 290 static float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3, 291 float xf, float yf, int width) { 292 int startx = (int) floor(xf - 1); 293 xf = xf - floor(xf); 294 int maxx = width - 1; 295 int xs0 = rsMax(0, startx + 0); 296 int xs1 = rsMax(0, startx + 1); 297 int xs2 = rsMin(maxx, startx + 2); 298 int xs3 = rsMin(maxx, startx + 3); 299 300 float p0 = cubicInterpolate(yp0[xs0], yp0[xs1], 301 yp0[xs2], yp0[xs3], xf); 302 float p1 = cubicInterpolate(yp1[xs0], yp1[xs1], 303 yp1[xs2], yp1[xs3], xf); 304 float p2 = cubicInterpolate(yp2[xs0], yp2[xs1], 305 yp2[xs2], yp2[xs3], xf); 306 float p3 = cubicInterpolate(yp3[xs0], yp3[xs1], 307 yp3[xs2], yp3[xs3], xf); 308 309 float p = cubicInterpolate(p0, p1, p2, p3, yf); 310 return p; 311 } 312 313 void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info, 314 uint32_t xstart, uint32_t xend, 315 uint32_t outstep) { 316 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 317 318 if (!cp->mAlloc.get()) { 319 ALOGE("Resize executed without input, skipping"); 320 return; 321 } 322 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 323 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 324 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 325 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 326 327 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 328 int starty = (int) floor(yf - 1); 329 yf = yf - floor(yf); 330 int maxy = srcHeight - 1; 331 int ys0 = rsMax(0, starty + 0); 332 int ys1 = rsMax(0, starty + 1); 333 int ys2 = rsMin(maxy, starty + 2); 334 int ys3 = rsMin(maxy, starty + 3); 335 336 const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0); 337 const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1); 338 const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2); 339 const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3); 340 341 uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart; 342 uint32_t x1 = xstart; 343 uint32_t x2 = xend; 344 345 #if defined(ARCH_ARM_USE_INTRINSICS) 346 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) { 347 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 348 long xf16 = rint(xf * 0x10000); 349 uint32_t xinc16 = rint(cp->scaleX * 0x10000); 350 351 int xoff = (xf16 >> 16) - 1; 352 int xclip = rsMax(0, xoff) - xoff; 353 int len = x2 - x1; 354 355 int32_t yr[4]; 356 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16); 357 mkYCoeff(yr, yf); 358 359 xoff += xclip; 360 361 rsdIntrinsicResizeB4_K( 362 out, len, 363 xf16 & 0xffff, xinc16, 364 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff, 365 xclip, srcWidth - xoff + xclip, 366 osc_ctl, yr); 367 out += len; 368 x1 += len; 369 } 370 #endif 371 372 while(x1 < x2) { 373 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 374 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 375 out++; 376 x1++; 377 } 378 } 379 380 void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info, 381 uint32_t xstart, uint32_t xend, 382 uint32_t outstep) { 383 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 384 385 if (!cp->mAlloc.get()) { 386 ALOGE("Resize executed without input, skipping"); 387 return; 388 } 389 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 390 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 391 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 392 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 393 394 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 395 int starty = (int) floor(yf - 1); 396 yf = yf - floor(yf); 397 int maxy = srcHeight - 1; 398 int ys0 = rsMax(0, starty + 0); 399 int ys1 = rsMax(0, starty + 1); 400 int ys2 = rsMin(maxy, starty + 2); 401 int ys3 = rsMin(maxy, starty + 3); 402 403 const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0); 404 const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1); 405 const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2); 406 const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3); 407 408 uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart; 409 uint32_t x1 = xstart; 410 uint32_t x2 = xend; 411 412 #if defined(ARCH_ARM_USE_INTRINSICS) 413 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) { 414 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 415 long xf16 = rint(xf * 0x10000); 416 uint32_t xinc16 = rint(cp->scaleX * 0x10000); 417 418 int xoff = (xf16 >> 16) - 1; 419 int xclip = rsMax(0, xoff) - xoff; 420 int len = x2 - x1; 421 422 int32_t yr[4]; 423 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16); 424 mkYCoeff(yr, yf); 425 426 xoff += xclip; 427 428 rsdIntrinsicResizeB2_K( 429 out, len, 430 xf16 & 0xffff, xinc16, 431 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff, 432 xclip, srcWidth - xoff + xclip, 433 osc_ctl, yr); 434 out += len; 435 x1 += len; 436 } 437 #endif 438 439 while(x1 < x2) { 440 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 441 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 442 out++; 443 x1++; 444 } 445 } 446 447 void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info, 448 uint32_t xstart, uint32_t xend, 449 uint32_t outstep) { 450 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 451 452 if (!cp->mAlloc.get()) { 453 ALOGE("Resize executed without input, skipping"); 454 return; 455 } 456 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 457 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 458 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 459 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 460 461 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 462 int starty = (int) floor(yf - 1); 463 yf = yf - floor(yf); 464 int maxy = srcHeight - 1; 465 int ys0 = rsMax(0, starty + 0); 466 int ys1 = rsMax(0, starty + 1); 467 int ys2 = rsMin(maxy, starty + 2); 468 int ys3 = rsMin(maxy, starty + 3); 469 470 const uchar *yp0 = pin + stride * ys0; 471 const uchar *yp1 = pin + stride * ys1; 472 const uchar *yp2 = pin + stride * ys2; 473 const uchar *yp3 = pin + stride * ys3; 474 475 uchar *out = ((uchar *)info->outPtr[0]) + xstart; 476 uint32_t x1 = xstart; 477 uint32_t x2 = xend; 478 479 #if defined(ARCH_ARM_USE_INTRINSICS) 480 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) { 481 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 482 long xf16 = rint(xf * 0x10000); 483 uint32_t xinc16 = rint(cp->scaleX * 0x10000); 484 485 int xoff = (xf16 >> 16) - 1; 486 int xclip = rsMax(0, xoff) - xoff; 487 int len = x2 - x1; 488 489 int32_t yr[4]; 490 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16); 491 mkYCoeff(yr, yf); 492 493 xoff += xclip; 494 495 rsdIntrinsicResizeB1_K( 496 out, len, 497 xf16 & 0xffff, xinc16, 498 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff, 499 xclip, srcWidth - xoff + xclip, 500 osc_ctl, yr); 501 out += len; 502 x1 += len; 503 } 504 #endif 505 506 while(x1 < x2) { 507 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 508 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 509 out++; 510 x1++; 511 } 512 } 513 514 void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info, 515 uint32_t xstart, uint32_t xend, 516 uint32_t outstep) { 517 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 518 519 if (!cp->mAlloc.get()) { 520 ALOGE("Resize executed without input, skipping"); 521 return; 522 } 523 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 524 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 525 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 526 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 527 528 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 529 int starty = (int) floor(yf - 1); 530 yf = yf - floor(yf); 531 int maxy = srcHeight - 1; 532 int ys0 = rsMax(0, starty + 0); 533 int ys1 = rsMax(0, starty + 1); 534 int ys2 = rsMin(maxy, starty + 2); 535 int ys3 = rsMin(maxy, starty + 3); 536 537 const float4 *yp0 = (const float4 *)(pin + stride * ys0); 538 const float4 *yp1 = (const float4 *)(pin + stride * ys1); 539 const float4 *yp2 = (const float4 *)(pin + stride * ys2); 540 const float4 *yp3 = (const float4 *)(pin + stride * ys3); 541 542 float4 *out = ((float4 *)info->outPtr[0]) + xstart; 543 uint32_t x1 = xstart; 544 uint32_t x2 = xend; 545 546 while(x1 < x2) { 547 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 548 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 549 out++; 550 x1++; 551 } 552 } 553 554 void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info, 555 uint32_t xstart, uint32_t xend, 556 uint32_t outstep) { 557 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 558 559 if (!cp->mAlloc.get()) { 560 ALOGE("Resize executed without input, skipping"); 561 return; 562 } 563 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 564 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 565 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 566 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 567 568 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 569 int starty = (int) floor(yf - 1); 570 yf = yf - floor(yf); 571 int maxy = srcHeight - 1; 572 int ys0 = rsMax(0, starty + 0); 573 int ys1 = rsMax(0, starty + 1); 574 int ys2 = rsMin(maxy, starty + 2); 575 int ys3 = rsMin(maxy, starty + 3); 576 577 const float2 *yp0 = (const float2 *)(pin + stride * ys0); 578 const float2 *yp1 = (const float2 *)(pin + stride * ys1); 579 const float2 *yp2 = (const float2 *)(pin + stride * ys2); 580 const float2 *yp3 = (const float2 *)(pin + stride * ys3); 581 582 float2 *out = ((float2 *)info->outPtr[0]) + xstart; 583 uint32_t x1 = xstart; 584 uint32_t x2 = xend; 585 586 while(x1 < x2) { 587 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 588 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 589 out++; 590 x1++; 591 } 592 } 593 594 void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info, 595 uint32_t xstart, uint32_t xend, 596 uint32_t outstep) { 597 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 598 599 if (!cp->mAlloc.get()) { 600 ALOGE("Resize executed without input, skipping"); 601 return; 602 } 603 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 604 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 605 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 606 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 607 608 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 609 int starty = (int) floor(yf - 1); 610 yf = yf - floor(yf); 611 int maxy = srcHeight - 1; 612 int ys0 = rsMax(0, starty + 0); 613 int ys1 = rsMax(0, starty + 1); 614 int ys2 = rsMin(maxy, starty + 2); 615 int ys3 = rsMin(maxy, starty + 3); 616 617 const float *yp0 = (const float *)(pin + stride * ys0); 618 const float *yp1 = (const float *)(pin + stride * ys1); 619 const float *yp2 = (const float *)(pin + stride * ys2); 620 const float *yp3 = (const float *)(pin + stride * ys3); 621 622 float *out = ((float *)info->outPtr[0]) + xstart; 623 uint32_t x1 = xstart; 624 uint32_t x2 = xend; 625 626 while(x1 < x2) { 627 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 628 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 629 out++; 630 x1++; 631 } 632 } 633 634 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize ( 635 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) 636 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) { 637 638 } 639 640 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() { 641 } 642 643 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, 644 const Allocation ** ains, 645 uint32_t inLen, Allocation * aout, 646 const void * usr, uint32_t usrLen, 647 const RsScriptCall *sc) 648 { 649 if (!mAlloc.get()) { 650 ALOGE("Resize executed without input, skipping"); 651 return; 652 } 653 const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY; 654 const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX; 655 const size_t stride = mAlloc->mHal.drvState.lod[0].stride; 656 657 //check the data type to determine F or U. 658 if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) { 659 switch(mAlloc->getType()->getElement()->getVectorSize()) { 660 case 1: 661 mRootPtr = &kernelU1; 662 break; 663 case 2: 664 mRootPtr = &kernelU2; 665 break; 666 case 3: 667 case 4: 668 mRootPtr = &kernelU4; 669 break; 670 } 671 } else { 672 switch(mAlloc->getType()->getElement()->getVectorSize()) { 673 case 1: 674 mRootPtr = &kernelF1; 675 break; 676 case 2: 677 mRootPtr = &kernelF2; 678 break; 679 case 3: 680 case 4: 681 mRootPtr = &kernelF4; 682 break; 683 } 684 } 685 686 scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX; 687 scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY; 688 689 } 690 691 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) { 692 s->mHal.info.exportedVariableCount = 1; 693 } 694 695 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() { 696 mAlloc.clear(); 697 } 698 699 700 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 701 702 return new RsdCpuScriptIntrinsicResize(ctx, s, e); 703 } 704