1 /* 2 * Copyright (C) 2014 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 18 #include "rsCpuIntrinsic.h" 19 #include "rsCpuIntrinsicInlines.h" 20 21 namespace android { 22 namespace renderscript { 23 24 25 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic { 26 public: 27 void populateScript(Script *) override; 28 void invokeFreeChildren() override; 29 30 void setGlobalObj(uint32_t slot, ObjectBase *data) override; 31 32 ~RsdCpuScriptIntrinsicResize() override; 33 RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *); 34 35 void preLaunch(uint32_t slot, const Allocation ** ains, 36 uint32_t inLen, Allocation * aout, const void * usr, 37 uint32_t usrLen, const RsScriptCall *sc) override; 38 39 float scaleX; 40 float scaleY; 41 42 protected: 43 ObjectBaseRef<const Allocation> mAlloc; 44 ObjectBaseRef<const Element> mElement; 45 46 static void kernelU1(const RsExpandKernelDriverInfo *info, 47 uint32_t xstart, uint32_t xend, 48 uint32_t outstep); 49 static void kernelU2(const RsExpandKernelDriverInfo *info, 50 uint32_t xstart, uint32_t xend, 51 uint32_t outstep); 52 static void kernelU4(const RsExpandKernelDriverInfo *info, 53 uint32_t xstart, uint32_t xend, 54 uint32_t outstep); 55 static void kernelF1(const RsExpandKernelDriverInfo *info, 56 uint32_t xstart, uint32_t xend, 57 uint32_t outstep); 58 static void kernelF2(const RsExpandKernelDriverInfo *info, 59 uint32_t xstart, uint32_t xend, 60 uint32_t outstep); 61 static void kernelF4(const RsExpandKernelDriverInfo *info, 62 uint32_t xstart, uint32_t xend, 63 uint32_t outstep); 64 }; 65 66 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) { 67 rsAssert(slot == 0); 68 mAlloc.set(static_cast<Allocation *>(data)); 69 } 70 71 static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) { 72 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 73 + x * (3.f * (p1 - p2) + p3 - p0))); 74 } 75 76 static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) { 77 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 78 + x * (3.f * (p1 - p2) + p3 - p0))); 79 } 80 81 static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) { 82 return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3 83 + x * (3.f * (p1 - p2) + p3 - p0))); 84 } 85 86 static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3, 87 float xf, float yf, int width) { 88 int startx = (int) floor(xf - 1); 89 xf = xf - floor(xf); 90 int maxx = width - 1; 91 int xs0 = rsMax(0, startx + 0); 92 int xs1 = rsMax(0, startx + 1); 93 int xs2 = rsMin(maxx, startx + 2); 94 int xs3 = rsMin(maxx, startx + 3); 95 96 float4 p0 = cubicInterpolate(convert_float4(yp0[xs0]), 97 convert_float4(yp0[xs1]), 98 convert_float4(yp0[xs2]), 99 convert_float4(yp0[xs3]), xf); 100 101 float4 p1 = cubicInterpolate(convert_float4(yp1[xs0]), 102 convert_float4(yp1[xs1]), 103 convert_float4(yp1[xs2]), 104 convert_float4(yp1[xs3]), xf); 105 106 float4 p2 = cubicInterpolate(convert_float4(yp2[xs0]), 107 convert_float4(yp2[xs1]), 108 convert_float4(yp2[xs2]), 109 convert_float4(yp2[xs3]), xf); 110 111 float4 p3 = cubicInterpolate(convert_float4(yp3[xs0]), 112 convert_float4(yp3[xs1]), 113 convert_float4(yp3[xs2]), 114 convert_float4(yp3[xs3]), xf); 115 116 float4 p = cubicInterpolate(p0, p1, p2, p3, yf); 117 p = clamp(p + 0.5f, 0.f, 255.f); 118 return convert_uchar4(p); 119 } 120 121 static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3, 122 float xf, float yf, int width) { 123 int startx = (int) floor(xf - 1); 124 xf = xf - floor(xf); 125 int maxx = width - 1; 126 int xs0 = rsMax(0, startx + 0); 127 int xs1 = rsMax(0, startx + 1); 128 int xs2 = rsMin(maxx, startx + 2); 129 int xs3 = rsMin(maxx, startx + 3); 130 131 float2 p0 = cubicInterpolate(convert_float2(yp0[xs0]), 132 convert_float2(yp0[xs1]), 133 convert_float2(yp0[xs2]), 134 convert_float2(yp0[xs3]), xf); 135 136 float2 p1 = cubicInterpolate(convert_float2(yp1[xs0]), 137 convert_float2(yp1[xs1]), 138 convert_float2(yp1[xs2]), 139 convert_float2(yp1[xs3]), xf); 140 141 float2 p2 = cubicInterpolate(convert_float2(yp2[xs0]), 142 convert_float2(yp2[xs1]), 143 convert_float2(yp2[xs2]), 144 convert_float2(yp2[xs3]), xf); 145 146 float2 p3 = cubicInterpolate(convert_float2(yp3[xs0]), 147 convert_float2(yp3[xs1]), 148 convert_float2(yp3[xs2]), 149 convert_float2(yp3[xs3]), xf); 150 151 float2 p = cubicInterpolate(p0, p1, p2, p3, yf); 152 p = clamp(p + 0.5f, 0.f, 255.f); 153 return convert_uchar2(p); 154 } 155 156 static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3, 157 float xf, float yf, int width) { 158 int startx = (int) floor(xf - 1); 159 xf = xf - floor(xf); 160 int maxx = width - 1; 161 int xs0 = rsMax(0, startx + 0); 162 int xs1 = rsMax(0, startx + 1); 163 int xs2 = rsMin(maxx, startx + 2); 164 int xs3 = rsMin(maxx, startx + 3); 165 166 float p0 = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1], 167 (float)yp0[xs2], (float)yp0[xs3], xf); 168 float p1 = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1], 169 (float)yp1[xs2], (float)yp1[xs3], xf); 170 float p2 = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1], 171 (float)yp2[xs2], (float)yp2[xs3], xf); 172 float p3 = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1], 173 (float)yp3[xs2], (float)yp3[xs3], xf); 174 175 float p = cubicInterpolate(p0, p1, p2, p3, yf); 176 p = clamp(p + 0.5f, 0.f, 255.f); 177 return (uchar)p; 178 } 179 180 extern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc); 181 182 extern "C" void rsdIntrinsicResizeB4_K( 183 uchar4 *dst, 184 size_t count, 185 uint32_t xf, 186 uint32_t xinc, 187 uchar4 const *srcn, 188 uchar4 const *src0, 189 uchar4 const *src1, 190 uchar4 const *src2, 191 size_t xclip, 192 size_t avail, 193 uint64_t osc_ctl, 194 int32_t const *yr); 195 196 extern "C" void rsdIntrinsicResizeB2_K( 197 uchar2 *dst, 198 size_t count, 199 uint32_t xf, 200 uint32_t xinc, 201 uchar2 const *srcn, 202 uchar2 const *src0, 203 uchar2 const *src1, 204 uchar2 const *src2, 205 size_t xclip, 206 size_t avail, 207 uint64_t osc_ctl, 208 int32_t const *yr); 209 210 extern "C" void rsdIntrinsicResizeB1_K( 211 uchar *dst, 212 size_t count, 213 uint32_t xf, 214 uint32_t xinc, 215 uchar const *srcn, 216 uchar const *src0, 217 uchar const *src1, 218 uchar const *src2, 219 size_t xclip, 220 size_t avail, 221 uint64_t osc_ctl, 222 int32_t const *yr); 223 224 #if defined(ARCH_ARM_USE_INTRINSICS) 225 static void mkYCoeff(int32_t *yr, float yf) { 226 int32_t yf1 = rint(yf * 0x10000); 227 int32_t yf2 = rint(yf * yf * 0x10000); 228 int32_t yf3 = rint(yf * yf * yf * 0x10000); 229 230 yr[0] = -(2 * yf2 - yf3 - yf1) >> 1; 231 yr[1] = (3 * yf3 - 5 * yf2 + 0x20000) >> 1; 232 yr[2] = (-3 * yf3 + 4 * yf2 + yf1) >> 1; 233 yr[3] = -(yf3 - yf2) >> 1; 234 } 235 #endif 236 237 static float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3, 238 float xf, float yf, int width) { 239 int startx = (int) floor(xf - 1); 240 xf = xf - floor(xf); 241 int maxx = width - 1; 242 int xs0 = rsMax(0, startx + 0); 243 int xs1 = rsMax(0, startx + 1); 244 int xs2 = rsMin(maxx, startx + 2); 245 int xs3 = rsMin(maxx, startx + 3); 246 247 float4 p0 = cubicInterpolate(yp0[xs0], yp0[xs1], 248 yp0[xs2], yp0[xs3], xf); 249 float4 p1 = cubicInterpolate(yp1[xs0], yp1[xs1], 250 yp1[xs2], yp1[xs3], xf); 251 float4 p2 = cubicInterpolate(yp2[xs0], yp2[xs1], 252 yp2[xs2], yp2[xs3], xf); 253 float4 p3 = cubicInterpolate(yp3[xs0], yp3[xs1], 254 yp3[xs2], yp3[xs3], xf); 255 256 float4 p = cubicInterpolate(p0, p1, p2, p3, yf); 257 return p; 258 } 259 260 static float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3, 261 float xf, float yf, int width) { 262 int startx = (int) floor(xf - 1); 263 xf = xf - floor(xf); 264 int maxx = width - 1; 265 int xs0 = rsMax(0, startx + 0); 266 int xs1 = rsMax(0, startx + 1); 267 int xs2 = rsMin(maxx, startx + 2); 268 int xs3 = rsMin(maxx, startx + 3); 269 270 float2 p0 = cubicInterpolate(yp0[xs0], yp0[xs1], 271 yp0[xs2], yp0[xs3], xf); 272 float2 p1 = cubicInterpolate(yp1[xs0], yp1[xs1], 273 yp1[xs2], yp1[xs3], xf); 274 float2 p2 = cubicInterpolate(yp2[xs0], yp2[xs1], 275 yp2[xs2], yp2[xs3], xf); 276 float2 p3 = cubicInterpolate(yp3[xs0], yp3[xs1], 277 yp3[xs2], yp3[xs3], xf); 278 279 float2 p = cubicInterpolate(p0, p1, p2, p3, yf); 280 return p; 281 } 282 283 static float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3, 284 float xf, float yf, int width) { 285 int startx = (int) floor(xf - 1); 286 xf = xf - floor(xf); 287 int maxx = width - 1; 288 int xs0 = rsMax(0, startx + 0); 289 int xs1 = rsMax(0, startx + 1); 290 int xs2 = rsMin(maxx, startx + 2); 291 int xs3 = rsMin(maxx, startx + 3); 292 293 float p0 = cubicInterpolate(yp0[xs0], yp0[xs1], 294 yp0[xs2], yp0[xs3], xf); 295 float p1 = cubicInterpolate(yp1[xs0], yp1[xs1], 296 yp1[xs2], yp1[xs3], xf); 297 float p2 = cubicInterpolate(yp2[xs0], yp2[xs1], 298 yp2[xs2], yp2[xs3], xf); 299 float p3 = cubicInterpolate(yp3[xs0], yp3[xs1], 300 yp3[xs2], yp3[xs3], xf); 301 302 float p = cubicInterpolate(p0, p1, p2, p3, yf); 303 return p; 304 } 305 306 void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info, 307 uint32_t xstart, uint32_t xend, 308 uint32_t outstep) { 309 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 310 311 if (!cp->mAlloc.get()) { 312 ALOGE("Resize executed without input, skipping"); 313 return; 314 } 315 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 316 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 317 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 318 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 319 320 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 321 int starty = (int) floor(yf - 1); 322 yf = yf - floor(yf); 323 int maxy = srcHeight - 1; 324 int ys0 = rsMax(0, starty + 0); 325 int ys1 = rsMax(0, starty + 1); 326 int ys2 = rsMin(maxy, starty + 2); 327 int ys3 = rsMin(maxy, starty + 3); 328 329 const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0); 330 const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1); 331 const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2); 332 const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3); 333 334 uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart; 335 uint32_t x1 = xstart; 336 uint32_t x2 = xend; 337 338 #if defined(ARCH_ARM_USE_INTRINSICS) 339 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) { 340 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 341 long xf16 = rint(xf * 0x10000); 342 uint32_t xinc16 = rint(cp->scaleX * 0x10000); 343 344 int xoff = (xf16 >> 16) - 1; 345 int xclip = rsMax(0, xoff) - xoff; 346 int len = x2 - x1; 347 348 int32_t yr[4]; 349 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16); 350 mkYCoeff(yr, yf); 351 352 xoff += xclip; 353 354 rsdIntrinsicResizeB4_K( 355 out, len, 356 xf16 & 0xffff, xinc16, 357 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff, 358 xclip, srcWidth - xoff + xclip, 359 osc_ctl, yr); 360 out += len; 361 x1 += len; 362 } 363 #endif 364 365 while(x1 < x2) { 366 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 367 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 368 out++; 369 x1++; 370 } 371 } 372 373 void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info, 374 uint32_t xstart, uint32_t xend, 375 uint32_t outstep) { 376 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 377 378 if (!cp->mAlloc.get()) { 379 ALOGE("Resize executed without input, skipping"); 380 return; 381 } 382 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 383 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 384 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 385 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 386 387 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 388 int starty = (int) floor(yf - 1); 389 yf = yf - floor(yf); 390 int maxy = srcHeight - 1; 391 int ys0 = rsMax(0, starty + 0); 392 int ys1 = rsMax(0, starty + 1); 393 int ys2 = rsMin(maxy, starty + 2); 394 int ys3 = rsMin(maxy, starty + 3); 395 396 const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0); 397 const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1); 398 const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2); 399 const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3); 400 401 uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart; 402 uint32_t x1 = xstart; 403 uint32_t x2 = xend; 404 405 #if defined(ARCH_ARM_USE_INTRINSICS) 406 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) { 407 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 408 long xf16 = rint(xf * 0x10000); 409 uint32_t xinc16 = rint(cp->scaleX * 0x10000); 410 411 int xoff = (xf16 >> 16) - 1; 412 int xclip = rsMax(0, xoff) - xoff; 413 int len = x2 - x1; 414 415 int32_t yr[4]; 416 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16); 417 mkYCoeff(yr, yf); 418 419 xoff += xclip; 420 421 rsdIntrinsicResizeB2_K( 422 out, len, 423 xf16 & 0xffff, xinc16, 424 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff, 425 xclip, srcWidth - xoff + xclip, 426 osc_ctl, yr); 427 out += len; 428 x1 += len; 429 } 430 #endif 431 432 while(x1 < x2) { 433 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 434 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 435 out++; 436 x1++; 437 } 438 } 439 440 void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info, 441 uint32_t xstart, uint32_t xend, 442 uint32_t outstep) { 443 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 444 445 if (!cp->mAlloc.get()) { 446 ALOGE("Resize executed without input, skipping"); 447 return; 448 } 449 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 450 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 451 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 452 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 453 454 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 455 int starty = (int) floor(yf - 1); 456 yf = yf - floor(yf); 457 int maxy = srcHeight - 1; 458 int ys0 = rsMax(0, starty + 0); 459 int ys1 = rsMax(0, starty + 1); 460 int ys2 = rsMin(maxy, starty + 2); 461 int ys3 = rsMin(maxy, starty + 3); 462 463 const uchar *yp0 = pin + stride * ys0; 464 const uchar *yp1 = pin + stride * ys1; 465 const uchar *yp2 = pin + stride * ys2; 466 const uchar *yp3 = pin + stride * ys3; 467 468 uchar *out = ((uchar *)info->outPtr[0]) + xstart; 469 uint32_t x1 = xstart; 470 uint32_t x2 = xend; 471 472 #if defined(ARCH_ARM_USE_INTRINSICS) 473 if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) { 474 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 475 long xf16 = rint(xf * 0x10000); 476 uint32_t xinc16 = rint(cp->scaleX * 0x10000); 477 478 int xoff = (xf16 >> 16) - 1; 479 int xclip = rsMax(0, xoff) - xoff; 480 int len = x2 - x1; 481 482 int32_t yr[4]; 483 uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16); 484 mkYCoeff(yr, yf); 485 486 xoff += xclip; 487 488 rsdIntrinsicResizeB1_K( 489 out, len, 490 xf16 & 0xffff, xinc16, 491 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff, 492 xclip, srcWidth - xoff + xclip, 493 osc_ctl, yr); 494 out += len; 495 x1 += len; 496 } 497 #endif 498 499 while(x1 < x2) { 500 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 501 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 502 out++; 503 x1++; 504 } 505 } 506 507 void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info, 508 uint32_t xstart, uint32_t xend, 509 uint32_t outstep) { 510 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 511 512 if (!cp->mAlloc.get()) { 513 ALOGE("Resize executed without input, skipping"); 514 return; 515 } 516 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 517 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 518 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 519 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 520 521 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 522 int starty = (int) floor(yf - 1); 523 yf = yf - floor(yf); 524 int maxy = srcHeight - 1; 525 int ys0 = rsMax(0, starty + 0); 526 int ys1 = rsMax(0, starty + 1); 527 int ys2 = rsMin(maxy, starty + 2); 528 int ys3 = rsMin(maxy, starty + 3); 529 530 const float4 *yp0 = (const float4 *)(pin + stride * ys0); 531 const float4 *yp1 = (const float4 *)(pin + stride * ys1); 532 const float4 *yp2 = (const float4 *)(pin + stride * ys2); 533 const float4 *yp3 = (const float4 *)(pin + stride * ys3); 534 535 float4 *out = ((float4 *)info->outPtr[0]) + xstart; 536 uint32_t x1 = xstart; 537 uint32_t x2 = xend; 538 539 while(x1 < x2) { 540 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 541 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 542 out++; 543 x1++; 544 } 545 } 546 547 void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info, 548 uint32_t xstart, uint32_t xend, 549 uint32_t outstep) { 550 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 551 552 if (!cp->mAlloc.get()) { 553 ALOGE("Resize executed without input, skipping"); 554 return; 555 } 556 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 557 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 558 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 559 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 560 561 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 562 int starty = (int) floor(yf - 1); 563 yf = yf - floor(yf); 564 int maxy = srcHeight - 1; 565 int ys0 = rsMax(0, starty + 0); 566 int ys1 = rsMax(0, starty + 1); 567 int ys2 = rsMin(maxy, starty + 2); 568 int ys3 = rsMin(maxy, starty + 3); 569 570 const float2 *yp0 = (const float2 *)(pin + stride * ys0); 571 const float2 *yp1 = (const float2 *)(pin + stride * ys1); 572 const float2 *yp2 = (const float2 *)(pin + stride * ys2); 573 const float2 *yp3 = (const float2 *)(pin + stride * ys3); 574 575 float2 *out = ((float2 *)info->outPtr[0]) + xstart; 576 uint32_t x1 = xstart; 577 uint32_t x2 = xend; 578 579 while(x1 < x2) { 580 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 581 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 582 out++; 583 x1++; 584 } 585 } 586 587 void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info, 588 uint32_t xstart, uint32_t xend, 589 uint32_t outstep) { 590 RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr; 591 592 if (!cp->mAlloc.get()) { 593 ALOGE("Resize executed without input, skipping"); 594 return; 595 } 596 const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr; 597 const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY; 598 const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX; 599 const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride; 600 601 float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f; 602 int starty = (int) floor(yf - 1); 603 yf = yf - floor(yf); 604 int maxy = srcHeight - 1; 605 int ys0 = rsMax(0, starty + 0); 606 int ys1 = rsMax(0, starty + 1); 607 int ys2 = rsMin(maxy, starty + 2); 608 int ys3 = rsMin(maxy, starty + 3); 609 610 const float *yp0 = (const float *)(pin + stride * ys0); 611 const float *yp1 = (const float *)(pin + stride * ys1); 612 const float *yp2 = (const float *)(pin + stride * ys2); 613 const float *yp3 = (const float *)(pin + stride * ys3); 614 615 float *out = ((float *)info->outPtr[0]) + xstart; 616 uint32_t x1 = xstart; 617 uint32_t x2 = xend; 618 619 while(x1 < x2) { 620 float xf = (x1 + 0.5f) * cp->scaleX - 0.5f; 621 *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth); 622 out++; 623 x1++; 624 } 625 } 626 627 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize ( 628 RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) 629 : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) { 630 631 } 632 633 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() { 634 } 635 636 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, 637 const Allocation ** ains, 638 uint32_t inLen, Allocation * aout, 639 const void * usr, uint32_t usrLen, 640 const RsScriptCall *sc) 641 { 642 if (!mAlloc.get()) { 643 ALOGE("Resize executed without input, skipping"); 644 return; 645 } 646 const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY; 647 const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX; 648 649 //check the data type to determine F or U. 650 if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) { 651 switch(mAlloc->getType()->getElement()->getVectorSize()) { 652 case 1: 653 mRootPtr = &kernelU1; 654 break; 655 case 2: 656 mRootPtr = &kernelU2; 657 break; 658 case 3: 659 case 4: 660 mRootPtr = &kernelU4; 661 break; 662 } 663 } else { 664 switch(mAlloc->getType()->getElement()->getVectorSize()) { 665 case 1: 666 mRootPtr = &kernelF1; 667 break; 668 case 2: 669 mRootPtr = &kernelF2; 670 break; 671 case 3: 672 case 4: 673 mRootPtr = &kernelF4; 674 break; 675 } 676 } 677 678 scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX; 679 scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY; 680 681 } 682 683 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) { 684 s->mHal.info.exportedVariableCount = 1; 685 } 686 687 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() { 688 mAlloc.clear(); 689 } 690 691 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) { 692 693 return new RsdCpuScriptIntrinsicResize(ctx, s, e); 694 } 695 696 } // namespace renderscript 697 } // namespace android 698