Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsCpuIntrinsic.h"
     19 #include "rsCpuIntrinsicInlines.h"
     20 
     21 namespace android {
     22 namespace renderscript {
     23 
     24 
     25 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
     26 public:
     27     void populateScript(Script *) override;
     28     void invokeFreeChildren() override;
     29 
     30     void setGlobalObj(uint32_t slot, ObjectBase *data) override;
     31 
     32     ~RsdCpuScriptIntrinsicResize() override;
     33     RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
     34 
     35     void preLaunch(uint32_t slot, const Allocation ** ains,
     36                    uint32_t inLen, Allocation * aout, const void * usr,
     37                    uint32_t usrLen, const RsScriptCall *sc) override;
     38 
     39     float scaleX;
     40     float scaleY;
     41 
     42 protected:
     43     ObjectBaseRef<const Allocation> mAlloc;
     44     ObjectBaseRef<const Element> mElement;
     45 
     46     static void kernelU1(const RsExpandKernelDriverInfo *info,
     47                          uint32_t xstart, uint32_t xend,
     48                          uint32_t outstep);
     49     static void kernelU2(const RsExpandKernelDriverInfo *info,
     50                          uint32_t xstart, uint32_t xend,
     51                          uint32_t outstep);
     52     static void kernelU4(const RsExpandKernelDriverInfo *info,
     53                          uint32_t xstart, uint32_t xend,
     54                          uint32_t outstep);
     55     static void kernelF1(const RsExpandKernelDriverInfo *info,
     56                          uint32_t xstart, uint32_t xend,
     57                          uint32_t outstep);
     58     static void kernelF2(const RsExpandKernelDriverInfo *info,
     59                          uint32_t xstart, uint32_t xend,
     60                          uint32_t outstep);
     61     static void kernelF4(const RsExpandKernelDriverInfo *info,
     62                          uint32_t xstart, uint32_t xend,
     63                          uint32_t outstep);
     64 };
     65 
     66 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
     67     rsAssert(slot == 0);
     68     mAlloc.set(static_cast<Allocation *>(data));
     69 }
     70 
     71 static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
     72     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
     73             + x * (3.f * (p1 - p2) + p3 - p0)));
     74 }
     75 
     76 static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
     77     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
     78             + x * (3.f * (p1 - p2) + p3 - p0)));
     79 }
     80 
     81 static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
     82     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
     83             + x * (3.f * (p1 - p2) + p3 - p0)));
     84 }
     85 
     86 static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
     87                          float xf, float yf, int width) {
     88     int startx = (int) floor(xf - 1);
     89     xf = xf - floor(xf);
     90     int maxx = width - 1;
     91     int xs0 = rsMax(0, startx + 0);
     92     int xs1 = rsMax(0, startx + 1);
     93     int xs2 = rsMin(maxx, startx + 2);
     94     int xs3 = rsMin(maxx, startx + 3);
     95 
     96     float4 p0  = cubicInterpolate(convert_float4(yp0[xs0]),
     97                                   convert_float4(yp0[xs1]),
     98                                   convert_float4(yp0[xs2]),
     99                                   convert_float4(yp0[xs3]), xf);
    100 
    101     float4 p1  = cubicInterpolate(convert_float4(yp1[xs0]),
    102                                   convert_float4(yp1[xs1]),
    103                                   convert_float4(yp1[xs2]),
    104                                   convert_float4(yp1[xs3]), xf);
    105 
    106     float4 p2  = cubicInterpolate(convert_float4(yp2[xs0]),
    107                                   convert_float4(yp2[xs1]),
    108                                   convert_float4(yp2[xs2]),
    109                                   convert_float4(yp2[xs3]), xf);
    110 
    111     float4 p3  = cubicInterpolate(convert_float4(yp3[xs0]),
    112                                   convert_float4(yp3[xs1]),
    113                                   convert_float4(yp3[xs2]),
    114                                   convert_float4(yp3[xs3]), xf);
    115 
    116     float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    117     p = clamp(p + 0.5f, 0.f, 255.f);
    118     return convert_uchar4(p);
    119 }
    120 
    121 static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
    122                          float xf, float yf, int width) {
    123     int startx = (int) floor(xf - 1);
    124     xf = xf - floor(xf);
    125     int maxx = width - 1;
    126     int xs0 = rsMax(0, startx + 0);
    127     int xs1 = rsMax(0, startx + 1);
    128     int xs2 = rsMin(maxx, startx + 2);
    129     int xs3 = rsMin(maxx, startx + 3);
    130 
    131     float2 p0  = cubicInterpolate(convert_float2(yp0[xs0]),
    132                                   convert_float2(yp0[xs1]),
    133                                   convert_float2(yp0[xs2]),
    134                                   convert_float2(yp0[xs3]), xf);
    135 
    136     float2 p1  = cubicInterpolate(convert_float2(yp1[xs0]),
    137                                   convert_float2(yp1[xs1]),
    138                                   convert_float2(yp1[xs2]),
    139                                   convert_float2(yp1[xs3]), xf);
    140 
    141     float2 p2  = cubicInterpolate(convert_float2(yp2[xs0]),
    142                                   convert_float2(yp2[xs1]),
    143                                   convert_float2(yp2[xs2]),
    144                                   convert_float2(yp2[xs3]), xf);
    145 
    146     float2 p3  = cubicInterpolate(convert_float2(yp3[xs0]),
    147                                   convert_float2(yp3[xs1]),
    148                                   convert_float2(yp3[xs2]),
    149                                   convert_float2(yp3[xs3]), xf);
    150 
    151     float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    152     p = clamp(p + 0.5f, 0.f, 255.f);
    153     return convert_uchar2(p);
    154 }
    155 
    156 static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
    157                         float xf, float yf, int width) {
    158     int startx = (int) floor(xf - 1);
    159     xf = xf - floor(xf);
    160     int maxx = width - 1;
    161     int xs0 = rsMax(0, startx + 0);
    162     int xs1 = rsMax(0, startx + 1);
    163     int xs2 = rsMin(maxx, startx + 2);
    164     int xs3 = rsMin(maxx, startx + 3);
    165 
    166     float p0  = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1],
    167                                  (float)yp0[xs2], (float)yp0[xs3], xf);
    168     float p1  = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1],
    169                                  (float)yp1[xs2], (float)yp1[xs3], xf);
    170     float p2  = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1],
    171                                  (float)yp2[xs2], (float)yp2[xs3], xf);
    172     float p3  = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1],
    173                                  (float)yp3[xs2], (float)yp3[xs3], xf);
    174 
    175     float p  = cubicInterpolate(p0, p1, p2, p3, yf);
    176     p = clamp(p + 0.5f, 0.f, 255.f);
    177     return (uchar)p;
    178 }
    179 
    180 extern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc);
    181 
    182 extern "C" void rsdIntrinsicResizeB4_K(
    183             uchar4 *dst,
    184             size_t count,
    185             uint32_t xf,
    186             uint32_t xinc,
    187             uchar4 const *srcn,
    188             uchar4 const *src0,
    189             uchar4 const *src1,
    190             uchar4 const *src2,
    191             size_t xclip,
    192             size_t avail,
    193             uint64_t osc_ctl,
    194             int32_t const *yr);
    195 
    196 extern "C" void rsdIntrinsicResizeB2_K(
    197             uchar2 *dst,
    198             size_t count,
    199             uint32_t xf,
    200             uint32_t xinc,
    201             uchar2 const *srcn,
    202             uchar2 const *src0,
    203             uchar2 const *src1,
    204             uchar2 const *src2,
    205             size_t xclip,
    206             size_t avail,
    207             uint64_t osc_ctl,
    208             int32_t const *yr);
    209 
    210 extern "C" void rsdIntrinsicResizeB1_K(
    211             uchar *dst,
    212             size_t count,
    213             uint32_t xf,
    214             uint32_t xinc,
    215             uchar const *srcn,
    216             uchar const *src0,
    217             uchar const *src1,
    218             uchar const *src2,
    219             size_t xclip,
    220             size_t avail,
    221             uint64_t osc_ctl,
    222             int32_t const *yr);
    223 
    224 #if defined(ARCH_ARM_USE_INTRINSICS)
    225 static void mkYCoeff(int32_t *yr, float yf) {
    226     int32_t yf1 = rint(yf * 0x10000);
    227     int32_t yf2 = rint(yf * yf * 0x10000);
    228     int32_t yf3 = rint(yf * yf * yf * 0x10000);
    229 
    230     yr[0] = -(2 * yf2 - yf3 - yf1) >> 1;
    231     yr[1] = (3 * yf3 - 5 * yf2 + 0x20000) >> 1;
    232     yr[2] = (-3 * yf3 + 4 * yf2 + yf1) >> 1;
    233     yr[3] = -(yf3 - yf2) >> 1;
    234 }
    235 #endif
    236 
    237 static float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3,
    238                          float xf, float yf, int width) {
    239     int startx = (int) floor(xf - 1);
    240     xf = xf - floor(xf);
    241     int maxx = width - 1;
    242     int xs0 = rsMax(0, startx + 0);
    243     int xs1 = rsMax(0, startx + 1);
    244     int xs2 = rsMin(maxx, startx + 2);
    245     int xs3 = rsMin(maxx, startx + 3);
    246 
    247     float4 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
    248                                   yp0[xs2], yp0[xs3], xf);
    249     float4 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
    250                                   yp1[xs2], yp1[xs3], xf);
    251     float4 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
    252                                   yp2[xs2], yp2[xs3], xf);
    253     float4 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
    254                                   yp3[xs2], yp3[xs3], xf);
    255 
    256     float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    257     return p;
    258 }
    259 
    260 static float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3,
    261                          float xf, float yf, int width) {
    262     int startx = (int) floor(xf - 1);
    263     xf = xf - floor(xf);
    264     int maxx = width - 1;
    265     int xs0 = rsMax(0, startx + 0);
    266     int xs1 = rsMax(0, startx + 1);
    267     int xs2 = rsMin(maxx, startx + 2);
    268     int xs3 = rsMin(maxx, startx + 3);
    269 
    270     float2 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
    271                                   yp0[xs2], yp0[xs3], xf);
    272     float2 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
    273                                   yp1[xs2], yp1[xs3], xf);
    274     float2 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
    275                                   yp2[xs2], yp2[xs3], xf);
    276     float2 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
    277                                   yp3[xs2], yp3[xs3], xf);
    278 
    279     float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    280     return p;
    281 }
    282 
    283 static float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3,
    284                         float xf, float yf, int width) {
    285     int startx = (int) floor(xf - 1);
    286     xf = xf - floor(xf);
    287     int maxx = width - 1;
    288     int xs0 = rsMax(0, startx + 0);
    289     int xs1 = rsMax(0, startx + 1);
    290     int xs2 = rsMin(maxx, startx + 2);
    291     int xs3 = rsMin(maxx, startx + 3);
    292 
    293     float p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
    294                                  yp0[xs2], yp0[xs3], xf);
    295     float p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
    296                                  yp1[xs2], yp1[xs3], xf);
    297     float p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
    298                                  yp2[xs2], yp2[xs3], xf);
    299     float p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
    300                                  yp3[xs2], yp3[xs3], xf);
    301 
    302     float p  = cubicInterpolate(p0, p1, p2, p3, yf);
    303     return p;
    304 }
    305 
    306 void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
    307                                                 uint32_t xstart, uint32_t xend,
    308                                                 uint32_t outstep) {
    309     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    310 
    311     if (!cp->mAlloc.get()) {
    312         ALOGE("Resize executed without input, skipping");
    313         return;
    314     }
    315     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    316     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    317     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    318     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    319 
    320     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    321     int starty = (int) floor(yf - 1);
    322     yf = yf - floor(yf);
    323     int maxy = srcHeight - 1;
    324     int ys0 = rsMax(0, starty + 0);
    325     int ys1 = rsMax(0, starty + 1);
    326     int ys2 = rsMin(maxy, starty + 2);
    327     int ys3 = rsMin(maxy, starty + 3);
    328 
    329     const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
    330     const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
    331     const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
    332     const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
    333 
    334     uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
    335     uint32_t x1 = xstart;
    336     uint32_t x2 = xend;
    337 
    338 #if defined(ARCH_ARM_USE_INTRINSICS)
    339     if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
    340         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    341         long xf16 = rint(xf * 0x10000);
    342         uint32_t xinc16 = rint(cp->scaleX * 0x10000);
    343 
    344         int xoff = (xf16 >> 16) - 1;
    345         int xclip = rsMax(0, xoff) - xoff;
    346         int len = x2 - x1;
    347 
    348         int32_t yr[4];
    349         uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
    350         mkYCoeff(yr, yf);
    351 
    352         xoff += xclip;
    353 
    354         rsdIntrinsicResizeB4_K(
    355                 out, len,
    356                 xf16 & 0xffff, xinc16,
    357                 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
    358                 xclip, srcWidth - xoff + xclip,
    359                 osc_ctl, yr);
    360         out += len;
    361         x1 += len;
    362     }
    363 #endif
    364 
    365     while(x1 < x2) {
    366         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    367         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    368         out++;
    369         x1++;
    370     }
    371 }
    372 
    373 void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
    374                                                 uint32_t xstart, uint32_t xend,
    375                                                 uint32_t outstep) {
    376     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    377 
    378     if (!cp->mAlloc.get()) {
    379         ALOGE("Resize executed without input, skipping");
    380         return;
    381     }
    382     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    383     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    384     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    385     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    386 
    387     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    388     int starty = (int) floor(yf - 1);
    389     yf = yf - floor(yf);
    390     int maxy = srcHeight - 1;
    391     int ys0 = rsMax(0, starty + 0);
    392     int ys1 = rsMax(0, starty + 1);
    393     int ys2 = rsMin(maxy, starty + 2);
    394     int ys3 = rsMin(maxy, starty + 3);
    395 
    396     const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
    397     const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
    398     const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
    399     const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
    400 
    401     uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
    402     uint32_t x1 = xstart;
    403     uint32_t x2 = xend;
    404 
    405 #if defined(ARCH_ARM_USE_INTRINSICS)
    406     if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
    407         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    408         long xf16 = rint(xf * 0x10000);
    409         uint32_t xinc16 = rint(cp->scaleX * 0x10000);
    410 
    411         int xoff = (xf16 >> 16) - 1;
    412         int xclip = rsMax(0, xoff) - xoff;
    413         int len = x2 - x1;
    414 
    415         int32_t yr[4];
    416         uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
    417         mkYCoeff(yr, yf);
    418 
    419         xoff += xclip;
    420 
    421         rsdIntrinsicResizeB2_K(
    422                 out, len,
    423                 xf16 & 0xffff, xinc16,
    424                 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
    425                 xclip, srcWidth - xoff + xclip,
    426                 osc_ctl, yr);
    427         out += len;
    428         x1 += len;
    429     }
    430 #endif
    431 
    432     while(x1 < x2) {
    433         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    434         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    435         out++;
    436         x1++;
    437     }
    438 }
    439 
    440 void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
    441                                                 uint32_t xstart, uint32_t xend,
    442                                                 uint32_t outstep) {
    443     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    444 
    445     if (!cp->mAlloc.get()) {
    446         ALOGE("Resize executed without input, skipping");
    447         return;
    448     }
    449     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    450     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    451     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    452     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    453 
    454     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    455     int starty = (int) floor(yf - 1);
    456     yf = yf - floor(yf);
    457     int maxy = srcHeight - 1;
    458     int ys0 = rsMax(0, starty + 0);
    459     int ys1 = rsMax(0, starty + 1);
    460     int ys2 = rsMin(maxy, starty + 2);
    461     int ys3 = rsMin(maxy, starty + 3);
    462 
    463     const uchar *yp0 = pin + stride * ys0;
    464     const uchar *yp1 = pin + stride * ys1;
    465     const uchar *yp2 = pin + stride * ys2;
    466     const uchar *yp3 = pin + stride * ys3;
    467 
    468     uchar *out = ((uchar *)info->outPtr[0]) + xstart;
    469     uint32_t x1 = xstart;
    470     uint32_t x2 = xend;
    471 
    472 #if defined(ARCH_ARM_USE_INTRINSICS)
    473     if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
    474         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    475         long xf16 = rint(xf * 0x10000);
    476         uint32_t xinc16 = rint(cp->scaleX * 0x10000);
    477 
    478         int xoff = (xf16 >> 16) - 1;
    479         int xclip = rsMax(0, xoff) - xoff;
    480         int len = x2 - x1;
    481 
    482         int32_t yr[4];
    483         uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
    484         mkYCoeff(yr, yf);
    485 
    486         xoff += xclip;
    487 
    488         rsdIntrinsicResizeB1_K(
    489                 out, len,
    490                 xf16 & 0xffff, xinc16,
    491                 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
    492                 xclip, srcWidth - xoff + xclip,
    493                 osc_ctl, yr);
    494         out += len;
    495         x1 += len;
    496     }
    497 #endif
    498 
    499     while(x1 < x2) {
    500         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    501         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    502         out++;
    503         x1++;
    504     }
    505 }
    506 
    507 void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
    508                                                 uint32_t xstart, uint32_t xend,
    509                                                 uint32_t outstep) {
    510     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    511 
    512     if (!cp->mAlloc.get()) {
    513         ALOGE("Resize executed without input, skipping");
    514         return;
    515     }
    516     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    517     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    518     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    519     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    520 
    521     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    522     int starty = (int) floor(yf - 1);
    523     yf = yf - floor(yf);
    524     int maxy = srcHeight - 1;
    525     int ys0 = rsMax(0, starty + 0);
    526     int ys1 = rsMax(0, starty + 1);
    527     int ys2 = rsMin(maxy, starty + 2);
    528     int ys3 = rsMin(maxy, starty + 3);
    529 
    530     const float4 *yp0 = (const float4 *)(pin + stride * ys0);
    531     const float4 *yp1 = (const float4 *)(pin + stride * ys1);
    532     const float4 *yp2 = (const float4 *)(pin + stride * ys2);
    533     const float4 *yp3 = (const float4 *)(pin + stride * ys3);
    534 
    535     float4 *out = ((float4 *)info->outPtr[0]) + xstart;
    536     uint32_t x1 = xstart;
    537     uint32_t x2 = xend;
    538 
    539     while(x1 < x2) {
    540         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    541         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    542         out++;
    543         x1++;
    544     }
    545 }
    546 
    547 void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
    548                                                 uint32_t xstart, uint32_t xend,
    549                                                 uint32_t outstep) {
    550     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    551 
    552     if (!cp->mAlloc.get()) {
    553         ALOGE("Resize executed without input, skipping");
    554         return;
    555     }
    556     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    557     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    558     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    559     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    560 
    561     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    562     int starty = (int) floor(yf - 1);
    563     yf = yf - floor(yf);
    564     int maxy = srcHeight - 1;
    565     int ys0 = rsMax(0, starty + 0);
    566     int ys1 = rsMax(0, starty + 1);
    567     int ys2 = rsMin(maxy, starty + 2);
    568     int ys3 = rsMin(maxy, starty + 3);
    569 
    570     const float2 *yp0 = (const float2 *)(pin + stride * ys0);
    571     const float2 *yp1 = (const float2 *)(pin + stride * ys1);
    572     const float2 *yp2 = (const float2 *)(pin + stride * ys2);
    573     const float2 *yp3 = (const float2 *)(pin + stride * ys3);
    574 
    575     float2 *out = ((float2 *)info->outPtr[0]) + xstart;
    576     uint32_t x1 = xstart;
    577     uint32_t x2 = xend;
    578 
    579     while(x1 < x2) {
    580         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    581         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    582         out++;
    583         x1++;
    584     }
    585 }
    586 
    587 void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
    588                                                 uint32_t xstart, uint32_t xend,
    589                                                 uint32_t outstep) {
    590     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    591 
    592     if (!cp->mAlloc.get()) {
    593         ALOGE("Resize executed without input, skipping");
    594         return;
    595     }
    596     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    597     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    598     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    599     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    600 
    601     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    602     int starty = (int) floor(yf - 1);
    603     yf = yf - floor(yf);
    604     int maxy = srcHeight - 1;
    605     int ys0 = rsMax(0, starty + 0);
    606     int ys1 = rsMax(0, starty + 1);
    607     int ys2 = rsMin(maxy, starty + 2);
    608     int ys3 = rsMin(maxy, starty + 3);
    609 
    610     const float *yp0 = (const float *)(pin + stride * ys0);
    611     const float *yp1 = (const float *)(pin + stride * ys1);
    612     const float *yp2 = (const float *)(pin + stride * ys2);
    613     const float *yp3 = (const float *)(pin + stride * ys3);
    614 
    615     float *out = ((float *)info->outPtr[0]) + xstart;
    616     uint32_t x1 = xstart;
    617     uint32_t x2 = xend;
    618 
    619     while(x1 < x2) {
    620         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    621         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    622         out++;
    623         x1++;
    624     }
    625 }
    626 
    627 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
    628             RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
    629             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
    630 
    631 }
    632 
    633 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
    634 }
    635 
    636 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
    637                                             const Allocation ** ains,
    638                                             uint32_t inLen, Allocation * aout,
    639                                             const void * usr, uint32_t usrLen,
    640                                             const RsScriptCall *sc)
    641 {
    642     if (!mAlloc.get()) {
    643         ALOGE("Resize executed without input, skipping");
    644         return;
    645     }
    646     const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
    647     const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
    648 
    649     //check the data type to determine F or U.
    650     if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) {
    651         switch(mAlloc->getType()->getElement()->getVectorSize()) {
    652         case 1:
    653             mRootPtr = &kernelU1;
    654             break;
    655         case 2:
    656             mRootPtr = &kernelU2;
    657             break;
    658         case 3:
    659         case 4:
    660             mRootPtr = &kernelU4;
    661             break;
    662         }
    663     } else {
    664         switch(mAlloc->getType()->getElement()->getVectorSize()) {
    665         case 1:
    666             mRootPtr = &kernelF1;
    667             break;
    668         case 2:
    669             mRootPtr = &kernelF2;
    670             break;
    671         case 3:
    672         case 4:
    673             mRootPtr = &kernelF4;
    674             break;
    675         }
    676     }
    677 
    678     scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
    679     scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
    680 
    681 }
    682 
    683 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
    684     s->mHal.info.exportedVariableCount = 1;
    685 }
    686 
    687 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
    688     mAlloc.clear();
    689 }
    690 
    691 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
    692 
    693     return new RsdCpuScriptIntrinsicResize(ctx, s, e);
    694 }
    695 
    696 } // namespace renderscript
    697 } // namespace android
    698