Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsCpuIntrinsic.h"
     19 #include "rsCpuIntrinsicInlines.h"
     20 
     21 using namespace android;
     22 using namespace android::renderscript;
     23 
     24 namespace android {
     25 namespace renderscript {
     26 
     27 
     28 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
     29 public:
     30     void populateScript(Script *) override;
     31     void invokeFreeChildren() override;
     32 
     33     void setGlobalObj(uint32_t slot, ObjectBase *data) override;
     34 
     35     ~RsdCpuScriptIntrinsicResize() override;
     36     RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
     37 
     38     void preLaunch(uint32_t slot, const Allocation ** ains,
     39                    uint32_t inLen, Allocation * aout, const void * usr,
     40                    uint32_t usrLen, const RsScriptCall *sc) override;
     41 
     42     float scaleX;
     43     float scaleY;
     44 
     45 protected:
     46     ObjectBaseRef<const Allocation> mAlloc;
     47     ObjectBaseRef<const Element> mElement;
     48 
     49     static void kernelU1(const RsExpandKernelDriverInfo *info,
     50                          uint32_t xstart, uint32_t xend,
     51                          uint32_t outstep);
     52     static void kernelU2(const RsExpandKernelDriverInfo *info,
     53                          uint32_t xstart, uint32_t xend,
     54                          uint32_t outstep);
     55     static void kernelU4(const RsExpandKernelDriverInfo *info,
     56                          uint32_t xstart, uint32_t xend,
     57                          uint32_t outstep);
     58     static void kernelF1(const RsExpandKernelDriverInfo *info,
     59                          uint32_t xstart, uint32_t xend,
     60                          uint32_t outstep);
     61     static void kernelF2(const RsExpandKernelDriverInfo *info,
     62                          uint32_t xstart, uint32_t xend,
     63                          uint32_t outstep);
     64     static void kernelF4(const RsExpandKernelDriverInfo *info,
     65                          uint32_t xstart, uint32_t xend,
     66                          uint32_t outstep);
     67 };
     68 
     69 }
     70 }
     71 
     72 
     73 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
     74     rsAssert(slot == 0);
     75     mAlloc.set(static_cast<Allocation *>(data));
     76 }
     77 
     78 static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
     79     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
     80             + x * (3.f * (p1 - p2) + p3 - p0)));
     81 }
     82 
     83 static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
     84     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
     85             + x * (3.f * (p1 - p2) + p3 - p0)));
     86 }
     87 
     88 static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
     89     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
     90             + x * (3.f * (p1 - p2) + p3 - p0)));
     91 }
     92 
     93 static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
     94                          float xf, float yf, int width) {
     95     int startx = (int) floor(xf - 1);
     96     xf = xf - floor(xf);
     97     int maxx = width - 1;
     98     int xs0 = rsMax(0, startx + 0);
     99     int xs1 = rsMax(0, startx + 1);
    100     int xs2 = rsMin(maxx, startx + 2);
    101     int xs3 = rsMin(maxx, startx + 3);
    102 
    103     float4 p0  = cubicInterpolate(convert_float4(yp0[xs0]),
    104                                   convert_float4(yp0[xs1]),
    105                                   convert_float4(yp0[xs2]),
    106                                   convert_float4(yp0[xs3]), xf);
    107 
    108     float4 p1  = cubicInterpolate(convert_float4(yp1[xs0]),
    109                                   convert_float4(yp1[xs1]),
    110                                   convert_float4(yp1[xs2]),
    111                                   convert_float4(yp1[xs3]), xf);
    112 
    113     float4 p2  = cubicInterpolate(convert_float4(yp2[xs0]),
    114                                   convert_float4(yp2[xs1]),
    115                                   convert_float4(yp2[xs2]),
    116                                   convert_float4(yp2[xs3]), xf);
    117 
    118     float4 p3  = cubicInterpolate(convert_float4(yp3[xs0]),
    119                                   convert_float4(yp3[xs1]),
    120                                   convert_float4(yp3[xs2]),
    121                                   convert_float4(yp3[xs3]), xf);
    122 
    123     float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    124     p = clamp(p + 0.5f, 0.f, 255.f);
    125     return convert_uchar4(p);
    126 }
    127 
    128 static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
    129                          float xf, float yf, int width) {
    130     int startx = (int) floor(xf - 1);
    131     xf = xf - floor(xf);
    132     int maxx = width - 1;
    133     int xs0 = rsMax(0, startx + 0);
    134     int xs1 = rsMax(0, startx + 1);
    135     int xs2 = rsMin(maxx, startx + 2);
    136     int xs3 = rsMin(maxx, startx + 3);
    137 
    138     float2 p0  = cubicInterpolate(convert_float2(yp0[xs0]),
    139                                   convert_float2(yp0[xs1]),
    140                                   convert_float2(yp0[xs2]),
    141                                   convert_float2(yp0[xs3]), xf);
    142 
    143     float2 p1  = cubicInterpolate(convert_float2(yp1[xs0]),
    144                                   convert_float2(yp1[xs1]),
    145                                   convert_float2(yp1[xs2]),
    146                                   convert_float2(yp1[xs3]), xf);
    147 
    148     float2 p2  = cubicInterpolate(convert_float2(yp2[xs0]),
    149                                   convert_float2(yp2[xs1]),
    150                                   convert_float2(yp2[xs2]),
    151                                   convert_float2(yp2[xs3]), xf);
    152 
    153     float2 p3  = cubicInterpolate(convert_float2(yp3[xs0]),
    154                                   convert_float2(yp3[xs1]),
    155                                   convert_float2(yp3[xs2]),
    156                                   convert_float2(yp3[xs3]), xf);
    157 
    158     float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    159     p = clamp(p + 0.5f, 0.f, 255.f);
    160     return convert_uchar2(p);
    161 }
    162 
    163 static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
    164                         float xf, float yf, int width) {
    165     int startx = (int) floor(xf - 1);
    166     xf = xf - floor(xf);
    167     int maxx = width - 1;
    168     int xs0 = rsMax(0, startx + 0);
    169     int xs1 = rsMax(0, startx + 1);
    170     int xs2 = rsMin(maxx, startx + 2);
    171     int xs3 = rsMin(maxx, startx + 3);
    172 
    173     float p0  = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1],
    174                                  (float)yp0[xs2], (float)yp0[xs3], xf);
    175     float p1  = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1],
    176                                  (float)yp1[xs2], (float)yp1[xs3], xf);
    177     float p2  = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1],
    178                                  (float)yp2[xs2], (float)yp2[xs3], xf);
    179     float p3  = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1],
    180                                  (float)yp3[xs2], (float)yp3[xs3], xf);
    181 
    182     float p  = cubicInterpolate(p0, p1, p2, p3, yf);
    183     p = clamp(p + 0.5f, 0.f, 255.f);
    184     return (uchar)p;
    185 }
    186 
    187 extern "C" uint64_t rsdIntrinsicResize_oscctl_K(uint32_t xinc);
    188 
    189 extern "C" void rsdIntrinsicResizeB4_K(
    190             uchar4 *dst,
    191             size_t count,
    192             uint32_t xf,
    193             uint32_t xinc,
    194             uchar4 const *srcn,
    195             uchar4 const *src0,
    196             uchar4 const *src1,
    197             uchar4 const *src2,
    198             size_t xclip,
    199             size_t avail,
    200             uint64_t osc_ctl,
    201             int32_t const *yr);
    202 
    203 extern "C" void rsdIntrinsicResizeB2_K(
    204             uchar2 *dst,
    205             size_t count,
    206             uint32_t xf,
    207             uint32_t xinc,
    208             uchar2 const *srcn,
    209             uchar2 const *src0,
    210             uchar2 const *src1,
    211             uchar2 const *src2,
    212             size_t xclip,
    213             size_t avail,
    214             uint64_t osc_ctl,
    215             int32_t const *yr);
    216 
    217 extern "C" void rsdIntrinsicResizeB1_K(
    218             uchar *dst,
    219             size_t count,
    220             uint32_t xf,
    221             uint32_t xinc,
    222             uchar const *srcn,
    223             uchar const *src0,
    224             uchar const *src1,
    225             uchar const *src2,
    226             size_t xclip,
    227             size_t avail,
    228             uint64_t osc_ctl,
    229             int32_t const *yr);
    230 
    231 #if defined(ARCH_ARM_USE_INTRINSICS)
    232 static void mkYCoeff(int32_t *yr, float yf) {
    233     int32_t yf1 = rint(yf * 0x10000);
    234     int32_t yf2 = rint(yf * yf * 0x10000);
    235     int32_t yf3 = rint(yf * yf * yf * 0x10000);
    236 
    237     yr[0] = -(2 * yf2 - yf3 - yf1) >> 1;
    238     yr[1] = (3 * yf3 - 5 * yf2 + 0x20000) >> 1;
    239     yr[2] = (-3 * yf3 + 4 * yf2 + yf1) >> 1;
    240     yr[3] = -(yf3 - yf2) >> 1;
    241 }
    242 #endif
    243 
    244 static float4 OneBiCubic(const float4 *yp0, const float4 *yp1, const float4 *yp2, const float4 *yp3,
    245                          float xf, float yf, int width) {
    246     int startx = (int) floor(xf - 1);
    247     xf = xf - floor(xf);
    248     int maxx = width - 1;
    249     int xs0 = rsMax(0, startx + 0);
    250     int xs1 = rsMax(0, startx + 1);
    251     int xs2 = rsMin(maxx, startx + 2);
    252     int xs3 = rsMin(maxx, startx + 3);
    253 
    254     float4 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
    255                                   yp0[xs2], yp0[xs3], xf);
    256     float4 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
    257                                   yp1[xs2], yp1[xs3], xf);
    258     float4 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
    259                                   yp2[xs2], yp2[xs3], xf);
    260     float4 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
    261                                   yp3[xs2], yp3[xs3], xf);
    262 
    263     float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    264     return p;
    265 }
    266 
    267 static float2 OneBiCubic(const float2 *yp0, const float2 *yp1, const float2 *yp2, const float2 *yp3,
    268                          float xf, float yf, int width) {
    269     int startx = (int) floor(xf - 1);
    270     xf = xf - floor(xf);
    271     int maxx = width - 1;
    272     int xs0 = rsMax(0, startx + 0);
    273     int xs1 = rsMax(0, startx + 1);
    274     int xs2 = rsMin(maxx, startx + 2);
    275     int xs3 = rsMin(maxx, startx + 3);
    276 
    277     float2 p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
    278                                   yp0[xs2], yp0[xs3], xf);
    279     float2 p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
    280                                   yp1[xs2], yp1[xs3], xf);
    281     float2 p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
    282                                   yp2[xs2], yp2[xs3], xf);
    283     float2 p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
    284                                   yp3[xs2], yp3[xs3], xf);
    285 
    286     float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    287     return p;
    288 }
    289 
    290 static float OneBiCubic(const float *yp0, const float *yp1, const float *yp2, const float *yp3,
    291                         float xf, float yf, int width) {
    292     int startx = (int) floor(xf - 1);
    293     xf = xf - floor(xf);
    294     int maxx = width - 1;
    295     int xs0 = rsMax(0, startx + 0);
    296     int xs1 = rsMax(0, startx + 1);
    297     int xs2 = rsMin(maxx, startx + 2);
    298     int xs3 = rsMin(maxx, startx + 3);
    299 
    300     float p0  = cubicInterpolate(yp0[xs0], yp0[xs1],
    301                                  yp0[xs2], yp0[xs3], xf);
    302     float p1  = cubicInterpolate(yp1[xs0], yp1[xs1],
    303                                  yp1[xs2], yp1[xs3], xf);
    304     float p2  = cubicInterpolate(yp2[xs0], yp2[xs1],
    305                                  yp2[xs2], yp2[xs3], xf);
    306     float p3  = cubicInterpolate(yp3[xs0], yp3[xs1],
    307                                  yp3[xs2], yp3[xs3], xf);
    308 
    309     float p  = cubicInterpolate(p0, p1, p2, p3, yf);
    310     return p;
    311 }
    312 
    313 void RsdCpuScriptIntrinsicResize::kernelU4(const RsExpandKernelDriverInfo *info,
    314                                                 uint32_t xstart, uint32_t xend,
    315                                                 uint32_t outstep) {
    316     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    317 
    318     if (!cp->mAlloc.get()) {
    319         ALOGE("Resize executed without input, skipping");
    320         return;
    321     }
    322     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    323     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    324     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    325     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    326 
    327     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    328     int starty = (int) floor(yf - 1);
    329     yf = yf - floor(yf);
    330     int maxy = srcHeight - 1;
    331     int ys0 = rsMax(0, starty + 0);
    332     int ys1 = rsMax(0, starty + 1);
    333     int ys2 = rsMin(maxy, starty + 2);
    334     int ys3 = rsMin(maxy, starty + 3);
    335 
    336     const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
    337     const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
    338     const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
    339     const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
    340 
    341     uchar4 *out = ((uchar4 *)info->outPtr[0]) + xstart;
    342     uint32_t x1 = xstart;
    343     uint32_t x2 = xend;
    344 
    345 #if defined(ARCH_ARM_USE_INTRINSICS)
    346     if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
    347         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    348         long xf16 = rint(xf * 0x10000);
    349         uint32_t xinc16 = rint(cp->scaleX * 0x10000);
    350 
    351         int xoff = (xf16 >> 16) - 1;
    352         int xclip = rsMax(0, xoff) - xoff;
    353         int len = x2 - x1;
    354 
    355         int32_t yr[4];
    356         uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
    357         mkYCoeff(yr, yf);
    358 
    359         xoff += xclip;
    360 
    361         rsdIntrinsicResizeB4_K(
    362                 out, len,
    363                 xf16 & 0xffff, xinc16,
    364                 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
    365                 xclip, srcWidth - xoff + xclip,
    366                 osc_ctl, yr);
    367         out += len;
    368         x1 += len;
    369     }
    370 #endif
    371 
    372     while(x1 < x2) {
    373         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    374         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    375         out++;
    376         x1++;
    377     }
    378 }
    379 
    380 void RsdCpuScriptIntrinsicResize::kernelU2(const RsExpandKernelDriverInfo *info,
    381                                                 uint32_t xstart, uint32_t xend,
    382                                                 uint32_t outstep) {
    383     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    384 
    385     if (!cp->mAlloc.get()) {
    386         ALOGE("Resize executed without input, skipping");
    387         return;
    388     }
    389     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    390     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    391     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    392     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    393 
    394     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    395     int starty = (int) floor(yf - 1);
    396     yf = yf - floor(yf);
    397     int maxy = srcHeight - 1;
    398     int ys0 = rsMax(0, starty + 0);
    399     int ys1 = rsMax(0, starty + 1);
    400     int ys2 = rsMin(maxy, starty + 2);
    401     int ys3 = rsMin(maxy, starty + 3);
    402 
    403     const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
    404     const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
    405     const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
    406     const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
    407 
    408     uchar2 *out = ((uchar2 *)info->outPtr[0]) + xstart;
    409     uint32_t x1 = xstart;
    410     uint32_t x2 = xend;
    411 
    412 #if defined(ARCH_ARM_USE_INTRINSICS)
    413     if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
    414         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    415         long xf16 = rint(xf * 0x10000);
    416         uint32_t xinc16 = rint(cp->scaleX * 0x10000);
    417 
    418         int xoff = (xf16 >> 16) - 1;
    419         int xclip = rsMax(0, xoff) - xoff;
    420         int len = x2 - x1;
    421 
    422         int32_t yr[4];
    423         uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
    424         mkYCoeff(yr, yf);
    425 
    426         xoff += xclip;
    427 
    428         rsdIntrinsicResizeB2_K(
    429                 out, len,
    430                 xf16 & 0xffff, xinc16,
    431                 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
    432                 xclip, srcWidth - xoff + xclip,
    433                 osc_ctl, yr);
    434         out += len;
    435         x1 += len;
    436     }
    437 #endif
    438 
    439     while(x1 < x2) {
    440         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    441         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    442         out++;
    443         x1++;
    444     }
    445 }
    446 
    447 void RsdCpuScriptIntrinsicResize::kernelU1(const RsExpandKernelDriverInfo *info,
    448                                                 uint32_t xstart, uint32_t xend,
    449                                                 uint32_t outstep) {
    450     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    451 
    452     if (!cp->mAlloc.get()) {
    453         ALOGE("Resize executed without input, skipping");
    454         return;
    455     }
    456     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    457     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    458     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    459     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    460 
    461     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    462     int starty = (int) floor(yf - 1);
    463     yf = yf - floor(yf);
    464     int maxy = srcHeight - 1;
    465     int ys0 = rsMax(0, starty + 0);
    466     int ys1 = rsMax(0, starty + 1);
    467     int ys2 = rsMin(maxy, starty + 2);
    468     int ys3 = rsMin(maxy, starty + 3);
    469 
    470     const uchar *yp0 = pin + stride * ys0;
    471     const uchar *yp1 = pin + stride * ys1;
    472     const uchar *yp2 = pin + stride * ys2;
    473     const uchar *yp3 = pin + stride * ys3;
    474 
    475     uchar *out = ((uchar *)info->outPtr[0]) + xstart;
    476     uint32_t x1 = xstart;
    477     uint32_t x2 = xend;
    478 
    479 #if defined(ARCH_ARM_USE_INTRINSICS)
    480     if (gArchUseSIMD && x2 > x1 && cp->scaleX < 4.0f) {
    481         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    482         long xf16 = rint(xf * 0x10000);
    483         uint32_t xinc16 = rint(cp->scaleX * 0x10000);
    484 
    485         int xoff = (xf16 >> 16) - 1;
    486         int xclip = rsMax(0, xoff) - xoff;
    487         int len = x2 - x1;
    488 
    489         int32_t yr[4];
    490         uint64_t osc_ctl = rsdIntrinsicResize_oscctl_K(xinc16);
    491         mkYCoeff(yr, yf);
    492 
    493         xoff += xclip;
    494 
    495         rsdIntrinsicResizeB1_K(
    496                 out, len,
    497                 xf16 & 0xffff, xinc16,
    498                 yp0 + xoff, yp1 + xoff, yp2 + xoff, yp3 + xoff,
    499                 xclip, srcWidth - xoff + xclip,
    500                 osc_ctl, yr);
    501         out += len;
    502         x1 += len;
    503     }
    504 #endif
    505 
    506     while(x1 < x2) {
    507         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    508         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    509         out++;
    510         x1++;
    511     }
    512 }
    513 
    514 void RsdCpuScriptIntrinsicResize::kernelF4(const RsExpandKernelDriverInfo *info,
    515                                                 uint32_t xstart, uint32_t xend,
    516                                                 uint32_t outstep) {
    517     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    518 
    519     if (!cp->mAlloc.get()) {
    520         ALOGE("Resize executed without input, skipping");
    521         return;
    522     }
    523     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    524     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    525     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    526     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    527 
    528     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    529     int starty = (int) floor(yf - 1);
    530     yf = yf - floor(yf);
    531     int maxy = srcHeight - 1;
    532     int ys0 = rsMax(0, starty + 0);
    533     int ys1 = rsMax(0, starty + 1);
    534     int ys2 = rsMin(maxy, starty + 2);
    535     int ys3 = rsMin(maxy, starty + 3);
    536 
    537     const float4 *yp0 = (const float4 *)(pin + stride * ys0);
    538     const float4 *yp1 = (const float4 *)(pin + stride * ys1);
    539     const float4 *yp2 = (const float4 *)(pin + stride * ys2);
    540     const float4 *yp3 = (const float4 *)(pin + stride * ys3);
    541 
    542     float4 *out = ((float4 *)info->outPtr[0]) + xstart;
    543     uint32_t x1 = xstart;
    544     uint32_t x2 = xend;
    545 
    546     while(x1 < x2) {
    547         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    548         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    549         out++;
    550         x1++;
    551     }
    552 }
    553 
    554 void RsdCpuScriptIntrinsicResize::kernelF2(const RsExpandKernelDriverInfo *info,
    555                                                 uint32_t xstart, uint32_t xend,
    556                                                 uint32_t outstep) {
    557     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    558 
    559     if (!cp->mAlloc.get()) {
    560         ALOGE("Resize executed without input, skipping");
    561         return;
    562     }
    563     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    564     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    565     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    566     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    567 
    568     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    569     int starty = (int) floor(yf - 1);
    570     yf = yf - floor(yf);
    571     int maxy = srcHeight - 1;
    572     int ys0 = rsMax(0, starty + 0);
    573     int ys1 = rsMax(0, starty + 1);
    574     int ys2 = rsMin(maxy, starty + 2);
    575     int ys3 = rsMin(maxy, starty + 3);
    576 
    577     const float2 *yp0 = (const float2 *)(pin + stride * ys0);
    578     const float2 *yp1 = (const float2 *)(pin + stride * ys1);
    579     const float2 *yp2 = (const float2 *)(pin + stride * ys2);
    580     const float2 *yp3 = (const float2 *)(pin + stride * ys3);
    581 
    582     float2 *out = ((float2 *)info->outPtr[0]) + xstart;
    583     uint32_t x1 = xstart;
    584     uint32_t x2 = xend;
    585 
    586     while(x1 < x2) {
    587         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    588         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    589         out++;
    590         x1++;
    591     }
    592 }
    593 
    594 void RsdCpuScriptIntrinsicResize::kernelF1(const RsExpandKernelDriverInfo *info,
    595                                                 uint32_t xstart, uint32_t xend,
    596                                                 uint32_t outstep) {
    597     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)info->usr;
    598 
    599     if (!cp->mAlloc.get()) {
    600         ALOGE("Resize executed without input, skipping");
    601         return;
    602     }
    603     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    604     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    605     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    606     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    607 
    608     float yf = (info->current.y + 0.5f) * cp->scaleY - 0.5f;
    609     int starty = (int) floor(yf - 1);
    610     yf = yf - floor(yf);
    611     int maxy = srcHeight - 1;
    612     int ys0 = rsMax(0, starty + 0);
    613     int ys1 = rsMax(0, starty + 1);
    614     int ys2 = rsMin(maxy, starty + 2);
    615     int ys3 = rsMin(maxy, starty + 3);
    616 
    617     const float *yp0 = (const float *)(pin + stride * ys0);
    618     const float *yp1 = (const float *)(pin + stride * ys1);
    619     const float *yp2 = (const float *)(pin + stride * ys2);
    620     const float *yp3 = (const float *)(pin + stride * ys3);
    621 
    622     float *out = ((float *)info->outPtr[0]) + xstart;
    623     uint32_t x1 = xstart;
    624     uint32_t x2 = xend;
    625 
    626     while(x1 < x2) {
    627         float xf = (x1 + 0.5f) * cp->scaleX - 0.5f;
    628         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    629         out++;
    630         x1++;
    631     }
    632 }
    633 
    634 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
    635             RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
    636             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
    637 
    638 }
    639 
    640 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
    641 }
    642 
    643 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot,
    644                                             const Allocation ** ains,
    645                                             uint32_t inLen, Allocation * aout,
    646                                             const void * usr, uint32_t usrLen,
    647                                             const RsScriptCall *sc)
    648 {
    649     if (!mAlloc.get()) {
    650         ALOGE("Resize executed without input, skipping");
    651         return;
    652     }
    653     const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
    654     const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
    655     const size_t stride = mAlloc->mHal.drvState.lod[0].stride;
    656 
    657     //check the data type to determine F or U.
    658     if (mAlloc->getType()->getElement()->getType() == RS_TYPE_UNSIGNED_8) {
    659         switch(mAlloc->getType()->getElement()->getVectorSize()) {
    660         case 1:
    661             mRootPtr = &kernelU1;
    662             break;
    663         case 2:
    664             mRootPtr = &kernelU2;
    665             break;
    666         case 3:
    667         case 4:
    668             mRootPtr = &kernelU4;
    669             break;
    670         }
    671     } else {
    672         switch(mAlloc->getType()->getElement()->getVectorSize()) {
    673         case 1:
    674             mRootPtr = &kernelF1;
    675             break;
    676         case 2:
    677             mRootPtr = &kernelF2;
    678             break;
    679         case 3:
    680         case 4:
    681             mRootPtr = &kernelF4;
    682             break;
    683         }
    684     }
    685 
    686     scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
    687     scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
    688 
    689 }
    690 
    691 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
    692     s->mHal.info.exportedVariableCount = 1;
    693 }
    694 
    695 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
    696     mAlloc.clear();
    697 }
    698 
    699 
    700 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
    701 
    702     return new RsdCpuScriptIntrinsicResize(ctx, s, e);
    703 }
    704