Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2014 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsCpuIntrinsic.h"
     19 #include "rsCpuIntrinsicInlines.h"
     20 
     21 using namespace android;
     22 using namespace android::renderscript;
     23 
     24 namespace android {
     25 namespace renderscript {
     26 
     27 
     28 class RsdCpuScriptIntrinsicResize : public RsdCpuScriptIntrinsic {
     29 public:
     30     virtual void populateScript(Script *);
     31     virtual void invokeFreeChildren();
     32 
     33     virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
     34 
     35     virtual ~RsdCpuScriptIntrinsicResize();
     36     RsdCpuScriptIntrinsicResize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *);
     37 
     38     virtual void preLaunch(uint32_t slot, const Allocation * ain,
     39                            Allocation * aout, const void * usr,
     40                            uint32_t usrLen, const RsScriptCall *sc);
     41 
     42     float scaleX;
     43     float scaleY;
     44 
     45 protected:
     46     ObjectBaseRef<const Allocation> mAlloc;
     47     ObjectBaseRef<const Element> mElement;
     48 
     49     static void kernelU1(const RsForEachStubParamStruct *p,
     50                          uint32_t xstart, uint32_t xend,
     51                          uint32_t instep, uint32_t outstep);
     52     static void kernelU2(const RsForEachStubParamStruct *p,
     53                          uint32_t xstart, uint32_t xend,
     54                          uint32_t instep, uint32_t outstep);
     55     static void kernelU4(const RsForEachStubParamStruct *p,
     56                          uint32_t xstart, uint32_t xend,
     57                          uint32_t instep, uint32_t outstep);
     58 };
     59 
     60 }
     61 }
     62 
     63 
     64 void RsdCpuScriptIntrinsicResize::setGlobalObj(uint32_t slot, ObjectBase *data) {
     65     rsAssert(slot == 0);
     66     mAlloc.set(static_cast<Allocation *>(data));
     67 }
     68 
     69 static float4 cubicInterpolate(float4 p0,float4 p1,float4 p2,float4 p3, float x) {
     70     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
     71             + x * (3.f * (p1 - p2) + p3 - p0)));
     72 }
     73 
     74 static float2 cubicInterpolate(float2 p0,float2 p1,float2 p2,float2 p3, float x) {
     75     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
     76             + x * (3.f * (p1 - p2) + p3 - p0)));
     77 }
     78 
     79 static float cubicInterpolate(float p0,float p1,float p2,float p3 , float x) {
     80     return p1 + 0.5f * x * (p2 - p0 + x * (2.f * p0 - 5.f * p1 + 4.f * p2 - p3
     81             + x * (3.f * (p1 - p2) + p3 - p0)));
     82 }
     83 
     84 static uchar4 OneBiCubic(const uchar4 *yp0, const uchar4 *yp1, const uchar4 *yp2, const uchar4 *yp3,
     85                          float xf, float yf, int width) {
     86     int startx = (int) floor(xf - 2);
     87     xf = xf - floor(xf);
     88     int maxx = width - 1;
     89     int xs0 = rsMax(0, startx + 0);
     90     int xs1 = rsMax(0, startx + 1);
     91     int xs2 = rsMin(maxx, startx + 2);
     92     int xs3 = rsMin(maxx, startx + 3);
     93 
     94     float4 p0  = cubicInterpolate(convert_float4(yp0[xs0]),
     95                                   convert_float4(yp0[xs1]),
     96                                   convert_float4(yp0[xs2]),
     97                                   convert_float4(yp0[xs3]), xf);
     98 
     99     float4 p1  = cubicInterpolate(convert_float4(yp1[xs0]),
    100                                   convert_float4(yp1[xs1]),
    101                                   convert_float4(yp1[xs2]),
    102                                   convert_float4(yp1[xs3]), xf);
    103 
    104     float4 p2  = cubicInterpolate(convert_float4(yp2[xs0]),
    105                                   convert_float4(yp2[xs1]),
    106                                   convert_float4(yp2[xs2]),
    107                                   convert_float4(yp2[xs3]), xf);
    108 
    109     float4 p3  = cubicInterpolate(convert_float4(yp3[xs0]),
    110                                   convert_float4(yp3[xs1]),
    111                                   convert_float4(yp3[xs2]),
    112                                   convert_float4(yp3[xs3]), xf);
    113 
    114     float4 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    115     p = clamp(p, 0.f, 255.f);
    116     return convert_uchar4(p);
    117 }
    118 
    119 static uchar2 OneBiCubic(const uchar2 *yp0, const uchar2 *yp1, const uchar2 *yp2, const uchar2 *yp3,
    120                          float xf, float yf, int width) {
    121     int startx = (int) floor(xf - 2);
    122     xf = xf - floor(xf);
    123     int maxx = width - 1;
    124     int xs0 = rsMax(0, startx + 0);
    125     int xs1 = rsMax(0, startx + 1);
    126     int xs2 = rsMin(maxx, startx + 2);
    127     int xs3 = rsMin(maxx, startx + 3);
    128 
    129     float2 p0  = cubicInterpolate(convert_float2(yp0[xs0]),
    130                                   convert_float2(yp0[xs1]),
    131                                   convert_float2(yp0[xs2]),
    132                                   convert_float2(yp0[xs3]), xf);
    133 
    134     float2 p1  = cubicInterpolate(convert_float2(yp1[xs0]),
    135                                   convert_float2(yp1[xs1]),
    136                                   convert_float2(yp1[xs2]),
    137                                   convert_float2(yp1[xs3]), xf);
    138 
    139     float2 p2  = cubicInterpolate(convert_float2(yp2[xs0]),
    140                                   convert_float2(yp2[xs1]),
    141                                   convert_float2(yp2[xs2]),
    142                                   convert_float2(yp2[xs3]), xf);
    143 
    144     float2 p3  = cubicInterpolate(convert_float2(yp3[xs0]),
    145                                   convert_float2(yp3[xs1]),
    146                                   convert_float2(yp3[xs2]),
    147                                   convert_float2(yp3[xs3]), xf);
    148 
    149     float2 p  = cubicInterpolate(p0, p1, p2, p3, yf);
    150     p = clamp(p, 0.f, 255.f);
    151     return convert_uchar2(p);
    152 }
    153 
    154 static uchar OneBiCubic(const uchar *yp0, const uchar *yp1, const uchar *yp2, const uchar *yp3,
    155                         float xf, float yf, int width) {
    156     int startx = (int) floor(xf - 2);
    157     xf = xf - floor(xf);
    158     int maxx = width - 1;
    159     int xs0 = rsMax(0, startx + 0);
    160     int xs1 = rsMax(0, startx + 1);
    161     int xs2 = rsMin(maxx, startx + 2);
    162     int xs3 = rsMin(maxx, startx + 3);
    163 
    164     float p0  = cubicInterpolate((float)yp0[xs0], (float)yp0[xs1],
    165                                  (float)yp0[xs2], (float)yp0[xs3], xf);
    166     float p1  = cubicInterpolate((float)yp1[xs0], (float)yp1[xs1],
    167                                  (float)yp1[xs2], (float)yp1[xs3], xf);
    168     float p2  = cubicInterpolate((float)yp2[xs0], (float)yp2[xs1],
    169                                  (float)yp2[xs2], (float)yp2[xs3], xf);
    170     float p3  = cubicInterpolate((float)yp3[xs0], (float)yp3[xs1],
    171                                  (float)yp3[xs2], (float)yp3[xs3], xf);
    172 
    173     float p  = cubicInterpolate(p0, p1, p2, p3, yf);
    174     p = clamp(p, 0.f, 255.f);
    175     return (uchar)p;
    176 }
    177 
    178 void RsdCpuScriptIntrinsicResize::kernelU4(const RsForEachStubParamStruct *p,
    179                                                 uint32_t xstart, uint32_t xend,
    180                                                 uint32_t instep, uint32_t outstep) {
    181     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
    182 
    183     if (!cp->mAlloc.get()) {
    184         ALOGE("Resize executed without input, skipping");
    185         return;
    186     }
    187     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    188     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    189     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    190     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    191 
    192     float yf = p->y * cp->scaleY;
    193     int starty = (int) floor(yf - 2);
    194     yf = yf - floor(yf);
    195     int maxy = srcHeight - 1;
    196     int ys0 = rsMax(0, starty + 0);
    197     int ys1 = rsMax(0, starty + 1);
    198     int ys2 = rsMin(maxy, starty + 2);
    199     int ys3 = rsMin(maxy, starty + 3);
    200 
    201     const uchar4 *yp0 = (const uchar4 *)(pin + stride * ys0);
    202     const uchar4 *yp1 = (const uchar4 *)(pin + stride * ys1);
    203     const uchar4 *yp2 = (const uchar4 *)(pin + stride * ys2);
    204     const uchar4 *yp3 = (const uchar4 *)(pin + stride * ys3);
    205 
    206     uchar4 *out = ((uchar4 *)p->out) + xstart;
    207     uint32_t x1 = xstart;
    208     uint32_t x2 = xend;
    209 
    210     while(x1 < x2) {
    211         float xf = x1 * cp->scaleX;
    212         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    213         out++;
    214         x1++;
    215     }
    216 }
    217 
    218 void RsdCpuScriptIntrinsicResize::kernelU2(const RsForEachStubParamStruct *p,
    219                                                 uint32_t xstart, uint32_t xend,
    220                                                 uint32_t instep, uint32_t outstep) {
    221     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
    222 
    223     if (!cp->mAlloc.get()) {
    224         ALOGE("Resize executed without input, skipping");
    225         return;
    226     }
    227     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    228     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    229     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    230     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    231 
    232     float yf = p->y * cp->scaleY;
    233     int starty = (int) floor(yf - 2);
    234     yf = yf - floor(yf);
    235     int maxy = srcHeight - 1;
    236     int ys0 = rsMax(0, starty + 0);
    237     int ys1 = rsMax(0, starty + 1);
    238     int ys2 = rsMin(maxy, starty + 2);
    239     int ys3 = rsMin(maxy, starty + 3);
    240 
    241     const uchar2 *yp0 = (const uchar2 *)(pin + stride * ys0);
    242     const uchar2 *yp1 = (const uchar2 *)(pin + stride * ys1);
    243     const uchar2 *yp2 = (const uchar2 *)(pin + stride * ys2);
    244     const uchar2 *yp3 = (const uchar2 *)(pin + stride * ys3);
    245 
    246     uchar2 *out = ((uchar2 *)p->out) + xstart;
    247     uint32_t x1 = xstart;
    248     uint32_t x2 = xend;
    249 
    250     while(x1 < x2) {
    251         float xf = x1 * cp->scaleX;
    252         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    253         out++;
    254         x1++;
    255     }
    256 }
    257 
    258 void RsdCpuScriptIntrinsicResize::kernelU1(const RsForEachStubParamStruct *p,
    259                                                 uint32_t xstart, uint32_t xend,
    260                                                 uint32_t instep, uint32_t outstep) {
    261     RsdCpuScriptIntrinsicResize *cp = (RsdCpuScriptIntrinsicResize *)p->usr;
    262 
    263     if (!cp->mAlloc.get()) {
    264         ALOGE("Resize executed without input, skipping");
    265         return;
    266     }
    267     const uchar *pin = (const uchar *)cp->mAlloc->mHal.drvState.lod[0].mallocPtr;
    268     const int srcHeight = cp->mAlloc->mHal.drvState.lod[0].dimY;
    269     const int srcWidth = cp->mAlloc->mHal.drvState.lod[0].dimX;
    270     const size_t stride = cp->mAlloc->mHal.drvState.lod[0].stride;
    271 
    272     float yf = p->y * cp->scaleY;
    273     int starty = (int) floor(yf - 2);
    274     yf = yf - floor(yf);
    275     int maxy = srcHeight - 1;
    276     int ys0 = rsMax(0, starty + 0);
    277     int ys1 = rsMax(0, starty + 1);
    278     int ys2 = rsMin(maxy, starty + 2);
    279     int ys3 = rsMin(maxy, starty + 3);
    280 
    281     const uchar *yp0 = pin + stride * ys0;
    282     const uchar *yp1 = pin + stride * ys1;
    283     const uchar *yp2 = pin + stride * ys2;
    284     const uchar *yp3 = pin + stride * ys3;
    285 
    286     uchar *out = ((uchar *)p->out) + xstart;
    287     uint32_t x1 = xstart;
    288     uint32_t x2 = xend;
    289 
    290     while(x1 < x2) {
    291         float xf = x1 * cp->scaleX;
    292         *out = OneBiCubic(yp0, yp1, yp2, yp3, xf, yf, srcWidth);
    293         out++;
    294         x1++;
    295     }
    296 }
    297 
    298 RsdCpuScriptIntrinsicResize::RsdCpuScriptIntrinsicResize (
    299             RsdCpuReferenceImpl *ctx, const Script *s, const Element *e)
    300             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_RESIZE) {
    301 
    302 }
    303 
    304 RsdCpuScriptIntrinsicResize::~RsdCpuScriptIntrinsicResize() {
    305 }
    306 
    307 void RsdCpuScriptIntrinsicResize::preLaunch(uint32_t slot, const Allocation * ain,
    308                                             Allocation * aout, const void * usr,
    309                                             uint32_t usrLen, const RsScriptCall *sc)
    310 {
    311     if (!mAlloc.get()) {
    312         ALOGE("Resize executed without input, skipping");
    313         return;
    314     }
    315     const uint32_t srcHeight = mAlloc->mHal.drvState.lod[0].dimY;
    316     const uint32_t srcWidth = mAlloc->mHal.drvState.lod[0].dimX;
    317     const size_t stride = mAlloc->mHal.drvState.lod[0].stride;
    318 
    319     switch(mAlloc->getType()->getElement()->getVectorSize()) {
    320     case 1:
    321         mRootPtr = &kernelU1;
    322         break;
    323     case 2:
    324         mRootPtr = &kernelU2;
    325         break;
    326     case 3:
    327     case 4:
    328         mRootPtr = &kernelU4;
    329         break;
    330     }
    331 
    332     scaleX = (float)srcWidth / aout->mHal.drvState.lod[0].dimX;
    333     scaleY = (float)srcHeight / aout->mHal.drvState.lod[0].dimY;
    334 
    335 }
    336 
    337 void RsdCpuScriptIntrinsicResize::populateScript(Script *s) {
    338     s->mHal.info.exportedVariableCount = 1;
    339 }
    340 
    341 void RsdCpuScriptIntrinsicResize::invokeFreeChildren() {
    342     mAlloc.clear();
    343 }
    344 
    345 
    346 RsdCpuScriptImpl * rsdIntrinsic_Resize(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
    347 
    348     return new RsdCpuScriptIntrinsicResize(ctx, s, e);
    349 }
    350 
    351 
    352