Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuIntrinsic.h"
     18 #include "rsCpuIntrinsicInlines.h"
     19 
     20 using namespace android;
     21 using namespace android::renderscript;
     22 
     23 namespace android {
     24 namespace renderscript {
     25 
     26 
     27 class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
     28 public:
     29     virtual void populateScript(Script *);
     30     virtual void invokeFreeChildren();
     31 
     32     virtual void setGlobalVar(uint32_t slot, const void *data, size_t dataLength);
     33     virtual void setGlobalObj(uint32_t slot, ObjectBase *data);
     34 
     35     virtual ~RsdCpuScriptIntrinsicHistogram();
     36     RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
     37 
     38 protected:
     39     void preLaunch(uint32_t slot, const Allocation * ain,
     40                    Allocation * aout, const void * usr,
     41                    uint32_t usrLen, const RsScriptCall *sc);
     42     void postLaunch(uint32_t slot, const Allocation * ain,
     43                     Allocation * aout, const void * usr,
     44                     uint32_t usrLen, const RsScriptCall *sc);
     45 
     46 
     47     float mDot[4];
     48     int mDotI[4];
     49     int *mSums;
     50     ObjectBaseRef<Allocation> mAllocOut;
     51 
     52     static void kernelP1U4(const RsForEachStubParamStruct *p,
     53                           uint32_t xstart, uint32_t xend,
     54                           uint32_t instep, uint32_t outstep);
     55     static void kernelP1U3(const RsForEachStubParamStruct *p,
     56                           uint32_t xstart, uint32_t xend,
     57                           uint32_t instep, uint32_t outstep);
     58     static void kernelP1U2(const RsForEachStubParamStruct *p,
     59                           uint32_t xstart, uint32_t xend,
     60                           uint32_t instep, uint32_t outstep);
     61     static void kernelP1U1(const RsForEachStubParamStruct *p,
     62                           uint32_t xstart, uint32_t xend,
     63                           uint32_t instep, uint32_t outstep);
     64 
     65     static void kernelP1L4(const RsForEachStubParamStruct *p,
     66                            uint32_t xstart, uint32_t xend,
     67                            uint32_t instep, uint32_t outstep);
     68     static void kernelP1L3(const RsForEachStubParamStruct *p,
     69                            uint32_t xstart, uint32_t xend,
     70                            uint32_t instep, uint32_t outstep);
     71     static void kernelP1L2(const RsForEachStubParamStruct *p,
     72                            uint32_t xstart, uint32_t xend,
     73                            uint32_t instep, uint32_t outstep);
     74     static void kernelP1L1(const RsForEachStubParamStruct *p,
     75                            uint32_t xstart, uint32_t xend,
     76                            uint32_t instep, uint32_t outstep);
     77 
     78 };
     79 
     80 }
     81 }
     82 
     83 void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
     84     rsAssert(slot == 1);
     85     mAllocOut.set(static_cast<Allocation *>(data));
     86 }
     87 
     88 void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
     89     rsAssert(slot == 0);
     90     rsAssert(dataLength == 16);
     91     memcpy(mDot, data, 16);
     92     mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
     93     mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
     94     mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
     95     mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
     96 }
     97 
     98 
     99 
    100 void RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot, const Allocation * ain,
    101                                       Allocation * aout, const void * usr,
    102                                       uint32_t usrLen, const RsScriptCall *sc) {
    103 
    104     const uint32_t threads = mCtx->getThreadCount();
    105     uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
    106 
    107     switch (slot) {
    108     case 0:
    109         switch(vSize) {
    110         case 1:
    111             mRootPtr = &kernelP1U1;
    112             break;
    113         case 2:
    114             mRootPtr = &kernelP1U2;
    115             break;
    116         case 3:
    117             mRootPtr = &kernelP1U3;
    118             vSize = 4;
    119             break;
    120         case 4:
    121             mRootPtr = &kernelP1U4;
    122             break;
    123         }
    124         break;
    125     case 1:
    126         switch(ain->getType()->getElement()->getVectorSize()) {
    127         case 1:
    128             mRootPtr = &kernelP1L1;
    129             break;
    130         case 2:
    131             mRootPtr = &kernelP1L2;
    132             break;
    133         case 3:
    134             mRootPtr = &kernelP1L3;
    135             break;
    136         case 4:
    137             mRootPtr = &kernelP1L4;
    138             break;
    139         }
    140         break;
    141     }
    142     memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
    143 }
    144 
    145 void RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot, const Allocation * ain,
    146                                        Allocation * aout, const void * usr,
    147                                        uint32_t usrLen, const RsScriptCall *sc) {
    148 
    149     unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
    150     uint32_t threads = mCtx->getThreadCount();
    151     uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
    152 
    153     if (vSize == 3) vSize = 4;
    154 
    155     for (uint32_t ct=0; ct < (256 * vSize); ct++) {
    156         o[ct] = mSums[ct];
    157         for (uint32_t t=1; t < threads; t++) {
    158             o[ct] += mSums[ct + (256 * vSize * t)];
    159         }
    160     }
    161 }
    162 
    163 void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsForEachStubParamStruct *p,
    164                                                 uint32_t xstart, uint32_t xend,
    165                                                 uint32_t instep, uint32_t outstep) {
    166 
    167     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
    168     uchar *in = (uchar *)p->in;
    169     int * sums = &cp->mSums[256 * 4 * p->lid];
    170 
    171     for (uint32_t x = xstart; x < xend; x++) {
    172         sums[(in[0] << 2)    ] ++;
    173         sums[(in[1] << 2) + 1] ++;
    174         sums[(in[2] << 2) + 2] ++;
    175         sums[(in[3] << 2) + 3] ++;
    176         in += instep;
    177     }
    178 }
    179 
    180 void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsForEachStubParamStruct *p,
    181                                                 uint32_t xstart, uint32_t xend,
    182                                                 uint32_t instep, uint32_t outstep) {
    183 
    184     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
    185     uchar *in = (uchar *)p->in;
    186     int * sums = &cp->mSums[256 * 4 * p->lid];
    187 
    188     for (uint32_t x = xstart; x < xend; x++) {
    189         sums[(in[0] << 2)    ] ++;
    190         sums[(in[1] << 2) + 1] ++;
    191         sums[(in[2] << 2) + 2] ++;
    192         in += instep;
    193     }
    194 }
    195 
    196 void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsForEachStubParamStruct *p,
    197                                                 uint32_t xstart, uint32_t xend,
    198                                                 uint32_t instep, uint32_t outstep) {
    199 
    200     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
    201     uchar *in = (uchar *)p->in;
    202     int * sums = &cp->mSums[256 * 2 * p->lid];
    203 
    204     for (uint32_t x = xstart; x < xend; x++) {
    205         sums[(in[0] << 1)    ] ++;
    206         sums[(in[1] << 1) + 1] ++;
    207         in += instep;
    208     }
    209 }
    210 
    211 void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsForEachStubParamStruct *p,
    212                                                 uint32_t xstart, uint32_t xend,
    213                                                 uint32_t instep, uint32_t outstep) {
    214 
    215     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
    216     uchar *in = (uchar *)p->in;
    217     int * sums = &cp->mSums[256 * p->lid];
    218 
    219     for (uint32_t x = xstart; x < xend; x++) {
    220         int t = (cp->mDotI[0] * in[0]) +
    221                 (cp->mDotI[1] * in[1]) +
    222                 (cp->mDotI[2] * in[2]) +
    223                 (cp->mDotI[3] * in[3]);
    224         sums[(t + 0x7f) >> 8] ++;
    225         in += instep;
    226     }
    227 }
    228 
    229 void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsForEachStubParamStruct *p,
    230                                                 uint32_t xstart, uint32_t xend,
    231                                                 uint32_t instep, uint32_t outstep) {
    232 
    233     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
    234     uchar *in = (uchar *)p->in;
    235     int * sums = &cp->mSums[256 * p->lid];
    236 
    237     for (uint32_t x = xstart; x < xend; x++) {
    238         int t = (cp->mDotI[0] * in[0]) +
    239                 (cp->mDotI[1] * in[1]) +
    240                 (cp->mDotI[2] * in[2]);
    241         sums[(t + 0x7f) >> 8] ++;
    242         in += instep;
    243     }
    244 }
    245 
    246 void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsForEachStubParamStruct *p,
    247                                                 uint32_t xstart, uint32_t xend,
    248                                                 uint32_t instep, uint32_t outstep) {
    249 
    250     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
    251     uchar *in = (uchar *)p->in;
    252     int * sums = &cp->mSums[256 * p->lid];
    253 
    254     for (uint32_t x = xstart; x < xend; x++) {
    255         int t = (cp->mDotI[0] * in[0]) +
    256                 (cp->mDotI[1] * in[1]);
    257         sums[(t + 0x7f) >> 8] ++;
    258         in += instep;
    259     }
    260 }
    261 
    262 void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsForEachStubParamStruct *p,
    263                                                 uint32_t xstart, uint32_t xend,
    264                                                 uint32_t instep, uint32_t outstep) {
    265 
    266     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
    267     uchar *in = (uchar *)p->in;
    268     int * sums = &cp->mSums[256 * p->lid];
    269 
    270     for (uint32_t x = xstart; x < xend; x++) {
    271         int t = (cp->mDotI[0] * in[0]);
    272         sums[(t + 0x7f) >> 8] ++;
    273         in += instep;
    274     }
    275 }
    276 
    277 void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsForEachStubParamStruct *p,
    278                                                 uint32_t xstart, uint32_t xend,
    279                                                 uint32_t instep, uint32_t outstep) {
    280 
    281     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)p->usr;
    282     uchar *in = (uchar *)p->in;
    283     int * sums = &cp->mSums[256 * p->lid];
    284 
    285     for (uint32_t x = xstart; x < xend; x++) {
    286         sums[in[0]] ++;
    287         in += instep;
    288     }
    289 }
    290 
    291 
    292 RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
    293                                                      const Script *s, const Element *e)
    294             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) {
    295 
    296     mRootPtr = NULL;
    297     mSums = new int[256 * 4 * mCtx->getThreadCount()];
    298     mDot[0] = 0.299f;
    299     mDot[1] = 0.587f;
    300     mDot[2] = 0.114f;
    301     mDot[3] = 0;
    302     mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
    303     mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
    304     mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
    305     mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
    306 }
    307 
    308 RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
    309     if (mSums) {
    310         delete []mSums;
    311     }
    312 }
    313 
    314 void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
    315     s->mHal.info.exportedVariableCount = 2;
    316 }
    317 
    318 void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
    319 }
    320 
    321 
    322 RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
    323 
    324     return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
    325 }
    326 
    327 
    328