Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuIntrinsic.h"
     18 #include "rsCpuIntrinsicInlines.h"
     19 
     20 namespace android {
     21 namespace renderscript {
     22 
     23 
     24 class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
     25 public:
     26     void populateScript(Script *) override;
     27     void invokeFreeChildren() override;
     28 
     29     void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override;
     30     void setGlobalObj(uint32_t slot, ObjectBase *data) override;
     31 
     32     ~RsdCpuScriptIntrinsicHistogram() override;
     33     RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
     34 
     35 protected:
     36     void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
     37                    Allocation * aout, const void * usr,
     38                    uint32_t usrLen, const RsScriptCall *sc);
     39     void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
     40                     Allocation * aout, const void * usr,
     41                     uint32_t usrLen, const RsScriptCall *sc);
     42 
     43 
     44     float mDot[4];
     45     int mDotI[4];
     46     int *mSums;
     47     ObjectBaseRef<Allocation> mAllocOut;
     48 
     49     static void kernelP1U4(const RsExpandKernelDriverInfo *info,
     50                            uint32_t xstart, uint32_t xend,
     51                            uint32_t outstep);
     52     static void kernelP1U3(const RsExpandKernelDriverInfo *info,
     53                            uint32_t xstart, uint32_t xend,
     54                            uint32_t outstep);
     55     static void kernelP1U2(const RsExpandKernelDriverInfo *info,
     56                            uint32_t xstart, uint32_t xend,
     57                            uint32_t outstep);
     58     static void kernelP1U1(const RsExpandKernelDriverInfo *info,
     59                            uint32_t xstart, uint32_t xend,
     60                            uint32_t outstep);
     61 
     62     static void kernelP1L4(const RsExpandKernelDriverInfo *info,
     63                            uint32_t xstart, uint32_t xend,
     64                            uint32_t outstep);
     65     static void kernelP1L3(const RsExpandKernelDriverInfo *info,
     66                            uint32_t xstart, uint32_t xend,
     67                            uint32_t outstep);
     68     static void kernelP1L2(const RsExpandKernelDriverInfo *info,
     69                            uint32_t xstart, uint32_t xend,
     70                            uint32_t outstep);
     71     static void kernelP1L1(const RsExpandKernelDriverInfo *info,
     72                            uint32_t xstart, uint32_t xend,
     73                            uint32_t outstep);
     74 
     75 };
     76 
     77 void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
     78     rsAssert(slot == 1);
     79     mAllocOut.set(static_cast<Allocation *>(data));
     80 }
     81 
     82 void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
     83     rsAssert(slot == 0);
     84     rsAssert(dataLength == 16);
     85     memcpy(mDot, data, 16);
     86     mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
     87     mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
     88     mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
     89     mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
     90 }
     91 
     92 
     93 
     94 void
     95 RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot,
     96                                           const Allocation ** ains,
     97                                           uint32_t inLen, Allocation * aout,
     98                                           const void * usr, uint32_t usrLen,
     99                                           const RsScriptCall *sc) {
    100 
    101     const uint32_t threads = mCtx->getThreadCount();
    102     uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
    103 
    104     switch (slot) {
    105     case 0:
    106         switch(vSize) {
    107         case 1:
    108             mRootPtr = &kernelP1U1;
    109             break;
    110         case 2:
    111             mRootPtr = &kernelP1U2;
    112             break;
    113         case 3:
    114             mRootPtr = &kernelP1U3;
    115             vSize = 4;
    116             break;
    117         case 4:
    118             mRootPtr = &kernelP1U4;
    119             break;
    120         }
    121         break;
    122     case 1:
    123         switch(ains[0]->getType()->getElement()->getVectorSize()) {
    124         case 1:
    125             mRootPtr = &kernelP1L1;
    126             break;
    127         case 2:
    128             mRootPtr = &kernelP1L2;
    129             break;
    130         case 3:
    131             mRootPtr = &kernelP1L3;
    132             break;
    133         case 4:
    134             mRootPtr = &kernelP1L4;
    135             break;
    136         }
    137         break;
    138     }
    139     memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
    140 }
    141 
    142 void
    143 RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot,
    144                                            const Allocation ** ains,
    145                                            uint32_t inLen,  Allocation * aout,
    146                                            const void * usr, uint32_t usrLen,
    147                                            const RsScriptCall *sc) {
    148 
    149     unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
    150     uint32_t threads = mCtx->getThreadCount();
    151     uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
    152 
    153     if (vSize == 3) vSize = 4;
    154 
    155     for (uint32_t ct=0; ct < (256 * vSize); ct++) {
    156         o[ct] = mSums[ct];
    157         for (uint32_t t=1; t < threads; t++) {
    158             o[ct] += mSums[ct + (256 * vSize * t)];
    159         }
    160     }
    161 }
    162 
    163 void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info,
    164                                                 uint32_t xstart, uint32_t xend,
    165                                                 uint32_t outstep) {
    166 
    167     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    168     uchar *in = (uchar *)info->inPtr[0];
    169     int * sums = &cp->mSums[256 * 4 * info->lid];
    170 
    171     for (uint32_t x = xstart; x < xend; x++) {
    172         sums[(in[0] << 2)    ] ++;
    173         sums[(in[1] << 2) + 1] ++;
    174         sums[(in[2] << 2) + 2] ++;
    175         sums[(in[3] << 2) + 3] ++;
    176         in += info->inStride[0];
    177     }
    178 }
    179 
    180 void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info,
    181                                                 uint32_t xstart, uint32_t xend,
    182                                                 uint32_t outstep) {
    183 
    184     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    185     uchar *in = (uchar *)info->inPtr[0];
    186     int * sums = &cp->mSums[256 * 4 * info->lid];
    187 
    188     for (uint32_t x = xstart; x < xend; x++) {
    189         sums[(in[0] << 2)    ] ++;
    190         sums[(in[1] << 2) + 1] ++;
    191         sums[(in[2] << 2) + 2] ++;
    192         in += info->inStride[0];
    193     }
    194 }
    195 
    196 void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info,
    197                                                 uint32_t xstart, uint32_t xend,
    198                                                 uint32_t outstep) {
    199 
    200     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    201     uchar *in = (uchar *)info->inPtr[0];
    202     int * sums = &cp->mSums[256 * 2 * info->lid];
    203 
    204     for (uint32_t x = xstart; x < xend; x++) {
    205         sums[(in[0] << 1)    ] ++;
    206         sums[(in[1] << 1) + 1] ++;
    207         in += info->inStride[0];
    208     }
    209 }
    210 
    211 void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info,
    212                                                 uint32_t xstart, uint32_t xend,
    213                                                 uint32_t outstep) {
    214 
    215     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    216     uchar *in = (uchar *)info->inPtr[0];
    217     int * sums = &cp->mSums[256 * info->lid];
    218 
    219     for (uint32_t x = xstart; x < xend; x++) {
    220         int t = (cp->mDotI[0] * in[0]) +
    221                 (cp->mDotI[1] * in[1]) +
    222                 (cp->mDotI[2] * in[2]) +
    223                 (cp->mDotI[3] * in[3]);
    224         sums[(t + 0x7f) >> 8] ++;
    225         in += info->inStride[0];
    226     }
    227 }
    228 
    229 void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info,
    230                                                 uint32_t xstart, uint32_t xend,
    231                                                 uint32_t outstep) {
    232 
    233     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    234     uchar *in = (uchar *)info->inPtr[0];
    235     int * sums = &cp->mSums[256 * info->lid];
    236 
    237     for (uint32_t x = xstart; x < xend; x++) {
    238         int t = (cp->mDotI[0] * in[0]) +
    239                 (cp->mDotI[1] * in[1]) +
    240                 (cp->mDotI[2] * in[2]);
    241         sums[(t + 0x7f) >> 8] ++;
    242         in += info->inStride[0];
    243     }
    244 }
    245 
    246 void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info,
    247                                                 uint32_t xstart, uint32_t xend,
    248                                                 uint32_t outstep) {
    249 
    250     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    251     uchar *in = (uchar *)info->inPtr[0];
    252     int * sums = &cp->mSums[256 * info->lid];
    253 
    254     for (uint32_t x = xstart; x < xend; x++) {
    255         int t = (cp->mDotI[0] * in[0]) +
    256                 (cp->mDotI[1] * in[1]);
    257         sums[(t + 0x7f) >> 8] ++;
    258         in += info->inStride[0];
    259     }
    260 }
    261 
    262 void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info,
    263                                                 uint32_t xstart, uint32_t xend,
    264                                                 uint32_t outstep) {
    265 
    266     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    267     uchar *in = (uchar *)info->inPtr[0];
    268     int * sums = &cp->mSums[256 * info->lid];
    269 
    270     for (uint32_t x = xstart; x < xend; x++) {
    271         int t = (cp->mDotI[0] * in[0]);
    272         sums[(t + 0x7f) >> 8] ++;
    273         in += info->inStride[0];
    274     }
    275 }
    276 
    277 void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info,
    278                                                 uint32_t xstart, uint32_t xend,
    279                                                 uint32_t outstep) {
    280 
    281     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    282     uchar *in = (uchar *)info->inPtr[0];
    283     int * sums = &cp->mSums[256 * info->lid];
    284 
    285     for (uint32_t x = xstart; x < xend; x++) {
    286         sums[in[0]] ++;
    287         in += info->inStride[0];
    288     }
    289 }
    290 
    291 
    292 RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
    293                                                      const Script *s, const Element *e)
    294             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) {
    295 
    296     mRootPtr = nullptr;
    297     mSums = new int[256 * 4 * mCtx->getThreadCount()];
    298     mDot[0] = 0.299f;
    299     mDot[1] = 0.587f;
    300     mDot[2] = 0.114f;
    301     mDot[3] = 0;
    302     mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
    303     mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
    304     mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
    305     mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
    306 }
    307 
    308 RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
    309     if (mSums) {
    310         delete []mSums;
    311     }
    312 }
    313 
    314 void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
    315     s->mHal.info.exportedVariableCount = 2;
    316 }
    317 
    318 void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
    319 }
    320 
    321 RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
    322 
    323     return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
    324 }
    325 
    326 } // namespace renderscript
    327 } // namespace android
    328