Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "rsCpuIntrinsic.h"
     18 #include "rsCpuIntrinsicInlines.h"
     19 
     20 using namespace android;
     21 using namespace android::renderscript;
     22 
     23 namespace android {
     24 namespace renderscript {
     25 
     26 
     27 class RsdCpuScriptIntrinsicHistogram : public RsdCpuScriptIntrinsic {
     28 public:
     29     void populateScript(Script *) override;
     30     void invokeFreeChildren() override;
     31 
     32     void setGlobalVar(uint32_t slot, const void *data, size_t dataLength) override;
     33     void setGlobalObj(uint32_t slot, ObjectBase *data) override;
     34 
     35     ~RsdCpuScriptIntrinsicHistogram() override;
     36     RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
     37 
     38 protected:
     39     void preLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
     40                    Allocation * aout, const void * usr,
     41                    uint32_t usrLen, const RsScriptCall *sc);
     42     void postLaunch(uint32_t slot, const Allocation ** ains, uint32_t inLen,
     43                     Allocation * aout, const void * usr,
     44                     uint32_t usrLen, const RsScriptCall *sc);
     45 
     46 
     47     float mDot[4];
     48     int mDotI[4];
     49     int *mSums;
     50     ObjectBaseRef<Allocation> mAllocOut;
     51 
     52     static void kernelP1U4(const RsExpandKernelDriverInfo *info,
     53                            uint32_t xstart, uint32_t xend,
     54                            uint32_t outstep);
     55     static void kernelP1U3(const RsExpandKernelDriverInfo *info,
     56                            uint32_t xstart, uint32_t xend,
     57                            uint32_t outstep);
     58     static void kernelP1U2(const RsExpandKernelDriverInfo *info,
     59                            uint32_t xstart, uint32_t xend,
     60                            uint32_t outstep);
     61     static void kernelP1U1(const RsExpandKernelDriverInfo *info,
     62                            uint32_t xstart, uint32_t xend,
     63                            uint32_t outstep);
     64 
     65     static void kernelP1L4(const RsExpandKernelDriverInfo *info,
     66                            uint32_t xstart, uint32_t xend,
     67                            uint32_t outstep);
     68     static void kernelP1L3(const RsExpandKernelDriverInfo *info,
     69                            uint32_t xstart, uint32_t xend,
     70                            uint32_t outstep);
     71     static void kernelP1L2(const RsExpandKernelDriverInfo *info,
     72                            uint32_t xstart, uint32_t xend,
     73                            uint32_t outstep);
     74     static void kernelP1L1(const RsExpandKernelDriverInfo *info,
     75                            uint32_t xstart, uint32_t xend,
     76                            uint32_t outstep);
     77 
     78 };
     79 
     80 }
     81 }
     82 
     83 void RsdCpuScriptIntrinsicHistogram::setGlobalObj(uint32_t slot, ObjectBase *data) {
     84     rsAssert(slot == 1);
     85     mAllocOut.set(static_cast<Allocation *>(data));
     86 }
     87 
     88 void RsdCpuScriptIntrinsicHistogram::setGlobalVar(uint32_t slot, const void *data, size_t dataLength) {
     89     rsAssert(slot == 0);
     90     rsAssert(dataLength == 16);
     91     memcpy(mDot, data, 16);
     92     mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
     93     mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
     94     mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
     95     mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
     96 }
     97 
     98 
     99 
    100 void
    101 RsdCpuScriptIntrinsicHistogram::preLaunch(uint32_t slot,
    102                                           const Allocation ** ains,
    103                                           uint32_t inLen, Allocation * aout,
    104                                           const void * usr, uint32_t usrLen,
    105                                           const RsScriptCall *sc) {
    106 
    107     const uint32_t threads = mCtx->getThreadCount();
    108     uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
    109 
    110     switch (slot) {
    111     case 0:
    112         switch(vSize) {
    113         case 1:
    114             mRootPtr = &kernelP1U1;
    115             break;
    116         case 2:
    117             mRootPtr = &kernelP1U2;
    118             break;
    119         case 3:
    120             mRootPtr = &kernelP1U3;
    121             vSize = 4;
    122             break;
    123         case 4:
    124             mRootPtr = &kernelP1U4;
    125             break;
    126         }
    127         break;
    128     case 1:
    129         switch(ains[0]->getType()->getElement()->getVectorSize()) {
    130         case 1:
    131             mRootPtr = &kernelP1L1;
    132             break;
    133         case 2:
    134             mRootPtr = &kernelP1L2;
    135             break;
    136         case 3:
    137             mRootPtr = &kernelP1L3;
    138             break;
    139         case 4:
    140             mRootPtr = &kernelP1L4;
    141             break;
    142         }
    143         break;
    144     }
    145     memset(mSums, 0, 256 * sizeof(int32_t) * threads * vSize);
    146 }
    147 
    148 void
    149 RsdCpuScriptIntrinsicHistogram::postLaunch(uint32_t slot,
    150                                            const Allocation ** ains,
    151                                            uint32_t inLen,  Allocation * aout,
    152                                            const void * usr, uint32_t usrLen,
    153                                            const RsScriptCall *sc) {
    154 
    155     unsigned int *o = (unsigned int *)mAllocOut->mHal.drvState.lod[0].mallocPtr;
    156     uint32_t threads = mCtx->getThreadCount();
    157     uint32_t vSize = mAllocOut->getType()->getElement()->getVectorSize();
    158 
    159     if (vSize == 3) vSize = 4;
    160 
    161     for (uint32_t ct=0; ct < (256 * vSize); ct++) {
    162         o[ct] = mSums[ct];
    163         for (uint32_t t=1; t < threads; t++) {
    164             o[ct] += mSums[ct + (256 * vSize * t)];
    165         }
    166     }
    167 }
    168 
    169 void RsdCpuScriptIntrinsicHistogram::kernelP1U4(const RsExpandKernelDriverInfo *info,
    170                                                 uint32_t xstart, uint32_t xend,
    171                                                 uint32_t outstep) {
    172 
    173     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    174     uchar *in = (uchar *)info->inPtr[0];
    175     int * sums = &cp->mSums[256 * 4 * info->lid];
    176 
    177     for (uint32_t x = xstart; x < xend; x++) {
    178         sums[(in[0] << 2)    ] ++;
    179         sums[(in[1] << 2) + 1] ++;
    180         sums[(in[2] << 2) + 2] ++;
    181         sums[(in[3] << 2) + 3] ++;
    182         in += info->inStride[0];
    183     }
    184 }
    185 
    186 void RsdCpuScriptIntrinsicHistogram::kernelP1U3(const RsExpandKernelDriverInfo *info,
    187                                                 uint32_t xstart, uint32_t xend,
    188                                                 uint32_t outstep) {
    189 
    190     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    191     uchar *in = (uchar *)info->inPtr[0];
    192     int * sums = &cp->mSums[256 * 4 * info->lid];
    193 
    194     for (uint32_t x = xstart; x < xend; x++) {
    195         sums[(in[0] << 2)    ] ++;
    196         sums[(in[1] << 2) + 1] ++;
    197         sums[(in[2] << 2) + 2] ++;
    198         in += info->inStride[0];
    199     }
    200 }
    201 
    202 void RsdCpuScriptIntrinsicHistogram::kernelP1U2(const RsExpandKernelDriverInfo *info,
    203                                                 uint32_t xstart, uint32_t xend,
    204                                                 uint32_t outstep) {
    205 
    206     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    207     uchar *in = (uchar *)info->inPtr[0];
    208     int * sums = &cp->mSums[256 * 2 * info->lid];
    209 
    210     for (uint32_t x = xstart; x < xend; x++) {
    211         sums[(in[0] << 1)    ] ++;
    212         sums[(in[1] << 1) + 1] ++;
    213         in += info->inStride[0];
    214     }
    215 }
    216 
    217 void RsdCpuScriptIntrinsicHistogram::kernelP1L4(const RsExpandKernelDriverInfo *info,
    218                                                 uint32_t xstart, uint32_t xend,
    219                                                 uint32_t outstep) {
    220 
    221     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    222     uchar *in = (uchar *)info->inPtr[0];
    223     int * sums = &cp->mSums[256 * info->lid];
    224 
    225     for (uint32_t x = xstart; x < xend; x++) {
    226         int t = (cp->mDotI[0] * in[0]) +
    227                 (cp->mDotI[1] * in[1]) +
    228                 (cp->mDotI[2] * in[2]) +
    229                 (cp->mDotI[3] * in[3]);
    230         sums[(t + 0x7f) >> 8] ++;
    231         in += info->inStride[0];
    232     }
    233 }
    234 
    235 void RsdCpuScriptIntrinsicHistogram::kernelP1L3(const RsExpandKernelDriverInfo *info,
    236                                                 uint32_t xstart, uint32_t xend,
    237                                                 uint32_t outstep) {
    238 
    239     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    240     uchar *in = (uchar *)info->inPtr[0];
    241     int * sums = &cp->mSums[256 * info->lid];
    242 
    243     for (uint32_t x = xstart; x < xend; x++) {
    244         int t = (cp->mDotI[0] * in[0]) +
    245                 (cp->mDotI[1] * in[1]) +
    246                 (cp->mDotI[2] * in[2]);
    247         sums[(t + 0x7f) >> 8] ++;
    248         in += info->inStride[0];
    249     }
    250 }
    251 
    252 void RsdCpuScriptIntrinsicHistogram::kernelP1L2(const RsExpandKernelDriverInfo *info,
    253                                                 uint32_t xstart, uint32_t xend,
    254                                                 uint32_t outstep) {
    255 
    256     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    257     uchar *in = (uchar *)info->inPtr[0];
    258     int * sums = &cp->mSums[256 * info->lid];
    259 
    260     for (uint32_t x = xstart; x < xend; x++) {
    261         int t = (cp->mDotI[0] * in[0]) +
    262                 (cp->mDotI[1] * in[1]);
    263         sums[(t + 0x7f) >> 8] ++;
    264         in += info->inStride[0];
    265     }
    266 }
    267 
    268 void RsdCpuScriptIntrinsicHistogram::kernelP1L1(const RsExpandKernelDriverInfo *info,
    269                                                 uint32_t xstart, uint32_t xend,
    270                                                 uint32_t outstep) {
    271 
    272     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    273     uchar *in = (uchar *)info->inPtr[0];
    274     int * sums = &cp->mSums[256 * info->lid];
    275 
    276     for (uint32_t x = xstart; x < xend; x++) {
    277         int t = (cp->mDotI[0] * in[0]);
    278         sums[(t + 0x7f) >> 8] ++;
    279         in += info->inStride[0];
    280     }
    281 }
    282 
    283 void RsdCpuScriptIntrinsicHistogram::kernelP1U1(const RsExpandKernelDriverInfo *info,
    284                                                 uint32_t xstart, uint32_t xend,
    285                                                 uint32_t outstep) {
    286 
    287     RsdCpuScriptIntrinsicHistogram *cp = (RsdCpuScriptIntrinsicHistogram *)info->usr;
    288     uchar *in = (uchar *)info->inPtr[0];
    289     int * sums = &cp->mSums[256 * info->lid];
    290 
    291     for (uint32_t x = xstart; x < xend; x++) {
    292         sums[in[0]] ++;
    293         in += info->inStride[0];
    294     }
    295 }
    296 
    297 
    298 RsdCpuScriptIntrinsicHistogram::RsdCpuScriptIntrinsicHistogram(RsdCpuReferenceImpl *ctx,
    299                                                      const Script *s, const Element *e)
    300             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_HISTOGRAM) {
    301 
    302     mRootPtr = nullptr;
    303     mSums = new int[256 * 4 * mCtx->getThreadCount()];
    304     mDot[0] = 0.299f;
    305     mDot[1] = 0.587f;
    306     mDot[2] = 0.114f;
    307     mDot[3] = 0;
    308     mDotI[0] = (int)((mDot[0] * 256.f) + 0.5f);
    309     mDotI[1] = (int)((mDot[1] * 256.f) + 0.5f);
    310     mDotI[2] = (int)((mDot[2] * 256.f) + 0.5f);
    311     mDotI[3] = (int)((mDot[3] * 256.f) + 0.5f);
    312 }
    313 
    314 RsdCpuScriptIntrinsicHistogram::~RsdCpuScriptIntrinsicHistogram() {
    315     if (mSums) {
    316         delete []mSums;
    317     }
    318 }
    319 
    320 void RsdCpuScriptIntrinsicHistogram::populateScript(Script *s) {
    321     s->mHal.info.exportedVariableCount = 2;
    322 }
    323 
    324 void RsdCpuScriptIntrinsicHistogram::invokeFreeChildren() {
    325 }
    326 
    327 
    328 RsdCpuScriptImpl * rsdIntrinsic_Histogram(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) {
    329 
    330     return new RsdCpuScriptIntrinsicHistogram(ctx, s, e);
    331 }
    332