Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsCpuIntrinsic.h"
     19 #include "rsCpuIntrinsicInlines.h"
     20 
     21 namespace android {
     22 namespace renderscript {
     23 
     24 
     25 class RsdCpuScriptIntrinsic3DLUT : public RsdCpuScriptIntrinsic {
     26 public:
     27     void populateScript(Script *) override;
     28     void invokeFreeChildren() override;
     29 
     30     void setGlobalObj(uint32_t slot, ObjectBase *data) override;
     31 
     32     ~RsdCpuScriptIntrinsic3DLUT() override;
     33     RsdCpuScriptIntrinsic3DLUT(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
     34 
     35 protected:
     36     ObjectBaseRef<Allocation> mLUT;
     37 
     38     static void kernel(const RsExpandKernelDriverInfo *info,
     39                        uint32_t xstart, uint32_t xend,
     40                        uint32_t outstep);
     41 };
     42 
     43 void RsdCpuScriptIntrinsic3DLUT::setGlobalObj(uint32_t slot, ObjectBase *data) {
     44     rsAssert(slot == 0);
     45     mLUT.set(static_cast<Allocation *>(data));
     46 }
     47 
     48 extern "C" void rsdIntrinsic3DLUT_K(void *dst, void const *in, size_t count,
     49                                       void const *lut,
     50                                       int32_t pitchy, int32_t pitchz,
     51                                       int dimx, int dimy, int dimz);
     52 
     53 
     54 void RsdCpuScriptIntrinsic3DLUT::kernel(const RsExpandKernelDriverInfo *info,
     55                                         uint32_t xstart, uint32_t xend,
     56                                         uint32_t outstep) {
     57     RsdCpuScriptIntrinsic3DLUT *cp = (RsdCpuScriptIntrinsic3DLUT *)info->usr;
     58 
     59     uchar4 *out = (uchar4 *)info->outPtr[0];
     60     uchar4 *in = (uchar4 *)info->inPtr[0];
     61     uint32_t x1 = xstart;
     62     uint32_t x2 = xend;
     63 
     64     const uchar *bp = (const uchar *)cp->mLUT->mHal.drvState.lod[0].mallocPtr;
     65 
     66     int4 dims = {
     67         static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimX - 1),
     68         static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimY - 1),
     69         static_cast<int>(cp->mLUT->mHal.drvState.lod[0].dimZ - 1),
     70         -1
     71     };
     72     const float4 m = (float4)(1.f / 255.f) * convert_float4(dims);
     73     const int4 coordMul = convert_int4(m * (float4)0x8000);
     74     const size_t stride_y = cp->mLUT->mHal.drvState.lod[0].stride;
     75     const size_t stride_z = stride_y * cp->mLUT->mHal.drvState.lod[0].dimY;
     76 
     77     //ALOGE("strides %zu %zu", stride_y, stride_z);
     78 
     79 #if defined(ARCH_ARM_USE_INTRINSICS)
     80     if (gArchUseSIMD) {
     81         int32_t len = x2 - x1;
     82         if(len > 0) {
     83             rsdIntrinsic3DLUT_K(out, in, len,
     84                                 bp, stride_y, stride_z,
     85                                 dims.x, dims.y, dims.z);
     86             x1 += len;
     87             out += len;
     88             in += len;
     89         }
     90     }
     91 #endif
     92 
     93     while (x1 < x2) {
     94         int4 baseCoord = convert_int4(*in) * coordMul;
     95         int4 coord1 = baseCoord >> (int4)15;
     96         //int4 coord2 = min(coord1 + 1, gDims - 1);
     97 
     98         int4 weight2 = baseCoord & 0x7fff;
     99         int4 weight1 = (int4)0x8000 - weight2;
    100 
    101         //ALOGE("coord1      %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w);
    102         const uchar *bp2 = bp + (coord1.x * 4) + (coord1.y * stride_y) + (coord1.z * stride_z);
    103         const uchar4 *pt_00 = (const uchar4 *)&bp2[0];
    104         const uchar4 *pt_10 = (const uchar4 *)&bp2[stride_y];
    105         const uchar4 *pt_01 = (const uchar4 *)&bp2[stride_z];
    106         const uchar4 *pt_11 = (const uchar4 *)&bp2[stride_y + stride_z];
    107 
    108         uint4 v000 = convert_uint4(pt_00[0]);
    109         uint4 v100 = convert_uint4(pt_00[1]);
    110         uint4 v010 = convert_uint4(pt_10[0]);
    111         uint4 v110 = convert_uint4(pt_10[1]);
    112         uint4 v001 = convert_uint4(pt_01[0]);
    113         uint4 v101 = convert_uint4(pt_01[1]);
    114         uint4 v011 = convert_uint4(pt_11[0]);
    115         uint4 v111 = convert_uint4(pt_11[1]);
    116 
    117         uint4 yz00 = ((v000 * weight1.x) + (v100 * weight2.x)) >> (int4)7;
    118         uint4 yz10 = ((v010 * weight1.x) + (v110 * weight2.x)) >> (int4)7;
    119         uint4 yz01 = ((v001 * weight1.x) + (v101 * weight2.x)) >> (int4)7;
    120         uint4 yz11 = ((v011 * weight1.x) + (v111 * weight2.x)) >> (int4)7;
    121 
    122         uint4 z0 = ((yz00 * weight1.y) + (yz10 * weight2.y)) >> (int4)15;
    123         uint4 z1 = ((yz01 * weight1.y) + (yz11 * weight2.y)) >> (int4)15;
    124 
    125         uint4 v = ((z0 * weight1.z) + (z1 * weight2.z)) >> (int4)15;
    126         uint4 v2 = (v + 0x7f) >> (int4)8;
    127 
    128         uchar4 ret = convert_uchar4(v2);
    129         ret.w = in->w;
    130 
    131         #if 0
    132         if (!x1) {
    133             ALOGE("in          %08x %08x %08x %08x", in->r, in->g, in->b, in->a);
    134             ALOGE("baseCoord   %08x %08x %08x %08x", baseCoord.x, baseCoord.y, baseCoord.z, baseCoord.w);
    135             ALOGE("coord1      %08x %08x %08x %08x", coord1.x, coord1.y, coord1.z, coord1.w);
    136             ALOGE("weight1     %08x %08x %08x %08x", weight1.x, weight1.y, weight1.z, weight1.w);
    137             ALOGE("weight2     %08x %08x %08x %08x", weight2.x, weight2.y, weight2.z, weight2.w);
    138 
    139             ALOGE("v000        %08x %08x %08x %08x", v000.x, v000.y, v000.z, v000.w);
    140             ALOGE("v100        %08x %08x %08x %08x", v100.x, v100.y, v100.z, v100.w);
    141             ALOGE("yz00        %08x %08x %08x %08x", yz00.x, yz00.y, yz00.z, yz00.w);
    142             ALOGE("z0          %08x %08x %08x %08x", z0.x, z0.y, z0.z, z0.w);
    143 
    144             ALOGE("v           %08x %08x %08x %08x", v.x, v.y, v.z, v.w);
    145             ALOGE("v2          %08x %08x %08x %08x", v2.x, v2.y, v2.z, v2.w);
    146         }
    147         #endif
    148         *out = ret;
    149 
    150 
    151         in++;
    152         out++;
    153         x1++;
    154     }
    155 }
    156 
    157 RsdCpuScriptIntrinsic3DLUT::RsdCpuScriptIntrinsic3DLUT(
    158     RsdCpuReferenceImpl *ctx, const Script *s, const Element *e) :
    159         RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_3DLUT) {
    160 
    161     mRootPtr = &kernel;
    162 }
    163 
    164 RsdCpuScriptIntrinsic3DLUT::~RsdCpuScriptIntrinsic3DLUT() {
    165 }
    166 
    167 void RsdCpuScriptIntrinsic3DLUT::populateScript(Script *s) {
    168     s->mHal.info.exportedVariableCount = 1;
    169 }
    170 
    171 void RsdCpuScriptIntrinsic3DLUT::invokeFreeChildren() {
    172     mLUT.clear();
    173 }
    174 
    175 RsdCpuScriptImpl * rsdIntrinsic_3DLUT(RsdCpuReferenceImpl *ctx,
    176                                     const Script *s, const Element *e) {
    177 
    178     return new RsdCpuScriptIntrinsic3DLUT(ctx, s, e);
    179 }
    180 
    181 } // namespace renderscript
    182 } // namespace android
    183