Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsCpuIntrinsic.h"
     19 #include "rsCpuIntrinsicInlines.h"
     20 
     21 using namespace android;
     22 using namespace android::renderscript;
     23 
     24 namespace android {
     25 namespace renderscript {
     26 
     27 
     28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
     29 public:
     30     virtual void populateScript(Script *);
     31 
     32     virtual ~RsdCpuScriptIntrinsicBlend();
     33     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
     34 
     35 protected:
     36     static void kernel(const RsForEachStubParamStruct *p,
     37                           uint32_t xstart, uint32_t xend,
     38                           uint32_t instep, uint32_t outstep);
     39 };
     40 
     41 }
     42 }
     43 
     44 
     45 enum {
     46     BLEND_CLEAR = 0,
     47     BLEND_SRC = 1,
     48     BLEND_DST = 2,
     49     BLEND_SRC_OVER = 3,
     50     BLEND_DST_OVER = 4,
     51     BLEND_SRC_IN = 5,
     52     BLEND_DST_IN = 6,
     53     BLEND_SRC_OUT = 7,
     54     BLEND_DST_OUT = 8,
     55     BLEND_SRC_ATOP = 9,
     56     BLEND_DST_ATOP = 10,
     57     BLEND_XOR = 11,
     58 
     59     BLEND_NORMAL = 12,
     60     BLEND_AVERAGE = 13,
     61     BLEND_MULTIPLY = 14,
     62     BLEND_SCREEN = 15,
     63     BLEND_DARKEN = 16,
     64     BLEND_LIGHTEN = 17,
     65     BLEND_OVERLAY = 18,
     66     BLEND_HARDLIGHT = 19,
     67     BLEND_SOFTLIGHT = 20,
     68     BLEND_DIFFERENCE = 21,
     69     BLEND_NEGATION = 22,
     70     BLEND_EXCLUSION = 23,
     71     BLEND_COLOR_DODGE = 24,
     72     BLEND_INVERSE_COLOR_DODGE = 25,
     73     BLEND_SOFT_DODGE = 26,
     74     BLEND_COLOR_BURN = 27,
     75     BLEND_INVERSE_COLOR_BURN = 28,
     76     BLEND_SOFT_BURN = 29,
     77     BLEND_REFLECT = 30,
     78     BLEND_GLOW = 31,
     79     BLEND_FREEZE = 32,
     80     BLEND_HEAT = 33,
     81     BLEND_ADD = 34,
     82     BLEND_SUBTRACT = 35,
     83     BLEND_STAMP = 36,
     84     BLEND_RED = 37,
     85     BLEND_GREEN = 38,
     86     BLEND_BLUE = 39,
     87     BLEND_HUE = 40,
     88     BLEND_SATURATION = 41,
     89     BLEND_COLOR = 42,
     90     BLEND_LUMINOSITY = 43
     91 };
     92 
     93 extern "C" void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
     94 extern "C" void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
     95 extern "C" void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
     96 extern "C" void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
     97 extern "C" void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
     98 extern "C" void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
     99 extern "C" void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
    100 extern "C" void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
    101 extern "C" void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
    102 extern "C" void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
    103 extern "C" void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
    104 extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
    105 
    106 void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
    107                                         uint32_t xstart, uint32_t xend,
    108                                         uint32_t instep, uint32_t outstep) {
    109     RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
    110 
    111     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
    112     uchar4 *out = (uchar4 *)p->out;
    113     uchar4 *in = (uchar4 *)p->in;
    114     uint32_t x1 = xstart;
    115     uint32_t x2 = xend;
    116 
    117     switch (p->slot) {
    118     case BLEND_CLEAR:
    119         for (;x1 < x2; x1++, out++) {
    120             *out = 0;
    121         }
    122         break;
    123     case BLEND_SRC:
    124         for (;x1 < x2; x1++, out++, in++) {
    125           *out = *in;
    126         }
    127         break;
    128     //BLEND_DST is a NOP
    129     case BLEND_DST:
    130         break;
    131     case BLEND_SRC_OVER:
    132 #if defined(ARCH_ARM_HAVE_VFP)
    133         if (gArchUseSIMD) {
    134             if((x1 + 8) < x2) {
    135                 uint32_t len = (x2 - x1) >> 3;
    136                 rsdIntrinsicBlendSrcOver_K(out, in, len);
    137                 x1 += len << 3;
    138                 out += len << 3;
    139                 in += len << 3;
    140             }
    141         }
    142 #endif
    143         for (;x1 < x2; x1++, out++, in++) {
    144             short4 in_s = convert_short4(*in);
    145             short4 out_s = convert_short4(*out);
    146             in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
    147             *out = convert_uchar4(in_s);
    148         }
    149         break;
    150     case BLEND_DST_OVER:
    151 #if defined(ARCH_ARM_HAVE_VFP)
    152         if (gArchUseSIMD) {
    153             if((x1 + 8) < x2) {
    154                 uint32_t len = (x2 - x1) >> 3;
    155                 rsdIntrinsicBlendDstOver_K(out, in, len);
    156                 x1 += len << 3;
    157                 out += len << 3;
    158                 in += len << 3;
    159             }
    160         }
    161 #endif
    162         for (;x1 < x2; x1++, out++, in++) {
    163             short4 in_s = convert_short4(*in);
    164             short4 out_s = convert_short4(*out);
    165             in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
    166             *out = convert_uchar4(in_s);
    167         }
    168         break;
    169     case BLEND_SRC_IN:
    170 #if defined(ARCH_ARM_HAVE_VFP)
    171         if (gArchUseSIMD) {
    172             if((x1 + 8) < x2) {
    173                 uint32_t len = (x2 - x1) >> 3;
    174                 rsdIntrinsicBlendSrcIn_K(out, in, len);
    175                 x1 += len << 3;
    176                 out += len << 3;
    177                 in += len << 3;
    178             }
    179         }
    180 #endif
    181         for (;x1 < x2; x1++, out++, in++) {
    182             short4 in_s = convert_short4(*in);
    183             in_s = (in_s * out->w) >> (short4)8;
    184             *out = convert_uchar4(in_s);
    185         }
    186         break;
    187     case BLEND_DST_IN:
    188 #if defined(ARCH_ARM_HAVE_VFP)
    189         if (gArchUseSIMD) {
    190             if((x1 + 8) < x2) {
    191                 uint32_t len = (x2 - x1) >> 3;
    192                 rsdIntrinsicBlendDstIn_K(out, in, len);
    193                 x1 += len << 3;
    194                 out += len << 3;
    195                 in += len << 3;
    196             }
    197         }
    198 #endif
    199         for (;x1 < x2; x1++, out++, in++) {
    200             short4 out_s = convert_short4(*out);
    201             out_s = (out_s * in->w) >> (short4)8;
    202             *out = convert_uchar4(out_s);
    203         }
    204         break;
    205     case BLEND_SRC_OUT:
    206 #if defined(ARCH_ARM_HAVE_VFP)
    207         if (gArchUseSIMD) {
    208             if((x1 + 8) < x2) {
    209                 uint32_t len = (x2 - x1) >> 3;
    210                 rsdIntrinsicBlendSrcOut_K(out, in, len);
    211                 x1 += len << 3;
    212                 out += len << 3;
    213                 in += len << 3;
    214             }
    215         }
    216 #endif
    217         for (;x1 < x2; x1++, out++, in++) {
    218             short4 in_s = convert_short4(*in);
    219             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
    220             *out = convert_uchar4(in_s);
    221         }
    222         break;
    223     case BLEND_DST_OUT:
    224 #if defined(ARCH_ARM_HAVE_VFP)
    225         if (gArchUseSIMD) {
    226             if((x1 + 8) < x2) {
    227                 uint32_t len = (x2 - x1) >> 3;
    228                 rsdIntrinsicBlendDstOut_K(out, in, len);
    229                 x1 += len << 3;
    230                 out += len << 3;
    231                 in += len << 3;
    232             }
    233         }
    234 #endif
    235         for (;x1 < x2; x1++, out++, in++) {
    236             short4 out_s = convert_short4(*out);
    237             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
    238             *out = convert_uchar4(out_s);
    239         }
    240         break;
    241     case BLEND_SRC_ATOP:
    242 #if defined(ARCH_ARM_HAVE_VFP)
    243         if (gArchUseSIMD) {
    244             if((x1 + 8) < x2) {
    245                 uint32_t len = (x2 - x1) >> 3;
    246                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
    247                 x1 += len << 3;
    248                 out += len << 3;
    249                 in += len << 3;
    250             }
    251         }
    252 #endif
    253         for (;x1 < x2; x1++, out++, in++) {
    254             short4 in_s = convert_short4(*in);
    255             short4 out_s = convert_short4(*out);
    256             out_s.xyz = ((in_s.xyz * out_s.w) +
    257               (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
    258             *out = convert_uchar4(out_s);
    259         }
    260         break;
    261     case BLEND_DST_ATOP:
    262 #if defined(ARCH_ARM_HAVE_VFP)
    263         if (gArchUseSIMD) {
    264             if((x1 + 8) < x2) {
    265                 uint32_t len = (x2 - x1) >> 3;
    266                 rsdIntrinsicBlendDstAtop_K(out, in, len);
    267                 x1 += len << 3;
    268                 out += len << 3;
    269                 in += len << 3;
    270             }
    271         }
    272 #endif
    273         for (;x1 < x2; x1++, out++, in++) {
    274             short4 in_s = convert_short4(*in);
    275             short4 out_s = convert_short4(*out);
    276             out_s.xyz = ((out_s.xyz * in_s.w) +
    277               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
    278             *out = convert_uchar4(out_s);
    279         }
    280         break;
    281     case BLEND_XOR:
    282 #if defined(ARCH_ARM_HAVE_VFP)
    283         if (gArchUseSIMD) {
    284             if((x1 + 8) < x2) {
    285                 uint32_t len = (x2 - x1) >> 3;
    286                 rsdIntrinsicBlendXor_K(out, in, len);
    287                 x1 += len << 3;
    288                 out += len << 3;
    289                 in += len << 3;
    290             }
    291         }
    292 #endif
    293         for (;x1 < x2; x1++, out++, in++) {
    294             *out = *in ^ *out;
    295         }
    296         break;
    297     case BLEND_NORMAL:
    298         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
    299         rsAssert(false);
    300         break;
    301     case BLEND_AVERAGE:
    302         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
    303         rsAssert(false);
    304         break;
    305     case BLEND_MULTIPLY:
    306 #if defined(ARCH_ARM_HAVE_VFP)
    307         if (gArchUseSIMD) {
    308             if((x1 + 8) < x2) {
    309                 uint32_t len = (x2 - x1) >> 3;
    310                 rsdIntrinsicBlendMultiply_K(out, in, len);
    311                 x1 += len << 3;
    312                 out += len << 3;
    313                 in += len << 3;
    314             }
    315         }
    316 #endif
    317         for (;x1 < x2; x1++, out++, in++) {
    318           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
    319                                 >> (short4)8);
    320         }
    321         break;
    322     case BLEND_SCREEN:
    323         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
    324         rsAssert(false);
    325         break;
    326     case BLEND_DARKEN:
    327         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
    328         rsAssert(false);
    329         break;
    330     case BLEND_LIGHTEN:
    331         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
    332         rsAssert(false);
    333         break;
    334     case BLEND_OVERLAY:
    335         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
    336         rsAssert(false);
    337         break;
    338     case BLEND_HARDLIGHT:
    339         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
    340         rsAssert(false);
    341         break;
    342     case BLEND_SOFTLIGHT:
    343         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
    344         rsAssert(false);
    345         break;
    346     case BLEND_DIFFERENCE:
    347         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
    348         rsAssert(false);
    349         break;
    350     case BLEND_NEGATION:
    351         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
    352         rsAssert(false);
    353         break;
    354     case BLEND_EXCLUSION:
    355         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
    356         rsAssert(false);
    357         break;
    358     case BLEND_COLOR_DODGE:
    359         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
    360         rsAssert(false);
    361         break;
    362     case BLEND_INVERSE_COLOR_DODGE:
    363         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
    364         rsAssert(false);
    365         break;
    366     case BLEND_SOFT_DODGE:
    367         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
    368         rsAssert(false);
    369         break;
    370     case BLEND_COLOR_BURN:
    371         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
    372         rsAssert(false);
    373         break;
    374     case BLEND_INVERSE_COLOR_BURN:
    375         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
    376         rsAssert(false);
    377         break;
    378     case BLEND_SOFT_BURN:
    379         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
    380         rsAssert(false);
    381         break;
    382     case BLEND_REFLECT:
    383         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
    384         rsAssert(false);
    385         break;
    386     case BLEND_GLOW:
    387         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
    388         rsAssert(false);
    389         break;
    390     case BLEND_FREEZE:
    391         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
    392         rsAssert(false);
    393         break;
    394     case BLEND_HEAT:
    395         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
    396         rsAssert(false);
    397         break;
    398     case BLEND_ADD:
    399 #if defined(ARCH_ARM_HAVE_VFP)
    400         if (gArchUseSIMD) {
    401             if((x1 + 8) < x2) {
    402                 uint32_t len = (x2 - x1) >> 3;
    403                 rsdIntrinsicBlendAdd_K(out, in, len);
    404                 x1 += len << 3;
    405                 out += len << 3;
    406                 in += len << 3;
    407             }
    408         }
    409 #endif
    410         for (;x1 < x2; x1++, out++, in++) {
    411             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    412                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    413             out->x = (oR + iR) > 255 ? 255 : oR + iR;
    414             out->y = (oG + iG) > 255 ? 255 : oG + iG;
    415             out->z = (oB + iB) > 255 ? 255 : oB + iB;
    416             out->w = (oA + iA) > 255 ? 255 : oA + iA;
    417         }
    418         break;
    419     case BLEND_SUBTRACT:
    420 #if defined(ARCH_ARM_HAVE_VFP)
    421         if (gArchUseSIMD) {
    422             if((x1 + 8) < x2) {
    423                 uint32_t len = (x2 - x1) >> 3;
    424                 rsdIntrinsicBlendSub_K(out, in, len);
    425                 x1 += len << 3;
    426                 out += len << 3;
    427                 in += len << 3;
    428             }
    429         }
    430 #endif
    431         for (;x1 < x2; x1++, out++, in++) {
    432             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    433                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    434             out->x = (oR - iR) < 0 ? 0 : oR - iR;
    435             out->y = (oG - iG) < 0 ? 0 : oG - iG;
    436             out->z = (oB - iB) < 0 ? 0 : oB - iB;
    437             out->w = (oA - iA) < 0 ? 0 : oA - iA;
    438         }
    439         break;
    440     case BLEND_STAMP:
    441         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
    442         rsAssert(false);
    443         break;
    444     case BLEND_RED:
    445         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
    446         rsAssert(false);
    447         break;
    448     case BLEND_GREEN:
    449         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
    450         rsAssert(false);
    451         break;
    452     case BLEND_BLUE:
    453         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
    454         rsAssert(false);
    455         break;
    456     case BLEND_HUE:
    457         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
    458         rsAssert(false);
    459         break;
    460     case BLEND_SATURATION:
    461         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
    462         rsAssert(false);
    463         break;
    464     case BLEND_COLOR:
    465         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
    466         rsAssert(false);
    467         break;
    468     case BLEND_LUMINOSITY:
    469         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
    470         rsAssert(false);
    471         break;
    472 
    473     default:
    474         ALOGE("Called unimplemented value %d", p->slot);
    475         rsAssert(false);
    476 
    477     }
    478 }
    479 
    480 
    481 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
    482                                                        const Script *s, const Element *e)
    483             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
    484 
    485     mRootPtr = &kernel;
    486 }
    487 
    488 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
    489 }
    490 
    491 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
    492     s->mHal.info.exportedVariableCount = 0;
    493 }
    494 
    495 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
    496                                       const Script *s, const Element *e) {
    497     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
    498 }
    499 
    500 
    501 
    502