Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsCpuIntrinsic.h"
     19 #include "rsCpuIntrinsicInlines.h"
     20 
     21 using namespace android;
     22 using namespace android::renderscript;
     23 
     24 namespace android {
     25 namespace renderscript {
     26 
     27 
     28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
     29 public:
     30     virtual void populateScript(Script *);
     31 
     32     virtual ~RsdCpuScriptIntrinsicBlend();
     33     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
     34 
     35 protected:
     36     static void kernel(const RsForEachStubParamStruct *p,
     37                           uint32_t xstart, uint32_t xend,
     38                           uint32_t instep, uint32_t outstep);
     39 };
     40 
     41 }
     42 }
     43 
     44 
     45 enum {
     46     BLEND_CLEAR = 0,
     47     BLEND_SRC = 1,
     48     BLEND_DST = 2,
     49     BLEND_SRC_OVER = 3,
     50     BLEND_DST_OVER = 4,
     51     BLEND_SRC_IN = 5,
     52     BLEND_DST_IN = 6,
     53     BLEND_SRC_OUT = 7,
     54     BLEND_DST_OUT = 8,
     55     BLEND_SRC_ATOP = 9,
     56     BLEND_DST_ATOP = 10,
     57     BLEND_XOR = 11,
     58 
     59     BLEND_NORMAL = 12,
     60     BLEND_AVERAGE = 13,
     61     BLEND_MULTIPLY = 14,
     62     BLEND_SCREEN = 15,
     63     BLEND_DARKEN = 16,
     64     BLEND_LIGHTEN = 17,
     65     BLEND_OVERLAY = 18,
     66     BLEND_HARDLIGHT = 19,
     67     BLEND_SOFTLIGHT = 20,
     68     BLEND_DIFFERENCE = 21,
     69     BLEND_NEGATION = 22,
     70     BLEND_EXCLUSION = 23,
     71     BLEND_COLOR_DODGE = 24,
     72     BLEND_INVERSE_COLOR_DODGE = 25,
     73     BLEND_SOFT_DODGE = 26,
     74     BLEND_COLOR_BURN = 27,
     75     BLEND_INVERSE_COLOR_BURN = 28,
     76     BLEND_SOFT_BURN = 29,
     77     BLEND_REFLECT = 30,
     78     BLEND_GLOW = 31,
     79     BLEND_FREEZE = 32,
     80     BLEND_HEAT = 33,
     81     BLEND_ADD = 34,
     82     BLEND_SUBTRACT = 35,
     83     BLEND_STAMP = 36,
     84     BLEND_RED = 37,
     85     BLEND_GREEN = 38,
     86     BLEND_BLUE = 39,
     87     BLEND_HUE = 40,
     88     BLEND_SATURATION = 41,
     89     BLEND_COLOR = 42,
     90     BLEND_LUMINOSITY = 43
     91 };
     92 
     93 extern "C" void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
     94 extern "C" void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
     95 extern "C" void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
     96 extern "C" void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
     97 extern "C" void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
     98 extern "C" void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
     99 extern "C" void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
    100 extern "C" void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
    101 extern "C" void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
    102 extern "C" void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
    103 extern "C" void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
    104 extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
    105 
    106 //#undef ARCH_ARM_HAVE_NEON
    107 
    108 void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
    109                                         uint32_t xstart, uint32_t xend,
    110                                         uint32_t instep, uint32_t outstep) {
    111     RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
    112 
    113     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
    114     uchar4 *out = (uchar4 *)p->out;
    115     uchar4 *in = (uchar4 *)p->in;
    116     uint32_t x1 = xstart;
    117     uint32_t x2 = xend;
    118 
    119     switch (p->slot) {
    120     case BLEND_CLEAR:
    121         for (;x1 < x2; x1++, out++) {
    122             *out = 0;
    123         }
    124         break;
    125     case BLEND_SRC:
    126         for (;x1 < x2; x1++, out++, in++) {
    127           *out = *in;
    128         }
    129         break;
    130     //BLEND_DST is a NOP
    131     case BLEND_DST:
    132         break;
    133     case BLEND_SRC_OVER:
    134 #if defined(ARCH_ARM_HAVE_NEON)
    135         if((x1 + 8) < x2) {
    136             uint32_t len = (x2 - x1) >> 3;
    137             rsdIntrinsicBlendSrcOver_K(out, in, len);
    138             x1 += len << 3;
    139             out += len << 3;
    140             in += len << 3;
    141         }
    142 #endif
    143         for (;x1 < x2; x1++, out++, in++) {
    144             short4 in_s = convert_short4(*in);
    145             short4 out_s = convert_short4(*out);
    146             in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
    147             *out = convert_uchar4(in_s);
    148         }
    149         break;
    150     case BLEND_DST_OVER:
    151 #if defined(ARCH_ARM_HAVE_NEON)
    152         if((x1 + 8) < x2) {
    153             uint32_t len = (x2 - x1) >> 3;
    154             rsdIntrinsicBlendDstOver_K(out, in, len);
    155             x1 += len << 3;
    156             out += len << 3;
    157             in += len << 3;
    158         }
    159 #endif
    160         for (;x1 < x2; x1++, out++, in++) {
    161             short4 in_s = convert_short4(*in);
    162             short4 out_s = convert_short4(*out);
    163             in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
    164             *out = convert_uchar4(in_s);
    165         }
    166         break;
    167     case BLEND_SRC_IN:
    168 #if defined(ARCH_ARM_HAVE_NEON)
    169         if((x1 + 8) < x2) {
    170             uint32_t len = (x2 - x1) >> 3;
    171             rsdIntrinsicBlendSrcIn_K(out, in, len);
    172             x1 += len << 3;
    173             out += len << 3;
    174             in += len << 3;
    175         }
    176 #endif
    177         for (;x1 < x2; x1++, out++, in++) {
    178             short4 in_s = convert_short4(*in);
    179             in_s = (in_s * out->w) >> (short4)8;
    180             *out = convert_uchar4(in_s);
    181         }
    182         break;
    183     case BLEND_DST_IN:
    184 #if defined(ARCH_ARM_HAVE_NEON)
    185         if((x1 + 8) < x2) {
    186             uint32_t len = (x2 - x1) >> 3;
    187             rsdIntrinsicBlendDstIn_K(out, in, len);
    188             x1 += len << 3;
    189             out += len << 3;
    190             in += len << 3;
    191         }
    192 #endif
    193         for (;x1 < x2; x1++, out++, in++) {
    194             short4 out_s = convert_short4(*out);
    195             out_s = (out_s * in->w) >> (short4)8;
    196             *out = convert_uchar4(out_s);
    197         }
    198         break;
    199     case BLEND_SRC_OUT:
    200 #if defined(ARCH_ARM_HAVE_NEON)
    201         if((x1 + 8) < x2) {
    202             uint32_t len = (x2 - x1) >> 3;
    203             rsdIntrinsicBlendSrcOut_K(out, in, len);
    204             x1 += len << 3;
    205             out += len << 3;
    206             in += len << 3;
    207         }
    208 #endif
    209         for (;x1 < x2; x1++, out++, in++) {
    210             short4 in_s = convert_short4(*in);
    211             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
    212             *out = convert_uchar4(in_s);
    213         }
    214         break;
    215     case BLEND_DST_OUT:
    216 #if defined(ARCH_ARM_HAVE_NEON)
    217         if((x1 + 8) < x2) {
    218             uint32_t len = (x2 - x1) >> 3;
    219             rsdIntrinsicBlendDstOut_K(out, in, len);
    220             x1 += len << 3;
    221             out += len << 3;
    222             in += len << 3;
    223         }
    224 #endif
    225         for (;x1 < x2; x1++, out++, in++) {
    226             short4 out_s = convert_short4(*out);
    227             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
    228             *out = convert_uchar4(out_s);
    229         }
    230         break;
    231     case BLEND_SRC_ATOP:
    232 #if defined(ARCH_ARM_HAVE_NEON)
    233         if((x1 + 8) < x2) {
    234             uint32_t len = (x2 - x1) >> 3;
    235             rsdIntrinsicBlendSrcAtop_K(out, in, len);
    236             x1 += len << 3;
    237             out += len << 3;
    238             in += len << 3;
    239         }
    240 #endif
    241         for (;x1 < x2; x1++, out++, in++) {
    242             short4 in_s = convert_short4(*in);
    243             short4 out_s = convert_short4(*out);
    244             out_s.xyz = ((in_s.xyz * out_s.w) +
    245               (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
    246             *out = convert_uchar4(out_s);
    247         }
    248         break;
    249     case BLEND_DST_ATOP:
    250 #if defined(ARCH_ARM_HAVE_NEON)
    251         if((x1 + 8) < x2) {
    252             uint32_t len = (x2 - x1) >> 3;
    253             rsdIntrinsicBlendDstAtop_K(out, in, len);
    254             x1 += len << 3;
    255             out += len << 3;
    256             in += len << 3;
    257         }
    258 #endif
    259         for (;x1 < x2; x1++, out++, in++) {
    260             short4 in_s = convert_short4(*in);
    261             short4 out_s = convert_short4(*out);
    262             out_s.xyz = ((out_s.xyz * in_s.w) +
    263               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
    264             *out = convert_uchar4(out_s);
    265         }
    266         break;
    267     case BLEND_XOR:
    268 #if defined(ARCH_ARM_HAVE_NEON)
    269         if((x1 + 8) < x2) {
    270             uint32_t len = (x2 - x1) >> 3;
    271             rsdIntrinsicBlendXor_K(out, in, len);
    272             x1 += len << 3;
    273             out += len << 3;
    274             in += len << 3;
    275         }
    276 #endif
    277         for (;x1 < x2; x1++, out++, in++) {
    278             *out = *in ^ *out;
    279         }
    280         break;
    281     case BLEND_NORMAL:
    282         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
    283         rsAssert(false);
    284         break;
    285     case BLEND_AVERAGE:
    286         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
    287         rsAssert(false);
    288         break;
    289     case BLEND_MULTIPLY:
    290 #if defined(ARCH_ARM_HAVE_NEON)
    291         if((x1 + 8) < x2) {
    292             uint32_t len = (x2 - x1) >> 3;
    293             rsdIntrinsicBlendMultiply_K(out, in, len);
    294             x1 += len << 3;
    295             out += len << 3;
    296             in += len << 3;
    297         }
    298 #endif
    299         for (;x1 < x2; x1++, out++, in++) {
    300           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
    301                                 >> (short4)8);
    302         }
    303         break;
    304     case BLEND_SCREEN:
    305         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
    306         rsAssert(false);
    307         break;
    308     case BLEND_DARKEN:
    309         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
    310         rsAssert(false);
    311         break;
    312     case BLEND_LIGHTEN:
    313         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
    314         rsAssert(false);
    315         break;
    316     case BLEND_OVERLAY:
    317         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
    318         rsAssert(false);
    319         break;
    320     case BLEND_HARDLIGHT:
    321         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
    322         rsAssert(false);
    323         break;
    324     case BLEND_SOFTLIGHT:
    325         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
    326         rsAssert(false);
    327         break;
    328     case BLEND_DIFFERENCE:
    329         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
    330         rsAssert(false);
    331         break;
    332     case BLEND_NEGATION:
    333         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
    334         rsAssert(false);
    335         break;
    336     case BLEND_EXCLUSION:
    337         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
    338         rsAssert(false);
    339         break;
    340     case BLEND_COLOR_DODGE:
    341         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
    342         rsAssert(false);
    343         break;
    344     case BLEND_INVERSE_COLOR_DODGE:
    345         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
    346         rsAssert(false);
    347         break;
    348     case BLEND_SOFT_DODGE:
    349         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
    350         rsAssert(false);
    351         break;
    352     case BLEND_COLOR_BURN:
    353         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
    354         rsAssert(false);
    355         break;
    356     case BLEND_INVERSE_COLOR_BURN:
    357         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
    358         rsAssert(false);
    359         break;
    360     case BLEND_SOFT_BURN:
    361         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
    362         rsAssert(false);
    363         break;
    364     case BLEND_REFLECT:
    365         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
    366         rsAssert(false);
    367         break;
    368     case BLEND_GLOW:
    369         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
    370         rsAssert(false);
    371         break;
    372     case BLEND_FREEZE:
    373         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
    374         rsAssert(false);
    375         break;
    376     case BLEND_HEAT:
    377         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
    378         rsAssert(false);
    379         break;
    380     case BLEND_ADD:
    381 #if defined(ARCH_ARM_HAVE_NEON)
    382         if((x1 + 8) < x2) {
    383             uint32_t len = (x2 - x1) >> 3;
    384             rsdIntrinsicBlendAdd_K(out, in, len);
    385             x1 += len << 3;
    386             out += len << 3;
    387             in += len << 3;
    388         }
    389 #endif
    390         for (;x1 < x2; x1++, out++, in++) {
    391             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    392                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    393             out->x = (oR + iR) > 255 ? 255 : oR + iR;
    394             out->y = (oG + iG) > 255 ? 255 : oG + iG;
    395             out->z = (oB + iB) > 255 ? 255 : oB + iB;
    396             out->w = (oA + iA) > 255 ? 255 : oA + iA;
    397         }
    398         break;
    399     case BLEND_SUBTRACT:
    400 #if defined(ARCH_ARM_HAVE_NEON)
    401         if((x1 + 8) < x2) {
    402             uint32_t len = (x2 - x1) >> 3;
    403             rsdIntrinsicBlendSub_K(out, in, len);
    404             x1 += len << 3;
    405             out += len << 3;
    406             in += len << 3;
    407         }
    408 #endif
    409         for (;x1 < x2; x1++, out++, in++) {
    410             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    411                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    412             out->x = (oR - iR) < 0 ? 0 : oR - iR;
    413             out->y = (oG - iG) < 0 ? 0 : oG - iG;
    414             out->z = (oB - iB) < 0 ? 0 : oB - iB;
    415             out->w = (oA - iA) < 0 ? 0 : oA - iA;
    416         }
    417         break;
    418     case BLEND_STAMP:
    419         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
    420         rsAssert(false);
    421         break;
    422     case BLEND_RED:
    423         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
    424         rsAssert(false);
    425         break;
    426     case BLEND_GREEN:
    427         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
    428         rsAssert(false);
    429         break;
    430     case BLEND_BLUE:
    431         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
    432         rsAssert(false);
    433         break;
    434     case BLEND_HUE:
    435         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
    436         rsAssert(false);
    437         break;
    438     case BLEND_SATURATION:
    439         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
    440         rsAssert(false);
    441         break;
    442     case BLEND_COLOR:
    443         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
    444         rsAssert(false);
    445         break;
    446     case BLEND_LUMINOSITY:
    447         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
    448         rsAssert(false);
    449         break;
    450 
    451     default:
    452         ALOGE("Called unimplemented value %d", p->slot);
    453         rsAssert(false);
    454 
    455     }
    456 }
    457 
    458 
    459 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
    460                                                        const Script *s, const Element *e)
    461             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
    462 
    463     mRootPtr = &kernel;
    464 }
    465 
    466 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
    467 }
    468 
    469 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
    470     s->mHal.info.exportedVariableCount = 0;
    471 }
    472 
    473 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
    474                                       const Script *s, const Element *e) {
    475     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
    476 }
    477 
    478 
    479 
    480