Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsCpuIntrinsic.h"
     19 #include "rsCpuIntrinsicInlines.h"
     20 
     21 using namespace android;
     22 using namespace android::renderscript;
     23 
     24 namespace android {
     25 namespace renderscript {
     26 
     27 
     28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
     29 public:
     30     virtual void populateScript(Script *);
     31 
     32     virtual ~RsdCpuScriptIntrinsicBlend();
     33     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
     34 
     35 protected:
     36     static void kernel(const RsForEachStubParamStruct *p,
     37                           uint32_t xstart, uint32_t xend,
     38                           uint32_t instep, uint32_t outstep);
     39 };
     40 
     41 }
     42 }
     43 
     44 
     45 enum {
     46     BLEND_CLEAR = 0,
     47     BLEND_SRC = 1,
     48     BLEND_DST = 2,
     49     BLEND_SRC_OVER = 3,
     50     BLEND_DST_OVER = 4,
     51     BLEND_SRC_IN = 5,
     52     BLEND_DST_IN = 6,
     53     BLEND_SRC_OUT = 7,
     54     BLEND_DST_OUT = 8,
     55     BLEND_SRC_ATOP = 9,
     56     BLEND_DST_ATOP = 10,
     57     BLEND_XOR = 11,
     58 
     59     BLEND_NORMAL = 12,
     60     BLEND_AVERAGE = 13,
     61     BLEND_MULTIPLY = 14,
     62     BLEND_SCREEN = 15,
     63     BLEND_DARKEN = 16,
     64     BLEND_LIGHTEN = 17,
     65     BLEND_OVERLAY = 18,
     66     BLEND_HARDLIGHT = 19,
     67     BLEND_SOFTLIGHT = 20,
     68     BLEND_DIFFERENCE = 21,
     69     BLEND_NEGATION = 22,
     70     BLEND_EXCLUSION = 23,
     71     BLEND_COLOR_DODGE = 24,
     72     BLEND_INVERSE_COLOR_DODGE = 25,
     73     BLEND_SOFT_DODGE = 26,
     74     BLEND_COLOR_BURN = 27,
     75     BLEND_INVERSE_COLOR_BURN = 28,
     76     BLEND_SOFT_BURN = 29,
     77     BLEND_REFLECT = 30,
     78     BLEND_GLOW = 31,
     79     BLEND_FREEZE = 32,
     80     BLEND_HEAT = 33,
     81     BLEND_ADD = 34,
     82     BLEND_SUBTRACT = 35,
     83     BLEND_STAMP = 36,
     84     BLEND_RED = 37,
     85     BLEND_GREEN = 38,
     86     BLEND_BLUE = 39,
     87     BLEND_HUE = 40,
     88     BLEND_SATURATION = 41,
     89     BLEND_COLOR = 42,
     90     BLEND_LUMINOSITY = 43
     91 };
     92 
     93 #if defined(ARCH_ARM_USE_INTRINSICS)
     94 extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
     95                     uint32_t xstart, uint32_t xend);
     96 #endif
     97 
     98 #if defined(ARCH_X86_HAVE_SSSE3)
     99 extern "C" void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
    100 extern "C" void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
    101 extern "C" void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
    102 extern "C" void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
    103 extern "C" void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
    104 extern "C" void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
    105 extern "C" void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
    106 extern "C" void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
    107 extern "C" void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
    108 extern "C" void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
    109 extern "C" void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
    110 extern "C" void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
    111 #endif
    112 
    113 void RsdCpuScriptIntrinsicBlend::kernel(const RsForEachStubParamStruct *p,
    114                                         uint32_t xstart, uint32_t xend,
    115                                         uint32_t instep, uint32_t outstep) {
    116     RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)p->usr;
    117 
    118     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
    119     uchar4 *out = (uchar4 *)p->out;
    120     uchar4 *in = (uchar4 *)p->in;
    121     uint32_t x1 = xstart;
    122     uint32_t x2 = xend;
    123 
    124 #if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
    125     if (gArchUseSIMD) {
    126         if (rsdIntrinsicBlend_K(out, in, p->slot, x1, x2) >= 0)
    127             return;
    128     }
    129 #endif
    130     switch (p->slot) {
    131     case BLEND_CLEAR:
    132         for (;x1 < x2; x1++, out++) {
    133             *out = 0;
    134         }
    135         break;
    136     case BLEND_SRC:
    137         for (;x1 < x2; x1++, out++, in++) {
    138           *out = *in;
    139         }
    140         break;
    141     //BLEND_DST is a NOP
    142     case BLEND_DST:
    143         break;
    144     case BLEND_SRC_OVER:
    145     #if defined(ARCH_X86_HAVE_SSSE3)
    146         if (gArchUseSIMD) {
    147             if ((x1 + 8) < x2) {
    148                 uint32_t len = (x2 - x1) >> 3;
    149                 rsdIntrinsicBlendSrcOver_K(out, in, len);
    150                 x1 += len << 3;
    151                 out += len << 3;
    152                 in += len << 3;
    153             }
    154         }
    155     #endif
    156         for (;x1 < x2; x1++, out++, in++) {
    157             short4 in_s = convert_short4(*in);
    158             short4 out_s = convert_short4(*out);
    159             in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
    160             *out = convert_uchar4(in_s);
    161         }
    162         break;
    163     case BLEND_DST_OVER:
    164     #if defined(ARCH_X86_HAVE_SSSE3)
    165         if (gArchUseSIMD) {
    166             if ((x1 + 8) < x2) {
    167                 uint32_t len = (x2 - x1) >> 3;
    168                 rsdIntrinsicBlendDstOver_K(out, in, len);
    169                 x1 += len << 3;
    170                 out += len << 3;
    171                 in += len << 3;
    172             }
    173         }
    174      #endif
    175         for (;x1 < x2; x1++, out++, in++) {
    176             short4 in_s = convert_short4(*in);
    177             short4 out_s = convert_short4(*out);
    178             in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
    179             *out = convert_uchar4(in_s);
    180         }
    181         break;
    182     case BLEND_SRC_IN:
    183     #if defined(ARCH_X86_HAVE_SSSE3)
    184         if (gArchUseSIMD) {
    185             if ((x1 + 8) < x2) {
    186                 uint32_t len = (x2 - x1) >> 3;
    187                 rsdIntrinsicBlendSrcIn_K(out, in, len);
    188                 x1 += len << 3;
    189                 out += len << 3;
    190                 in += len << 3;
    191             }
    192         }
    193     #endif
    194         for (;x1 < x2; x1++, out++, in++) {
    195             short4 in_s = convert_short4(*in);
    196             in_s = (in_s * out->w) >> (short4)8;
    197             *out = convert_uchar4(in_s);
    198         }
    199         break;
    200     case BLEND_DST_IN:
    201     #if defined(ARCH_X86_HAVE_SSSE3)
    202         if (gArchUseSIMD) {
    203             if ((x1 + 8) < x2) {
    204                 uint32_t len = (x2 - x1) >> 3;
    205                 rsdIntrinsicBlendDstIn_K(out, in, len);
    206                 x1 += len << 3;
    207                 out += len << 3;
    208                 in += len << 3;
    209             }
    210         }
    211      #endif
    212         for (;x1 < x2; x1++, out++, in++) {
    213             short4 out_s = convert_short4(*out);
    214             out_s = (out_s * in->w) >> (short4)8;
    215             *out = convert_uchar4(out_s);
    216         }
    217         break;
    218     case BLEND_SRC_OUT:
    219     #if defined(ARCH_X86_HAVE_SSSE3)
    220         if (gArchUseSIMD) {
    221             if ((x1 + 8) < x2) {
    222                 uint32_t len = (x2 - x1) >> 3;
    223                 rsdIntrinsicBlendSrcOut_K(out, in, len);
    224                 x1 += len << 3;
    225                 out += len << 3;
    226                 in += len << 3;
    227             }
    228         }
    229     #endif
    230         for (;x1 < x2; x1++, out++, in++) {
    231             short4 in_s = convert_short4(*in);
    232             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
    233             *out = convert_uchar4(in_s);
    234         }
    235         break;
    236     case BLEND_DST_OUT:
    237     #if defined(ARCH_X86_HAVE_SSSE3)
    238         if (gArchUseSIMD) {
    239             if ((x1 + 8) < x2) {
    240                 uint32_t len = (x2 - x1) >> 3;
    241                 rsdIntrinsicBlendDstOut_K(out, in, len);
    242                 x1 += len << 3;
    243                 out += len << 3;
    244                 in += len << 3;
    245             }
    246         }
    247     #endif
    248         for (;x1 < x2; x1++, out++, in++) {
    249             short4 out_s = convert_short4(*out);
    250             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
    251             *out = convert_uchar4(out_s);
    252         }
    253         break;
    254     case BLEND_SRC_ATOP:
    255     #if defined(ARCH_X86_HAVE_SSSE3)
    256         if (gArchUseSIMD) {
    257             if ((x1 + 8) < x2) {
    258                 uint32_t len = (x2 - x1) >> 3;
    259                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
    260                 x1 += len << 3;
    261                 out += len << 3;
    262                 in += len << 3;
    263             }
    264         }
    265     #endif
    266         for (;x1 < x2; x1++, out++, in++) {
    267             short4 in_s = convert_short4(*in);
    268             short4 out_s = convert_short4(*out);
    269             out_s.xyz = ((in_s.xyz * out_s.w) +
    270               (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
    271             *out = convert_uchar4(out_s);
    272         }
    273         break;
    274     case BLEND_DST_ATOP:
    275     #if defined(ARCH_X86_HAVE_SSSE3)
    276         if (gArchUseSIMD) {
    277             if ((x1 + 8) < x2) {
    278                 uint32_t len = (x2 - x1) >> 3;
    279                 rsdIntrinsicBlendDstAtop_K(out, in, len);
    280                 x1 += len << 3;
    281                 out += len << 3;
    282                 in += len << 3;
    283             }
    284         }
    285      #endif
    286         for (;x1 < x2; x1++, out++, in++) {
    287             short4 in_s = convert_short4(*in);
    288             short4 out_s = convert_short4(*out);
    289             out_s.xyz = ((out_s.xyz * in_s.w) +
    290               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
    291             *out = convert_uchar4(out_s);
    292         }
    293         break;
    294     case BLEND_XOR:
    295     #if defined(ARCH_X86_HAVE_SSSE3)
    296         if (gArchUseSIMD) {
    297             if ((x1 + 8) < x2) {
    298                 uint32_t len = (x2 - x1) >> 3;
    299                 rsdIntrinsicBlendXor_K(out, in, len);
    300                 x1 += len << 3;
    301                 out += len << 3;
    302                 in += len << 3;
    303             }
    304         }
    305     #endif
    306         for (;x1 < x2; x1++, out++, in++) {
    307             *out = *in ^ *out;
    308         }
    309         break;
    310     case BLEND_NORMAL:
    311         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
    312         rsAssert(false);
    313         break;
    314     case BLEND_AVERAGE:
    315         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
    316         rsAssert(false);
    317         break;
    318     case BLEND_MULTIPLY:
    319     #if defined(ARCH_X86_HAVE_SSSE3)
    320         if (gArchUseSIMD) {
    321             if ((x1 + 8) < x2) {
    322                 uint32_t len = (x2 - x1) >> 3;
    323                 rsdIntrinsicBlendMultiply_K(out, in, len);
    324                 x1 += len << 3;
    325                 out += len << 3;
    326                 in += len << 3;
    327             }
    328         }
    329     #endif
    330         for (;x1 < x2; x1++, out++, in++) {
    331           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
    332                                 >> (short4)8);
    333         }
    334         break;
    335     case BLEND_SCREEN:
    336         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
    337         rsAssert(false);
    338         break;
    339     case BLEND_DARKEN:
    340         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
    341         rsAssert(false);
    342         break;
    343     case BLEND_LIGHTEN:
    344         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
    345         rsAssert(false);
    346         break;
    347     case BLEND_OVERLAY:
    348         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
    349         rsAssert(false);
    350         break;
    351     case BLEND_HARDLIGHT:
    352         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
    353         rsAssert(false);
    354         break;
    355     case BLEND_SOFTLIGHT:
    356         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
    357         rsAssert(false);
    358         break;
    359     case BLEND_DIFFERENCE:
    360         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
    361         rsAssert(false);
    362         break;
    363     case BLEND_NEGATION:
    364         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
    365         rsAssert(false);
    366         break;
    367     case BLEND_EXCLUSION:
    368         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
    369         rsAssert(false);
    370         break;
    371     case BLEND_COLOR_DODGE:
    372         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
    373         rsAssert(false);
    374         break;
    375     case BLEND_INVERSE_COLOR_DODGE:
    376         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
    377         rsAssert(false);
    378         break;
    379     case BLEND_SOFT_DODGE:
    380         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
    381         rsAssert(false);
    382         break;
    383     case BLEND_COLOR_BURN:
    384         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
    385         rsAssert(false);
    386         break;
    387     case BLEND_INVERSE_COLOR_BURN:
    388         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
    389         rsAssert(false);
    390         break;
    391     case BLEND_SOFT_BURN:
    392         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
    393         rsAssert(false);
    394         break;
    395     case BLEND_REFLECT:
    396         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
    397         rsAssert(false);
    398         break;
    399     case BLEND_GLOW:
    400         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
    401         rsAssert(false);
    402         break;
    403     case BLEND_FREEZE:
    404         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
    405         rsAssert(false);
    406         break;
    407     case BLEND_HEAT:
    408         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
    409         rsAssert(false);
    410         break;
    411     case BLEND_ADD:
    412     #if defined(ARCH_X86_HAVE_SSSE3)
    413         if (gArchUseSIMD) {
    414             if((x1 + 8) < x2) {
    415                 uint32_t len = (x2 - x1) >> 3;
    416                 rsdIntrinsicBlendAdd_K(out, in, len);
    417                 x1 += len << 3;
    418                 out += len << 3;
    419                 in += len << 3;
    420             }
    421         }
    422     #endif
    423         for (;x1 < x2; x1++, out++, in++) {
    424             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    425                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    426             out->x = (oR + iR) > 255 ? 255 : oR + iR;
    427             out->y = (oG + iG) > 255 ? 255 : oG + iG;
    428             out->z = (oB + iB) > 255 ? 255 : oB + iB;
    429             out->w = (oA + iA) > 255 ? 255 : oA + iA;
    430         }
    431         break;
    432     case BLEND_SUBTRACT:
    433     #if defined(ARCH_X86_HAVE_SSSE3)
    434         if (gArchUseSIMD) {
    435             if((x1 + 8) < x2) {
    436                 uint32_t len = (x2 - x1) >> 3;
    437                 rsdIntrinsicBlendSub_K(out, in, len);
    438                 x1 += len << 3;
    439                 out += len << 3;
    440                 in += len << 3;
    441             }
    442         }
    443     #endif
    444         for (;x1 < x2; x1++, out++, in++) {
    445             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    446                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    447             out->x = (oR - iR) < 0 ? 0 : oR - iR;
    448             out->y = (oG - iG) < 0 ? 0 : oG - iG;
    449             out->z = (oB - iB) < 0 ? 0 : oB - iB;
    450             out->w = (oA - iA) < 0 ? 0 : oA - iA;
    451         }
    452         break;
    453     case BLEND_STAMP:
    454         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
    455         rsAssert(false);
    456         break;
    457     case BLEND_RED:
    458         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
    459         rsAssert(false);
    460         break;
    461     case BLEND_GREEN:
    462         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
    463         rsAssert(false);
    464         break;
    465     case BLEND_BLUE:
    466         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
    467         rsAssert(false);
    468         break;
    469     case BLEND_HUE:
    470         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
    471         rsAssert(false);
    472         break;
    473     case BLEND_SATURATION:
    474         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
    475         rsAssert(false);
    476         break;
    477     case BLEND_COLOR:
    478         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
    479         rsAssert(false);
    480         break;
    481     case BLEND_LUMINOSITY:
    482         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
    483         rsAssert(false);
    484         break;
    485 
    486     default:
    487         ALOGE("Called unimplemented value %d", p->slot);
    488         rsAssert(false);
    489 
    490     }
    491 }
    492 
    493 
    494 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
    495                                                        const Script *s, const Element *e)
    496             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
    497 
    498     mRootPtr = &kernel;
    499 }
    500 
    501 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
    502 }
    503 
    504 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
    505     s->mHal.info.exportedVariableCount = 0;
    506 }
    507 
    508 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
    509                                       const Script *s, const Element *e) {
    510     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
    511 }
    512 
    513 
    514 
    515