Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsCpuIntrinsic.h"
     19 #include "rsCpuIntrinsicInlines.h"
     20 
     21 namespace android {
     22 namespace renderscript {
     23 
     24 
     25 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
     26 public:
     27     void populateScript(Script *) override;
     28 
     29     ~RsdCpuScriptIntrinsicBlend() override;
     30     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
     31 
     32 protected:
     33     static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
     34                        uint32_t xend, uint32_t outstep);
     35 };
     36 
     37 } // namespace renderscript
     38 } // namespace android
     39 
     40 
     41 enum {
     42     BLEND_CLEAR = 0,
     43     BLEND_SRC = 1,
     44     BLEND_DST = 2,
     45     BLEND_SRC_OVER = 3,
     46     BLEND_DST_OVER = 4,
     47     BLEND_SRC_IN = 5,
     48     BLEND_DST_IN = 6,
     49     BLEND_SRC_OUT = 7,
     50     BLEND_DST_OUT = 8,
     51     BLEND_SRC_ATOP = 9,
     52     BLEND_DST_ATOP = 10,
     53     BLEND_XOR = 11,
     54 
     55     BLEND_NORMAL = 12,
     56     BLEND_AVERAGE = 13,
     57     BLEND_MULTIPLY = 14,
     58     BLEND_SCREEN = 15,
     59     BLEND_DARKEN = 16,
     60     BLEND_LIGHTEN = 17,
     61     BLEND_OVERLAY = 18,
     62     BLEND_HARDLIGHT = 19,
     63     BLEND_SOFTLIGHT = 20,
     64     BLEND_DIFFERENCE = 21,
     65     BLEND_NEGATION = 22,
     66     BLEND_EXCLUSION = 23,
     67     BLEND_COLOR_DODGE = 24,
     68     BLEND_INVERSE_COLOR_DODGE = 25,
     69     BLEND_SOFT_DODGE = 26,
     70     BLEND_COLOR_BURN = 27,
     71     BLEND_INVERSE_COLOR_BURN = 28,
     72     BLEND_SOFT_BURN = 29,
     73     BLEND_REFLECT = 30,
     74     BLEND_GLOW = 31,
     75     BLEND_FREEZE = 32,
     76     BLEND_HEAT = 33,
     77     BLEND_ADD = 34,
     78     BLEND_SUBTRACT = 35,
     79     BLEND_STAMP = 36,
     80     BLEND_RED = 37,
     81     BLEND_GREEN = 38,
     82     BLEND_BLUE = 39,
     83     BLEND_HUE = 40,
     84     BLEND_SATURATION = 41,
     85     BLEND_COLOR = 42,
     86     BLEND_LUMINOSITY = 43
     87 };
     88 
     89 #if defined(ARCH_ARM_USE_INTRINSICS)
     90 extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
     91                     uint32_t xstart, uint32_t xend);
     92 #endif
     93 
     94 #if defined(ARCH_X86_HAVE_SSSE3)
     95 extern void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
     96 extern void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
     97 extern void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
     98 extern void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
     99 extern void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
    100 extern void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
    101 extern void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
    102 extern void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
    103 extern void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
    104 extern void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
    105 extern void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
    106 extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
    107 #endif
    108 
    109 namespace android {
    110 namespace renderscript {
    111 
    112 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
    113                                         uint32_t xstart, uint32_t xend,
    114                                         uint32_t outstep) {
    115     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
    116     uchar4 *out = (uchar4 *)info->outPtr[0];
    117     uchar4 *in = (uchar4 *)info->inPtr[0];
    118     uint32_t x1 = xstart;
    119     uint32_t x2 = xend;
    120 
    121 #if defined(ARCH_ARM_USE_INTRINSICS)
    122     if (gArchUseSIMD) {
    123         if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
    124             return;
    125     }
    126 #endif
    127     switch (info->slot) {
    128     case BLEND_CLEAR:
    129         for (;x1 < x2; x1++, out++) {
    130             *out = 0;
    131         }
    132         break;
    133     case BLEND_SRC:
    134         for (;x1 < x2; x1++, out++, in++) {
    135           *out = *in;
    136         }
    137         break;
    138     //BLEND_DST is a NOP
    139     case BLEND_DST:
    140         break;
    141     case BLEND_SRC_OVER:
    142     #if defined(ARCH_X86_HAVE_SSSE3)
    143         if (gArchUseSIMD) {
    144             if ((x1 + 8) < x2) {
    145                 uint32_t len = (x2 - x1) >> 3;
    146                 rsdIntrinsicBlendSrcOver_K(out, in, len);
    147                 x1 += len << 3;
    148                 out += len << 3;
    149                 in += len << 3;
    150             }
    151         }
    152     #endif
    153         for (;x1 < x2; x1++, out++, in++) {
    154             short4 in_s = convert_short4(*in);
    155             short4 out_s = convert_short4(*out);
    156             in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
    157             *out = convert_uchar4(in_s);
    158         }
    159         break;
    160     case BLEND_DST_OVER:
    161     #if defined(ARCH_X86_HAVE_SSSE3)
    162         if (gArchUseSIMD) {
    163             if ((x1 + 8) < x2) {
    164                 uint32_t len = (x2 - x1) >> 3;
    165                 rsdIntrinsicBlendDstOver_K(out, in, len);
    166                 x1 += len << 3;
    167                 out += len << 3;
    168                 in += len << 3;
    169             }
    170         }
    171      #endif
    172         for (;x1 < x2; x1++, out++, in++) {
    173             short4 in_s = convert_short4(*in);
    174             short4 out_s = convert_short4(*out);
    175             in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
    176             *out = convert_uchar4(in_s);
    177         }
    178         break;
    179     case BLEND_SRC_IN:
    180     #if defined(ARCH_X86_HAVE_SSSE3)
    181         if (gArchUseSIMD) {
    182             if ((x1 + 8) < x2) {
    183                 uint32_t len = (x2 - x1) >> 3;
    184                 rsdIntrinsicBlendSrcIn_K(out, in, len);
    185                 x1 += len << 3;
    186                 out += len << 3;
    187                 in += len << 3;
    188             }
    189         }
    190     #endif
    191         for (;x1 < x2; x1++, out++, in++) {
    192             short4 in_s = convert_short4(*in);
    193             in_s = (in_s * out->w) >> (short4)8;
    194             *out = convert_uchar4(in_s);
    195         }
    196         break;
    197     case BLEND_DST_IN:
    198     #if defined(ARCH_X86_HAVE_SSSE3)
    199         if (gArchUseSIMD) {
    200             if ((x1 + 8) < x2) {
    201                 uint32_t len = (x2 - x1) >> 3;
    202                 rsdIntrinsicBlendDstIn_K(out, in, len);
    203                 x1 += len << 3;
    204                 out += len << 3;
    205                 in += len << 3;
    206             }
    207         }
    208      #endif
    209         for (;x1 < x2; x1++, out++, in++) {
    210             short4 out_s = convert_short4(*out);
    211             out_s = (out_s * in->w) >> (short4)8;
    212             *out = convert_uchar4(out_s);
    213         }
    214         break;
    215     case BLEND_SRC_OUT:
    216     #if defined(ARCH_X86_HAVE_SSSE3)
    217         if (gArchUseSIMD) {
    218             if ((x1 + 8) < x2) {
    219                 uint32_t len = (x2 - x1) >> 3;
    220                 rsdIntrinsicBlendSrcOut_K(out, in, len);
    221                 x1 += len << 3;
    222                 out += len << 3;
    223                 in += len << 3;
    224             }
    225         }
    226     #endif
    227         for (;x1 < x2; x1++, out++, in++) {
    228             short4 in_s = convert_short4(*in);
    229             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
    230             *out = convert_uchar4(in_s);
    231         }
    232         break;
    233     case BLEND_DST_OUT:
    234     #if defined(ARCH_X86_HAVE_SSSE3)
    235         if (gArchUseSIMD) {
    236             if ((x1 + 8) < x2) {
    237                 uint32_t len = (x2 - x1) >> 3;
    238                 rsdIntrinsicBlendDstOut_K(out, in, len);
    239                 x1 += len << 3;
    240                 out += len << 3;
    241                 in += len << 3;
    242             }
    243         }
    244     #endif
    245         for (;x1 < x2; x1++, out++, in++) {
    246             short4 out_s = convert_short4(*out);
    247             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
    248             *out = convert_uchar4(out_s);
    249         }
    250         break;
    251     case BLEND_SRC_ATOP:
    252     #if defined(ARCH_X86_HAVE_SSSE3)
    253         if (gArchUseSIMD) {
    254             if ((x1 + 8) < x2) {
    255                 uint32_t len = (x2 - x1) >> 3;
    256                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
    257                 x1 += len << 3;
    258                 out += len << 3;
    259                 in += len << 3;
    260             }
    261         }
    262     #endif
    263         for (;x1 < x2; x1++, out++, in++) {
    264             short4 in_s = convert_short4(*in);
    265             short4 out_s = convert_short4(*out);
    266             out_s.xyz = ((in_s.xyz * out_s.w) +
    267               (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
    268             *out = convert_uchar4(out_s);
    269         }
    270         break;
    271     case BLEND_DST_ATOP:
    272     #if defined(ARCH_X86_HAVE_SSSE3)
    273         if (gArchUseSIMD) {
    274             if ((x1 + 8) < x2) {
    275                 uint32_t len = (x2 - x1) >> 3;
    276                 rsdIntrinsicBlendDstAtop_K(out, in, len);
    277                 x1 += len << 3;
    278                 out += len << 3;
    279                 in += len << 3;
    280             }
    281         }
    282      #endif
    283         for (;x1 < x2; x1++, out++, in++) {
    284             short4 in_s = convert_short4(*in);
    285             short4 out_s = convert_short4(*out);
    286             out_s.xyz = ((out_s.xyz * in_s.w) +
    287               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
    288             out_s.w = in_s.w;
    289             *out = convert_uchar4(out_s);
    290         }
    291         break;
    292     case BLEND_XOR:
    293     #if defined(ARCH_X86_HAVE_SSSE3)
    294         if (gArchUseSIMD) {
    295             if ((x1 + 8) < x2) {
    296                 uint32_t len = (x2 - x1) >> 3;
    297                 rsdIntrinsicBlendXor_K(out, in, len);
    298                 x1 += len << 3;
    299                 out += len << 3;
    300                 in += len << 3;
    301             }
    302         }
    303     #endif
    304         for (;x1 < x2; x1++, out++, in++) {
    305             *out = *in ^ *out;
    306         }
    307         break;
    308     case BLEND_NORMAL:
    309         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
    310         rsAssert(false);
    311         break;
    312     case BLEND_AVERAGE:
    313         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
    314         rsAssert(false);
    315         break;
    316     case BLEND_MULTIPLY:
    317     #if defined(ARCH_X86_HAVE_SSSE3)
    318         if (gArchUseSIMD) {
    319             if ((x1 + 8) < x2) {
    320                 uint32_t len = (x2 - x1) >> 3;
    321                 rsdIntrinsicBlendMultiply_K(out, in, len);
    322                 x1 += len << 3;
    323                 out += len << 3;
    324                 in += len << 3;
    325             }
    326         }
    327     #endif
    328         for (;x1 < x2; x1++, out++, in++) {
    329           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
    330                                 >> (short4)8);
    331         }
    332         break;
    333     case BLEND_SCREEN:
    334         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
    335         rsAssert(false);
    336         break;
    337     case BLEND_DARKEN:
    338         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
    339         rsAssert(false);
    340         break;
    341     case BLEND_LIGHTEN:
    342         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
    343         rsAssert(false);
    344         break;
    345     case BLEND_OVERLAY:
    346         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
    347         rsAssert(false);
    348         break;
    349     case BLEND_HARDLIGHT:
    350         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
    351         rsAssert(false);
    352         break;
    353     case BLEND_SOFTLIGHT:
    354         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
    355         rsAssert(false);
    356         break;
    357     case BLEND_DIFFERENCE:
    358         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
    359         rsAssert(false);
    360         break;
    361     case BLEND_NEGATION:
    362         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
    363         rsAssert(false);
    364         break;
    365     case BLEND_EXCLUSION:
    366         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
    367         rsAssert(false);
    368         break;
    369     case BLEND_COLOR_DODGE:
    370         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
    371         rsAssert(false);
    372         break;
    373     case BLEND_INVERSE_COLOR_DODGE:
    374         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
    375         rsAssert(false);
    376         break;
    377     case BLEND_SOFT_DODGE:
    378         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
    379         rsAssert(false);
    380         break;
    381     case BLEND_COLOR_BURN:
    382         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
    383         rsAssert(false);
    384         break;
    385     case BLEND_INVERSE_COLOR_BURN:
    386         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
    387         rsAssert(false);
    388         break;
    389     case BLEND_SOFT_BURN:
    390         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
    391         rsAssert(false);
    392         break;
    393     case BLEND_REFLECT:
    394         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
    395         rsAssert(false);
    396         break;
    397     case BLEND_GLOW:
    398         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
    399         rsAssert(false);
    400         break;
    401     case BLEND_FREEZE:
    402         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
    403         rsAssert(false);
    404         break;
    405     case BLEND_HEAT:
    406         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
    407         rsAssert(false);
    408         break;
    409     case BLEND_ADD:
    410     #if defined(ARCH_X86_HAVE_SSSE3)
    411         if (gArchUseSIMD) {
    412             if((x1 + 8) < x2) {
    413                 uint32_t len = (x2 - x1) >> 3;
    414                 rsdIntrinsicBlendAdd_K(out, in, len);
    415                 x1 += len << 3;
    416                 out += len << 3;
    417                 in += len << 3;
    418             }
    419         }
    420     #endif
    421         for (;x1 < x2; x1++, out++, in++) {
    422             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    423                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    424             out->x = (oR + iR) > 255 ? 255 : oR + iR;
    425             out->y = (oG + iG) > 255 ? 255 : oG + iG;
    426             out->z = (oB + iB) > 255 ? 255 : oB + iB;
    427             out->w = (oA + iA) > 255 ? 255 : oA + iA;
    428         }
    429         break;
    430     case BLEND_SUBTRACT:
    431     #if defined(ARCH_X86_HAVE_SSSE3)
    432         if (gArchUseSIMD) {
    433             if((x1 + 8) < x2) {
    434                 uint32_t len = (x2 - x1) >> 3;
    435                 rsdIntrinsicBlendSub_K(out, in, len);
    436                 x1 += len << 3;
    437                 out += len << 3;
    438                 in += len << 3;
    439             }
    440         }
    441     #endif
    442         for (;x1 < x2; x1++, out++, in++) {
    443             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    444                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    445             out->x = (oR - iR) < 0 ? 0 : oR - iR;
    446             out->y = (oG - iG) < 0 ? 0 : oG - iG;
    447             out->z = (oB - iB) < 0 ? 0 : oB - iB;
    448             out->w = (oA - iA) < 0 ? 0 : oA - iA;
    449         }
    450         break;
    451     case BLEND_STAMP:
    452         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
    453         rsAssert(false);
    454         break;
    455     case BLEND_RED:
    456         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
    457         rsAssert(false);
    458         break;
    459     case BLEND_GREEN:
    460         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
    461         rsAssert(false);
    462         break;
    463     case BLEND_BLUE:
    464         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
    465         rsAssert(false);
    466         break;
    467     case BLEND_HUE:
    468         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
    469         rsAssert(false);
    470         break;
    471     case BLEND_SATURATION:
    472         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
    473         rsAssert(false);
    474         break;
    475     case BLEND_COLOR:
    476         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
    477         rsAssert(false);
    478         break;
    479     case BLEND_LUMINOSITY:
    480         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
    481         rsAssert(false);
    482         break;
    483 
    484     default:
    485         ALOGE("Called unimplemented value %d", info->slot);
    486         rsAssert(false);
    487 
    488     }
    489 }
    490 
    491 
    492 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
    493                                                        const Script *s, const Element *e)
    494             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
    495 
    496     mRootPtr = &kernel;
    497 }
    498 
    499 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
    500 }
    501 
    502 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
    503     s->mHal.info.exportedVariableCount = 0;
    504 }
    505 
    506 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
    507                                       const Script *s, const Element *e) {
    508     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
    509 }
    510 
    511 } // namespace renderscript
    512 } // namespace android
    513