Home | History | Annotate | Download | only in cpu_ref
      1 /*
      2  * Copyright (C) 2012 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 
     18 #include "rsCpuIntrinsic.h"
     19 #include "rsCpuIntrinsicInlines.h"
     20 
     21 using namespace android;
     22 using namespace android::renderscript;
     23 
     24 namespace android {
     25 namespace renderscript {
     26 
     27 
     28 class RsdCpuScriptIntrinsicBlend : public RsdCpuScriptIntrinsic {
     29 public:
     30     void populateScript(Script *) override;
     31 
     32     ~RsdCpuScriptIntrinsicBlend() override;
     33     RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx, const Script *s, const Element *e);
     34 
     35 protected:
     36     static void kernel(const RsExpandKernelDriverInfo *info, uint32_t xstart,
     37                        uint32_t xend, uint32_t outstep);
     38 };
     39 
     40 }
     41 }
     42 
     43 
     44 enum {
     45     BLEND_CLEAR = 0,
     46     BLEND_SRC = 1,
     47     BLEND_DST = 2,
     48     BLEND_SRC_OVER = 3,
     49     BLEND_DST_OVER = 4,
     50     BLEND_SRC_IN = 5,
     51     BLEND_DST_IN = 6,
     52     BLEND_SRC_OUT = 7,
     53     BLEND_DST_OUT = 8,
     54     BLEND_SRC_ATOP = 9,
     55     BLEND_DST_ATOP = 10,
     56     BLEND_XOR = 11,
     57 
     58     BLEND_NORMAL = 12,
     59     BLEND_AVERAGE = 13,
     60     BLEND_MULTIPLY = 14,
     61     BLEND_SCREEN = 15,
     62     BLEND_DARKEN = 16,
     63     BLEND_LIGHTEN = 17,
     64     BLEND_OVERLAY = 18,
     65     BLEND_HARDLIGHT = 19,
     66     BLEND_SOFTLIGHT = 20,
     67     BLEND_DIFFERENCE = 21,
     68     BLEND_NEGATION = 22,
     69     BLEND_EXCLUSION = 23,
     70     BLEND_COLOR_DODGE = 24,
     71     BLEND_INVERSE_COLOR_DODGE = 25,
     72     BLEND_SOFT_DODGE = 26,
     73     BLEND_COLOR_BURN = 27,
     74     BLEND_INVERSE_COLOR_BURN = 28,
     75     BLEND_SOFT_BURN = 29,
     76     BLEND_REFLECT = 30,
     77     BLEND_GLOW = 31,
     78     BLEND_FREEZE = 32,
     79     BLEND_HEAT = 33,
     80     BLEND_ADD = 34,
     81     BLEND_SUBTRACT = 35,
     82     BLEND_STAMP = 36,
     83     BLEND_RED = 37,
     84     BLEND_GREEN = 38,
     85     BLEND_BLUE = 39,
     86     BLEND_HUE = 40,
     87     BLEND_SATURATION = 41,
     88     BLEND_COLOR = 42,
     89     BLEND_LUMINOSITY = 43
     90 };
     91 
#if defined(ARCH_ARM_USE_INTRINSICS)
// Externally defined blend routine with C linkage. kernel() calls it (when
// ARCH_ARM64_USE_INTRINSICS is not defined and gArchUseSIMD is set) with the
// output row, input row, blend slot and pixel range, and treats a return
// value >= 0 as "handled"; otherwise it falls back to the C++ switch.
extern "C" int rsdIntrinsicBlend_K(uchar4 *out, uchar4 const *in, int slot,
                    uint32_t xstart, uint32_t xend);
#endif
     96 
#if defined(ARCH_X86_HAVE_SSSE3)
// Externally defined SSSE3 blend routines, one per accelerated blend mode.
// kernel() passes the output row as dst, the input row as src, and the number
// of whole 8-pixel groups to process as count8; it then advances its own
// cursors by count8 * 8 pixels and finishes the tail with scalar code.
extern void rsdIntrinsicBlendSrcOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOver_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstIn_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendDstOut_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSrcAtop_K(void *dst, const void *src, uint32_t count8);
// NOTE: kernel() currently has the call to the DstAtop routine compiled out
// (Bug: 22047392).
extern void rsdIntrinsicBlendDstAtop_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendXor_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendMultiply_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendAdd_K(void *dst, const void *src, uint32_t count8);
extern void rsdIntrinsicBlendSub_K(void *dst, const void *src, uint32_t count8);
#endif
    111 
    112 void RsdCpuScriptIntrinsicBlend::kernel(const RsExpandKernelDriverInfo *info,
    113                                         uint32_t xstart, uint32_t xend,
    114                                         uint32_t outstep) {
    115     RsdCpuScriptIntrinsicBlend *cp = (RsdCpuScriptIntrinsicBlend *)info->usr;
    116 
    117     // instep/outstep can be ignored--sizeof(uchar4) known at compile time
    118     uchar4 *out = (uchar4 *)info->outPtr[0];
    119     uchar4 *in = (uchar4 *)info->inPtr[0];
    120     uint32_t x1 = xstart;
    121     uint32_t x2 = xend;
    122 
    123 #if defined(ARCH_ARM_USE_INTRINSICS) && !defined(ARCH_ARM64_USE_INTRINSICS)
    124     // Bug: 22047392 - Skip optimized version for BLEND_DST_ATOP until this
    125     // been fixed.
    126     if (gArchUseSIMD && info->slot != BLEND_DST_ATOP) {
    127         if (rsdIntrinsicBlend_K(out, in, info->slot, x1, x2) >= 0)
    128             return;
    129     }
    130 #endif
    131     switch (info->slot) {
    132     case BLEND_CLEAR:
    133         for (;x1 < x2; x1++, out++) {
    134             *out = 0;
    135         }
    136         break;
    137     case BLEND_SRC:
    138         for (;x1 < x2; x1++, out++, in++) {
    139           *out = *in;
    140         }
    141         break;
    142     //BLEND_DST is a NOP
    143     case BLEND_DST:
    144         break;
    145     case BLEND_SRC_OVER:
    146     #if defined(ARCH_X86_HAVE_SSSE3)
    147         if (gArchUseSIMD) {
    148             if ((x1 + 8) < x2) {
    149                 uint32_t len = (x2 - x1) >> 3;
    150                 rsdIntrinsicBlendSrcOver_K(out, in, len);
    151                 x1 += len << 3;
    152                 out += len << 3;
    153                 in += len << 3;
    154             }
    155         }
    156     #endif
    157         for (;x1 < x2; x1++, out++, in++) {
    158             short4 in_s = convert_short4(*in);
    159             short4 out_s = convert_short4(*out);
    160             in_s = in_s + ((out_s * (short4)(255 - in_s.w)) >> (short4)8);
    161             *out = convert_uchar4(in_s);
    162         }
    163         break;
    164     case BLEND_DST_OVER:
    165     #if defined(ARCH_X86_HAVE_SSSE3)
    166         if (gArchUseSIMD) {
    167             if ((x1 + 8) < x2) {
    168                 uint32_t len = (x2 - x1) >> 3;
    169                 rsdIntrinsicBlendDstOver_K(out, in, len);
    170                 x1 += len << 3;
    171                 out += len << 3;
    172                 in += len << 3;
    173             }
    174         }
    175      #endif
    176         for (;x1 < x2; x1++, out++, in++) {
    177             short4 in_s = convert_short4(*in);
    178             short4 out_s = convert_short4(*out);
    179             in_s = out_s + ((in_s * (short4)(255 - out_s.w)) >> (short4)8);
    180             *out = convert_uchar4(in_s);
    181         }
    182         break;
    183     case BLEND_SRC_IN:
    184     #if defined(ARCH_X86_HAVE_SSSE3)
    185         if (gArchUseSIMD) {
    186             if ((x1 + 8) < x2) {
    187                 uint32_t len = (x2 - x1) >> 3;
    188                 rsdIntrinsicBlendSrcIn_K(out, in, len);
    189                 x1 += len << 3;
    190                 out += len << 3;
    191                 in += len << 3;
    192             }
    193         }
    194     #endif
    195         for (;x1 < x2; x1++, out++, in++) {
    196             short4 in_s = convert_short4(*in);
    197             in_s = (in_s * out->w) >> (short4)8;
    198             *out = convert_uchar4(in_s);
    199         }
    200         break;
    201     case BLEND_DST_IN:
    202     #if defined(ARCH_X86_HAVE_SSSE3)
    203         if (gArchUseSIMD) {
    204             if ((x1 + 8) < x2) {
    205                 uint32_t len = (x2 - x1) >> 3;
    206                 rsdIntrinsicBlendDstIn_K(out, in, len);
    207                 x1 += len << 3;
    208                 out += len << 3;
    209                 in += len << 3;
    210             }
    211         }
    212      #endif
    213         for (;x1 < x2; x1++, out++, in++) {
    214             short4 out_s = convert_short4(*out);
    215             out_s = (out_s * in->w) >> (short4)8;
    216             *out = convert_uchar4(out_s);
    217         }
    218         break;
    219     case BLEND_SRC_OUT:
    220     #if defined(ARCH_X86_HAVE_SSSE3)
    221         if (gArchUseSIMD) {
    222             if ((x1 + 8) < x2) {
    223                 uint32_t len = (x2 - x1) >> 3;
    224                 rsdIntrinsicBlendSrcOut_K(out, in, len);
    225                 x1 += len << 3;
    226                 out += len << 3;
    227                 in += len << 3;
    228             }
    229         }
    230     #endif
    231         for (;x1 < x2; x1++, out++, in++) {
    232             short4 in_s = convert_short4(*in);
    233             in_s = (in_s * (short4)(255 - out->w)) >> (short4)8;
    234             *out = convert_uchar4(in_s);
    235         }
    236         break;
    237     case BLEND_DST_OUT:
    238     #if defined(ARCH_X86_HAVE_SSSE3)
    239         if (gArchUseSIMD) {
    240             if ((x1 + 8) < x2) {
    241                 uint32_t len = (x2 - x1) >> 3;
    242                 rsdIntrinsicBlendDstOut_K(out, in, len);
    243                 x1 += len << 3;
    244                 out += len << 3;
    245                 in += len << 3;
    246             }
    247         }
    248     #endif
    249         for (;x1 < x2; x1++, out++, in++) {
    250             short4 out_s = convert_short4(*out);
    251             out_s = (out_s * (short4)(255 - in->w)) >> (short4)8;
    252             *out = convert_uchar4(out_s);
    253         }
    254         break;
    255     case BLEND_SRC_ATOP:
    256     #if defined(ARCH_X86_HAVE_SSSE3)
    257         if (gArchUseSIMD) {
    258             if ((x1 + 8) < x2) {
    259                 uint32_t len = (x2 - x1) >> 3;
    260                 rsdIntrinsicBlendSrcAtop_K(out, in, len);
    261                 x1 += len << 3;
    262                 out += len << 3;
    263                 in += len << 3;
    264             }
    265         }
    266     #endif
    267         for (;x1 < x2; x1++, out++, in++) {
    268             short4 in_s = convert_short4(*in);
    269             short4 out_s = convert_short4(*out);
    270             out_s.xyz = ((in_s.xyz * out_s.w) +
    271               (out_s.xyz * ((short3)255 - (short3)in_s.w))) >> (short3)8;
    272             *out = convert_uchar4(out_s);
    273         }
    274         break;
    275     case BLEND_DST_ATOP:
    276     // Bug: 22047392 - We need to make sure that "out->w = in->w;" in all
    277     // accelerated versions before re-enabling optimizations.
    278     #if false  // Bug: 22047392
    279     #if defined(ARCH_X86_HAVE_SSSE3)
    280         if (gArchUseSIMD) {
    281             if ((x1 + 8) < x2) {
    282                 uint32_t len = (x2 - x1) >> 3;
    283                 rsdIntrinsicBlendDstAtop_K(out, in, len);
    284                 x1 += len << 3;
    285                 out += len << 3;
    286                 in += len << 3;
    287             }
    288         }
    289      #endif
    290      #endif  // false for Bug: 22047392
    291         for (;x1 < x2; x1++, out++, in++) {
    292             short4 in_s = convert_short4(*in);
    293             short4 out_s = convert_short4(*out);
    294             out_s.xyz = ((out_s.xyz * in_s.w) +
    295               (in_s.xyz * ((short3)255 - (short3)out_s.w))) >> (short3)8;
    296             out_s.w = in_s.w;
    297             *out = convert_uchar4(out_s);
    298         }
    299         break;
    300     case BLEND_XOR:
    301     #if defined(ARCH_X86_HAVE_SSSE3)
    302         if (gArchUseSIMD) {
    303             if ((x1 + 8) < x2) {
    304                 uint32_t len = (x2 - x1) >> 3;
    305                 rsdIntrinsicBlendXor_K(out, in, len);
    306                 x1 += len << 3;
    307                 out += len << 3;
    308                 in += len << 3;
    309             }
    310         }
    311     #endif
    312         for (;x1 < x2; x1++, out++, in++) {
    313             *out = *in ^ *out;
    314         }
    315         break;
    316     case BLEND_NORMAL:
    317         ALOGE("Called unimplemented blend intrinsic BLEND_NORMAL");
    318         rsAssert(false);
    319         break;
    320     case BLEND_AVERAGE:
    321         ALOGE("Called unimplemented blend intrinsic BLEND_AVERAGE");
    322         rsAssert(false);
    323         break;
    324     case BLEND_MULTIPLY:
    325     #if defined(ARCH_X86_HAVE_SSSE3)
    326         if (gArchUseSIMD) {
    327             if ((x1 + 8) < x2) {
    328                 uint32_t len = (x2 - x1) >> 3;
    329                 rsdIntrinsicBlendMultiply_K(out, in, len);
    330                 x1 += len << 3;
    331                 out += len << 3;
    332                 in += len << 3;
    333             }
    334         }
    335     #endif
    336         for (;x1 < x2; x1++, out++, in++) {
    337           *out = convert_uchar4((convert_short4(*in) * convert_short4(*out))
    338                                 >> (short4)8);
    339         }
    340         break;
    341     case BLEND_SCREEN:
    342         ALOGE("Called unimplemented blend intrinsic BLEND_SCREEN");
    343         rsAssert(false);
    344         break;
    345     case BLEND_DARKEN:
    346         ALOGE("Called unimplemented blend intrinsic BLEND_DARKEN");
    347         rsAssert(false);
    348         break;
    349     case BLEND_LIGHTEN:
    350         ALOGE("Called unimplemented blend intrinsic BLEND_LIGHTEN");
    351         rsAssert(false);
    352         break;
    353     case BLEND_OVERLAY:
    354         ALOGE("Called unimplemented blend intrinsic BLEND_OVERLAY");
    355         rsAssert(false);
    356         break;
    357     case BLEND_HARDLIGHT:
    358         ALOGE("Called unimplemented blend intrinsic BLEND_HARDLIGHT");
    359         rsAssert(false);
    360         break;
    361     case BLEND_SOFTLIGHT:
    362         ALOGE("Called unimplemented blend intrinsic BLEND_SOFTLIGHT");
    363         rsAssert(false);
    364         break;
    365     case BLEND_DIFFERENCE:
    366         ALOGE("Called unimplemented blend intrinsic BLEND_DIFFERENCE");
    367         rsAssert(false);
    368         break;
    369     case BLEND_NEGATION:
    370         ALOGE("Called unimplemented blend intrinsic BLEND_NEGATION");
    371         rsAssert(false);
    372         break;
    373     case BLEND_EXCLUSION:
    374         ALOGE("Called unimplemented blend intrinsic BLEND_EXCLUSION");
    375         rsAssert(false);
    376         break;
    377     case BLEND_COLOR_DODGE:
    378         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_DODGE");
    379         rsAssert(false);
    380         break;
    381     case BLEND_INVERSE_COLOR_DODGE:
    382         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_DODGE");
    383         rsAssert(false);
    384         break;
    385     case BLEND_SOFT_DODGE:
    386         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_DODGE");
    387         rsAssert(false);
    388         break;
    389     case BLEND_COLOR_BURN:
    390         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR_BURN");
    391         rsAssert(false);
    392         break;
    393     case BLEND_INVERSE_COLOR_BURN:
    394         ALOGE("Called unimplemented blend intrinsic BLEND_INVERSE_COLOR_BURN");
    395         rsAssert(false);
    396         break;
    397     case BLEND_SOFT_BURN:
    398         ALOGE("Called unimplemented blend intrinsic BLEND_SOFT_BURN");
    399         rsAssert(false);
    400         break;
    401     case BLEND_REFLECT:
    402         ALOGE("Called unimplemented blend intrinsic BLEND_REFLECT");
    403         rsAssert(false);
    404         break;
    405     case BLEND_GLOW:
    406         ALOGE("Called unimplemented blend intrinsic BLEND_GLOW");
    407         rsAssert(false);
    408         break;
    409     case BLEND_FREEZE:
    410         ALOGE("Called unimplemented blend intrinsic BLEND_FREEZE");
    411         rsAssert(false);
    412         break;
    413     case BLEND_HEAT:
    414         ALOGE("Called unimplemented blend intrinsic BLEND_HEAT");
    415         rsAssert(false);
    416         break;
    417     case BLEND_ADD:
    418     #if defined(ARCH_X86_HAVE_SSSE3)
    419         if (gArchUseSIMD) {
    420             if((x1 + 8) < x2) {
    421                 uint32_t len = (x2 - x1) >> 3;
    422                 rsdIntrinsicBlendAdd_K(out, in, len);
    423                 x1 += len << 3;
    424                 out += len << 3;
    425                 in += len << 3;
    426             }
    427         }
    428     #endif
    429         for (;x1 < x2; x1++, out++, in++) {
    430             uint32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    431                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    432             out->x = (oR + iR) > 255 ? 255 : oR + iR;
    433             out->y = (oG + iG) > 255 ? 255 : oG + iG;
    434             out->z = (oB + iB) > 255 ? 255 : oB + iB;
    435             out->w = (oA + iA) > 255 ? 255 : oA + iA;
    436         }
    437         break;
    438     case BLEND_SUBTRACT:
    439     #if defined(ARCH_X86_HAVE_SSSE3)
    440         if (gArchUseSIMD) {
    441             if((x1 + 8) < x2) {
    442                 uint32_t len = (x2 - x1) >> 3;
    443                 rsdIntrinsicBlendSub_K(out, in, len);
    444                 x1 += len << 3;
    445                 out += len << 3;
    446                 in += len << 3;
    447             }
    448         }
    449     #endif
    450         for (;x1 < x2; x1++, out++, in++) {
    451             int32_t iR = in->x, iG = in->y, iB = in->z, iA = in->w,
    452                 oR = out->x, oG = out->y, oB = out->z, oA = out->w;
    453             out->x = (oR - iR) < 0 ? 0 : oR - iR;
    454             out->y = (oG - iG) < 0 ? 0 : oG - iG;
    455             out->z = (oB - iB) < 0 ? 0 : oB - iB;
    456             out->w = (oA - iA) < 0 ? 0 : oA - iA;
    457         }
    458         break;
    459     case BLEND_STAMP:
    460         ALOGE("Called unimplemented blend intrinsic BLEND_STAMP");
    461         rsAssert(false);
    462         break;
    463     case BLEND_RED:
    464         ALOGE("Called unimplemented blend intrinsic BLEND_RED");
    465         rsAssert(false);
    466         break;
    467     case BLEND_GREEN:
    468         ALOGE("Called unimplemented blend intrinsic BLEND_GREEN");
    469         rsAssert(false);
    470         break;
    471     case BLEND_BLUE:
    472         ALOGE("Called unimplemented blend intrinsic BLEND_BLUE");
    473         rsAssert(false);
    474         break;
    475     case BLEND_HUE:
    476         ALOGE("Called unimplemented blend intrinsic BLEND_HUE");
    477         rsAssert(false);
    478         break;
    479     case BLEND_SATURATION:
    480         ALOGE("Called unimplemented blend intrinsic BLEND_SATURATION");
    481         rsAssert(false);
    482         break;
    483     case BLEND_COLOR:
    484         ALOGE("Called unimplemented blend intrinsic BLEND_COLOR");
    485         rsAssert(false);
    486         break;
    487     case BLEND_LUMINOSITY:
    488         ALOGE("Called unimplemented blend intrinsic BLEND_LUMINOSITY");
    489         rsAssert(false);
    490         break;
    491 
    492     default:
    493         ALOGE("Called unimplemented value %d", info->slot);
    494         rsAssert(false);
    495 
    496     }
    497 }
    498 
    499 
    500 RsdCpuScriptIntrinsicBlend::RsdCpuScriptIntrinsicBlend(RsdCpuReferenceImpl *ctx,
    501                                                        const Script *s, const Element *e)
    502             : RsdCpuScriptIntrinsic(ctx, s, e, RS_SCRIPT_INTRINSIC_ID_BLEND) {
    503 
    504     mRootPtr = &kernel;
    505 }
    506 
    507 RsdCpuScriptIntrinsicBlend::~RsdCpuScriptIntrinsicBlend() {
    508 }
    509 
    510 void RsdCpuScriptIntrinsicBlend::populateScript(Script *s) {
    511     s->mHal.info.exportedVariableCount = 0;
    512 }
    513 
    514 RsdCpuScriptImpl * rsdIntrinsic_Blend(RsdCpuReferenceImpl *ctx,
    515                                       const Script *s, const Element *e) {
    516     return new RsdCpuScriptIntrinsicBlend(ctx, s, e);
    517 }
    518