Home | History | Annotate | Download | only in core
      1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
      2  *
      3  * Use of this source code is governed by a BSD-style license that can be
      4  * found in the LICENSE file.
      5  */
      6 
      7 #include "SkBitmapProcState.h"
      8 #include "SkPerspIter.h"
      9 #include "SkShader.h"
     10 #include "SkUtils.h"
     11 #include "SkUtilsArm.h"
     12 #include "SkBitmapProcState_utils.h"
     13 
     14 /*  returns 0...(n-1) given any x (positive or negative).
     15 
     16     As an example, if n (which is always positive) is 5...
     17 
     18           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
     19     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
     20  */
     21 static inline int sk_int_mod(int x, int n) {
     22     SkASSERT(n > 0);
     23     if ((unsigned)x >= (unsigned)n) {
     24         if (x < 0) {
     25             x = n + ~(~x % n);
     26         } else {
     27             x = x % n;
     28         }
     29     }
     30     return x;
     31 }
     32 
     // Forward declarations of the two decal helpers; both are defined further
     // down in this file with external (non-static) linkage.
     // NOTE(review): presumably declared up front because the
     // SkBitmapProcState_matrix.h inclusions below call them when
     // CHECK_FOR_DECAL is defined -- confirm against that header.
     33 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     34 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     35 
     36 // Compile neon code paths if needed
     37 #if !SK_ARM_NEON_IS_NONE
     38 
        // Tables of matrix procs provided by the NEON translation unit.
        // NOTE(review): chooseMatrixProc() below indexes tables through
        // SK_ARM_NEON_WRAP(ClampX_ClampY_Procs) / (RepeatX_RepeatY_Procs);
        // presumably that macro resolves to these _neon names when NEON is
        // selected -- confirm in SkUtilsArm.h.
     39 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
     40 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
     41 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
     42 
     43 #endif // !SK_ARM_NEON_IS_NONE
     44 
     45 // Compile non-neon code path if needed
     46 #if !SK_ARM_NEON_IS_ALWAYS
        // Each macro group below parameterizes one inclusion of
        // SkBitmapProcState_matrix.h; MAKENAME pastes the given prefix onto the
        // names that header generates.
        // NOTE(review): the macros are defined again for the next group without
        // an #undef in this file, so the header presumably #undefs them at its
        // end -- confirm.
        //
        // Clamp/clamp: the tile coordinate is the integer part of the 16.16
        // value (fx >> 16) passed through SkClampMax with max; the low bits are
        // bits 12..15 of the value (the top 4 bits of the 16-bit fraction), and
        // max is unused there. CHECK_FOR_DECAL is defined only for this group.
     47 #define MAKENAME(suffix)        ClampX_ClampY ## suffix
     48 #define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
     49 #define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
     50 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
     51 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
     52 #define CHECK_FOR_DECAL
     53 #include "SkBitmapProcState_matrix.h"
     54 
        // Repeat/repeat: only the low 16 bits (the fraction) of the value are
        // kept, then multiplied by (max + 1). SK_USHIFT16 is applied to that
        // product for the tile coordinate (presumably an unsigned >> 16 --
        // confirm), and bits 12..15 of the same product are the low bits.
     55 #define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
     56 #define TILEX_PROCF(fx, max)    SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
     57 #define TILEY_PROCF(fy, max)    SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
     58 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
     59 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
     60 #include "SkBitmapProcState_matrix.h"
     61 #endif
     62 
     // General variant: the tile behavior is not baked into the macros but
     // dispatched at run time through function pointers held by the state.
     // This group is compiled unconditionally (it sits outside the
     // SK_ARM_NEON_IS_ALWAYS guard above).
     //
     // PREAMBLE copies the four tile procs out of the state into locals; each
     // local is also cast to void (presumably to silence unused-variable
     // warnings in procs that need only some of them -- confirm).
     // PREAMBLE_PARAM_* / PREAMBLE_ARG_* are comma-led fragments that append
     // those locals to a parameter list / argument list respectively.
     63 #define MAKENAME(suffix)        GeneralXY ## suffix
     64 #define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
     65                                 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
     66                                 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
     67                                 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
     68 #define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
     69 #define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
     70 #define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
     71 #define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
     // The tile proc's result is multiplied by (max + 1) and passed through
     // SK_USHIFT16; the low-bits proc receives the raw value and (max + 1).
     72 #define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
     73 #define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
     74 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
     75 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
     76 #include "SkBitmapProcState_matrix.h"
     77 
     78 static inline U16CPU fixed_clamp(SkFixed x)
     79 {
     80     if (x < 0) {
     81         x = 0;
     82     }
     83     if (x >> 16) {
     84         x = 0xFFFF;
     85     }
     86     return x;
     87 }
     88 
     89 static inline U16CPU fixed_repeat(SkFixed x)
     90 {
     91     return x & 0xFFFF;
     92 }
     93 
     94 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
     95 // See http://code.google.com/p/skia/issues/detail?id=472
     96 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
     97 #pragma optimize("", off)
     98 #endif
     99 
    100 static inline U16CPU fixed_mirror(SkFixed x)
    101 {
    102     SkFixed s = x << 15 >> 31;
    103     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
    104     return (x ^ s) & 0xFFFF;
    105 }
    106 
    107 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
    108 #pragma optimize("", on)
    109 #endif
    110 
    111 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m)
    112 {
    113     if (SkShader::kClamp_TileMode == m)
    114         return fixed_clamp;
    115     if (SkShader::kRepeat_TileMode == m)
    116         return fixed_repeat;
    117     SkASSERT(SkShader::kMirror_TileMode == m);
    118     return fixed_mirror;
    119 }
    120 
    121 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
    122     return (x >> 12) & 0xF;
    123 }
    124 
    125 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
    126     return ((x * scale) >> 12) & 0xF;
    127 }
    128 
    129 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
    130     if (SkShader::kClamp_TileMode == m) {
    131         return fixed_clamp_lowbits;
    132     } else {
    133         SkASSERT(SkShader::kMirror_TileMode == m ||
    134                  SkShader::kRepeat_TileMode == m);
    135         // mirror and repeat have the same behavior for the low bits.
    136         return fixed_repeat_or_mirrow_lowbits;
    137     }
    138 }
    139 
    140 static inline U16CPU int_clamp(int x, int n) {
    141     if (x >= n) {
    142         x = n - 1;
    143     }
    144     if (x < 0) {
    145         x = 0;
    146     }
    147     return x;
    148 }
    149 
    150 static inline U16CPU int_repeat(int x, int n) {
    151     return sk_int_mod(x, n);
    152 }
    153 
    154 static inline U16CPU int_mirror(int x, int n) {
    155     x = sk_int_mod(x, 2 * n);
    156     if (x >= n) {
    157         x = n + ~(x - n);
    158     }
    159     return x;
    160 }
    161 
    #if 0
    // Compiled-out debugging aid: prints int_mirror(i, 3) for i in [-8, 8].
    static void test_int_tileprocs() {
        int i = -8;
        while (i <= 8) {
            SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
            ++i;
        }
    }
    #endif
    169 
    170 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
    171     if (SkShader::kClamp_TileMode == tm)
    172         return int_clamp;
    173     if (SkShader::kRepeat_TileMode == tm)
    174         return int_repeat;
    175     SkASSERT(SkShader::kMirror_TileMode == tm);
    176     return int_mirror;
    177 }
    178 
    179 //////////////////////////////////////////////////////////////////////////////
    180 
    181 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
    182 {
    183     int i;
    184 
    185     for (i = (count >> 2); i > 0; --i)
    186     {
    187         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    188         fx += dx+dx;
    189         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    190         fx += dx+dx;
    191     }
    192     count &= 3;
    193 
    194     uint16_t* xx = (uint16_t*)dst;
    195     for (i = count; i > 0; --i) {
    196         *xx++ = SkToU16(fx >> 16); fx += dx;
    197     }
    198 }
    199 
    200 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
    201 {
    202 
    203 
    204     if (count & 1)
    205     {
    206         SkASSERT((fx >> (16 + 14)) == 0);
    207         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    208         fx += dx;
    209     }
    210     while ((count -= 2) >= 0)
    211     {
    212         SkASSERT((fx >> (16 + 14)) == 0);
    213         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    214         fx += dx;
    215 
    216         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    217         fx += dx;
    218     }
    219 }
    220 
    221 ///////////////////////////////////////////////////////////////////////////////
    222 // stores the same as SCALE, but is cheaper to compute. Also since there is no
    223 // scale, we don't need/have a FILTER version
    224 
    225 static void fill_sequential(uint16_t xptr[], int start, int count) {
    226 #if 1
    227     if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
    228         *xptr++ = start++;
    229         count -= 1;
    230     }
    231     if (count > 3) {
    232         uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
    233         uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
    234         uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
    235         start += count & ~3;
    236         int qcount = count >> 2;
    237         do {
    238             *xxptr++ = pattern0;
    239             pattern0 += 0x40004;
    240             *xxptr++ = pattern1;
    241             pattern1 += 0x40004;
    242         } while (--qcount != 0);
    243         xptr = reinterpret_cast<uint16_t*>(xxptr);
    244         count &= 3;
    245     }
    246     while (--count >= 0) {
    247         *xptr++ = start++;
    248     }
    249 #else
    250     for (int i = 0; i < count; i++) {
    251         *xptr++ = start++;
    252     }
    253 #endif
    254 }
    255 
    256 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
    257                                    int x, int y) {
    258     SkPoint pt;
    259     s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
    260                SkIntToScalar(y) + SK_ScalarHalf, &pt);
    261     **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
    262                            s.fBitmap->height());
    263     *xy += 1;   // bump the ptr
    264     // return our starting X position
    265     return SkScalarToFixed(pt.fX) >> 16;
    266 }
    267 
    268 static void clampx_nofilter_trans(const SkBitmapProcState& s,
    269                                   uint32_t xy[], int count, int x, int y) {
    270     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    271 
    272     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    273     const int width = s.fBitmap->width();
    274     if (1 == width) {
    275         // all of the following X values must be 0
    276         memset(xy, 0, count * sizeof(uint16_t));
    277         return;
    278     }
    279 
    280     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    281     int n;
    282 
    283     // fill before 0 as needed
    284     if (xpos < 0) {
    285         n = -xpos;
    286         if (n > count) {
    287             n = count;
    288         }
    289         memset(xptr, 0, n * sizeof(uint16_t));
    290         count -= n;
    291         if (0 == count) {
    292             return;
    293         }
    294         xptr += n;
    295         xpos = 0;
    296     }
    297 
    298     // fill in 0..width-1 if needed
    299     if (xpos < width) {
    300         n = width - xpos;
    301         if (n > count) {
    302             n = count;
    303         }
    304         fill_sequential(xptr, xpos, n);
    305         count -= n;
    306         if (0 == count) {
    307             return;
    308         }
    309         xptr += n;
    310     }
    311 
    312     // fill the remaining with the max value
    313     sk_memset16(xptr, width - 1, count);
    314 }
    315 
    316 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
    317                                    uint32_t xy[], int count, int x, int y) {
    318     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    319 
    320     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    321     const int width = s.fBitmap->width();
    322     if (1 == width) {
    323         // all of the following X values must be 0
    324         memset(xy, 0, count * sizeof(uint16_t));
    325         return;
    326     }
    327 
    328     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    329     int start = sk_int_mod(xpos, width);
    330     int n = width - start;
    331     if (n > count) {
    332         n = count;
    333     }
    334     fill_sequential(xptr, start, n);
    335     xptr += n;
    336     count -= n;
    337 
    338     while (count >= width) {
    339         fill_sequential(xptr, 0, width);
    340         xptr += width;
    341         count -= width;
    342     }
    343 
    344     if (count > 0) {
    345         fill_sequential(xptr, 0, count);
    346     }
    347 }
    348 
    349 static void fill_backwards(uint16_t xptr[], int pos, int count) {
    350     for (int i = 0; i < count; i++) {
    351         SkASSERT(pos >= 0);
    352         xptr[i] = pos--;
    353     }
    354 }
    355 
    356 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
    357                                    uint32_t xy[], int count, int x, int y) {
    358     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    359 
    360     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    361     const int width = s.fBitmap->width();
    362     if (1 == width) {
    363         // all of the following X values must be 0
    364         memset(xy, 0, count * sizeof(uint16_t));
    365         return;
    366     }
    367 
    368     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    369     // need to know our start, and our initial phase (forward or backward)
    370     bool forward;
    371     int n;
    372     int start = sk_int_mod(xpos, 2 * width);
    373     if (start >= width) {
    374         start = width + ~(start - width);
    375         forward = false;
    376         n = start + 1;  // [start .. 0]
    377     } else {
    378         forward = true;
    379         n = width - start;  // [start .. width)
    380     }
    381     if (n > count) {
    382         n = count;
    383     }
    384     if (forward) {
    385         fill_sequential(xptr, start, n);
    386     } else {
    387         fill_backwards(xptr, start, n);
    388     }
    389     forward = !forward;
    390     xptr += n;
    391     count -= n;
    392 
    393     while (count >= width) {
    394         if (forward) {
    395             fill_sequential(xptr, 0, width);
    396         } else {
    397             fill_backwards(xptr, width - 1, width);
    398         }
    399         forward = !forward;
    400         xptr += width;
    401         count -= width;
    402     }
    403 
    404     if (count > 0) {
    405         if (forward) {
    406             fill_sequential(xptr, 0, count);
    407         } else {
    408             fill_backwards(xptr, width - 1, count);
    409         }
    410     }
    411 }
    412 
    413 ///////////////////////////////////////////////////////////////////////////////
    414 
    415 SkBitmapProcState::MatrixProc
    416 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
    417 //    test_int_tileprocs();
    418     // check for our special case when there is no scale/affine/perspective
    419     if (trivial_matrix) {
    420         SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel);
    421         fIntTileProcY = choose_int_tile_proc(fTileModeY);
    422         switch (fTileModeX) {
    423             case SkShader::kClamp_TileMode:
    424                 return clampx_nofilter_trans;
    425             case SkShader::kRepeat_TileMode:
    426                 return repeatx_nofilter_trans;
    427             case SkShader::kMirror_TileMode:
    428                 return mirrorx_nofilter_trans;
    429         }
    430     }
    431 
    432     int index = 0;
    433     if (fFilterLevel != SkPaint::kNone_FilterLevel) {
    434         index = 1;
    435     }
    436     if (fInvType & SkMatrix::kPerspective_Mask) {
    437         index += 4;
    438     } else if (fInvType & SkMatrix::kAffine_Mask) {
    439         index += 2;
    440     }
    441 
    442     if (SkShader::kClamp_TileMode == fTileModeX &&
    443         SkShader::kClamp_TileMode == fTileModeY)
    444     {
    445         // clamp gets special version of filterOne
    446         fFilterOneX = SK_Fixed1;
    447         fFilterOneY = SK_Fixed1;
    448         return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
    449     }
    450 
    451     // all remaining procs use this form for filterOne
    452     fFilterOneX = SK_Fixed1 / fBitmap->width();
    453     fFilterOneY = SK_Fixed1 / fBitmap->height();
    454 
    455     if (SkShader::kRepeat_TileMode == fTileModeX &&
    456         SkShader::kRepeat_TileMode == fTileModeY)
    457     {
    458         return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
    459     }
    460 
    461     fTileProcX = choose_tile_proc(fTileModeX);
    462     fTileProcY = choose_tile_proc(fTileModeY);
    463     fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
    464     fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
    465     return GeneralXY_Procs[index];
    466 }
    467