Home | History | Annotate | Download | only in core
      1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
      2  *
      3  * Use of this source code is governed by a BSD-style license that can be
      4  * found in the LICENSE file.
      5  */
      6 
      7 #include "SkBitmapProcState.h"
      8 #include "SkPerspIter.h"
      9 #include "SkShader.h"
     10 #include "SkUtils.h"
     11 #include "SkUtilsArm.h"
     12 #include "SkBitmapProcState_utils.h"
     13 
     14 /*  returns 0...(n-1) given any x (positive or negative).
     15 
     16     As an example, if n (which is always positive) is 5...
     17 
     18           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
     19     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
     20  */
     21 static inline int sk_int_mod(int x, int n) {
     22     SkASSERT(n > 0);
     23     if ((unsigned)x >= (unsigned)n) {
     24         if (x < 0) {
     25             x = n + ~(~x % n);
     26         } else {
     27             x = x % n;
     28         }
     29     }
     30     return x;
     31 }
     32 
// Forward declarations; both functions are defined further down in this file.
// They are declared ahead of the SkBitmapProcState_matrix*.h includes below,
// presumably so the included code can call them -- TODO confirm in the headers.
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     35 
     36 #include "SkBitmapProcState_matrix_template.h"
     37 
     38 ///////////////////////////////////////////////////////////////////////////////
     39 
     40 // Compile neon code paths if needed
     41 #if !SK_ARM_NEON_IS_NONE
     42 
     43 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
     44 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
     45 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
     46 
     47 #endif // !SK_ARM_NEON_IS_NONE
     48 
     49 // Compile non-neon code path if needed
     50 #if !SK_ARM_NEON_IS_ALWAYS
// Parameter macros for the clamp/clamp instantiation of
// SkBitmapProcState_matrix.h. The include presumably generates the
// ClampX_ClampY_filter_* functions referenced in ClampX_ClampY_Procs below
// (names formed with MAKENAME) -- confirm against the header.
#define MAKENAME(suffix)        ClampX_ClampY ## suffix
// Coordinate -> texel index: the value shifted right by 16, passed through
// SkClampMax together with max.
#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
// Bits 12..15 of the coordinate (a 4-bit value); max is unused here.
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
// Only this (clamp) instantiation defines CHECK_FOR_DECAL; the Repeat and
// GeneralXY instantiations later in the file do not.
#define CHECK_FOR_DECAL
#include "SkBitmapProcState_matrix.h"
     58 
     59 struct ClampTileProcs {
     60     static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
     61         return SkClampMax(fx >> 16, max);
     62     }
     63     static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
     64         return SkClampMax(fy >> 16, max);
     65     }
     66 };
     67 
     68 // Referenced in opts_check_x86.cpp
     69 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
     70                                   int count, int x, int y) {
     71     return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
     72 }
     73 void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
     74                                   int count, int x, int y) {
     75     return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
     76 }
     77 
// Proc table for clamp/clamp tiling, indexed as computed in
// SkBitmapProcState::chooseMatrixProc():
//   index = (filtering ? 1 : 0) + (perspective ? 4 : affine ? 2 : 0)
// so the entries run nofilter/filter for scale, then affine, then perspective.
static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
    // only clamp lives in the right coord space to check for decal
    ClampX_ClampY_nofilter_scale,
    ClampX_ClampY_filter_scale,
    ClampX_ClampY_nofilter_affine,
    ClampX_ClampY_filter_affine,
    NoFilterProc_Persp<ClampTileProcs>,
    ClampX_ClampY_filter_persp
};
     87 
// Parameter macros for the repeat/repeat instantiation of
// SkBitmapProcState_matrix.h. These names are re-#defined without an #undef
// here, so the header presumably #undefs them after use -- TODO confirm.
#define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
// Coordinate -> texel index: keep the low 16 bits of the coordinate, multiply
// by (max + 1), then apply SK_USHIFT16 to the product.
#define TILEX_PROCF(fx, max)    SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
// Same product as above, but shifted right by 12 and masked to 4 bits.
#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
     94 
     95 struct RepeatTileProcs {
     96     static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
     97         return SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1));
     98     }
     99     static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
    100         return SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1));
    101     }
    102 };
    103 
// Proc table for repeat/repeat tiling, indexed as computed in
// SkBitmapProcState::chooseMatrixProc():
//   index = (filtering ? 1 : 0) + (perspective ? 4 : affine ? 2 : 0)
static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
    NoFilterProc_Scale<RepeatTileProcs, false>,
    RepeatX_RepeatY_filter_scale,
    NoFilterProc_Affine<RepeatTileProcs>,
    RepeatX_RepeatY_filter_affine,
    NoFilterProc_Persp<RepeatTileProcs>,
    RepeatX_RepeatY_filter_persp
};
    112 #endif
    113 
// Parameter macros for the general (per-axis tile mode) instantiation of
// SkBitmapProcState_matrix.h. Tiling goes through the function pointers stored
// on the state (fTileProcX/Y, fTileLowBitsProcX/Y), which
// SkBitmapProcState::chooseMatrixProc() assigns before returning a GeneralXY proc.
#define MAKENAME(suffix)        GeneralXY ## suffix
// PREAMBLE copies the four tile function pointers from the state into locals;
// each is followed by a (void) cast, presumably to silence unused-variable
// warnings in procs that do not need all of them.
#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
// Parameter-list / argument-list fragments (note the leading comma) that carry
// the PREAMBLE locals for one axis.
#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
#define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
#define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
// Coordinate -> texel index: tile the coordinate with the selected proc,
// multiply by (max + 1), then apply SK_USHIFT16.
#define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
// Low bits come from the selected low-bits proc, which receives (max + 1).
#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
#include "SkBitmapProcState_matrix.h"
    128 
    129 struct GeneralTileProcs {
    130     static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
    131         return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
    132     }
    133     static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
    134         return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
    135     }
    136 };
    137 
// Proc table used when the X/Y tile modes are neither both clamp nor both
// repeat, indexed as computed in SkBitmapProcState::chooseMatrixProc():
//   index = (filtering ? 1 : 0) + (perspective ? 4 : affine ? 2 : 0)
static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
    NoFilterProc_Scale<GeneralTileProcs, false>,
    GeneralXY_filter_scale,
    NoFilterProc_Affine<GeneralTileProcs>,
    GeneralXY_filter_affine,
    NoFilterProc_Persp<GeneralTileProcs>,
    GeneralXY_filter_persp
};
    146 
    147 ///////////////////////////////////////////////////////////////////////////////
    148 
    149 static inline U16CPU fixed_clamp(SkFixed x) {
    150     if (x < 0) {
    151         x = 0;
    152     }
    153     if (x >> 16) {
    154         x = 0xFFFF;
    155     }
    156     return x;
    157 }
    158 
    159 static inline U16CPU fixed_repeat(SkFixed x) {
    160     return x & 0xFFFF;
    161 }
    162 
// Visual Studio 2010 (_MSC_VER=1600) optimizes bit-shift code incorrectly.
    164 // See http://code.google.com/p/skia/issues/detail?id=472
    165 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
    166 #pragma optimize("", off)
    167 #endif
    168 
    169 static inline U16CPU fixed_mirror(SkFixed x) {
    170     SkFixed s = x << 15 >> 31;
    171     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
    172     return (x ^ s) & 0xFFFF;
    173 }
    174 
    175 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
    176 #pragma optimize("", on)
    177 #endif
    178 
    179 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
    180     if (SkShader::kClamp_TileMode == m) {
    181         return fixed_clamp;
    182     }
    183     if (SkShader::kRepeat_TileMode == m) {
    184         return fixed_repeat;
    185     }
    186     SkASSERT(SkShader::kMirror_TileMode == m);
    187     return fixed_mirror;
    188 }
    189 
    190 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
    191     return (x >> 12) & 0xF;
    192 }
    193 
    194 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
    195     return ((x * scale) >> 12) & 0xF;
    196 }
    197 
    198 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
    199     if (SkShader::kClamp_TileMode == m) {
    200         return fixed_clamp_lowbits;
    201     } else {
    202         SkASSERT(SkShader::kMirror_TileMode == m ||
    203                  SkShader::kRepeat_TileMode == m);
    204         // mirror and repeat have the same behavior for the low bits.
    205         return fixed_repeat_or_mirrow_lowbits;
    206     }
    207 }
    208 
    209 static inline U16CPU int_clamp(int x, int n) {
    210     if (x >= n) {
    211         x = n - 1;
    212     }
    213     if (x < 0) {
    214         x = 0;
    215     }
    216     return x;
    217 }
    218 
    219 static inline U16CPU int_repeat(int x, int n) {
    220     return sk_int_mod(x, n);
    221 }
    222 
    223 static inline U16CPU int_mirror(int x, int n) {
    224     x = sk_int_mod(x, 2 * n);
    225     if (x >= n) {
    226         x = n + ~(x - n);
    227     }
    228     return x;
    229 }
    230 
    231 #if 0
// Debug helper, compiled out by the surrounding #if 0: prints
// int_mirror(i, 3) for every i in [-8, 8] through SkDebugf.
static void test_int_tileprocs() {
    for (int i = -8; i <= 8; i++) {
        SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
    }
}
    237 #endif
    238 
    239 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
    240     if (SkShader::kClamp_TileMode == tm)
    241         return int_clamp;
    242     if (SkShader::kRepeat_TileMode == tm)
    243         return int_repeat;
    244     SkASSERT(SkShader::kMirror_TileMode == tm);
    245     return int_mirror;
    246 }
    247 
    248 //////////////////////////////////////////////////////////////////////////////
    249 
    250 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
    251     int i;
    252 
    253     for (i = (count >> 2); i > 0; --i) {
    254         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    255         fx += dx+dx;
    256         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    257         fx += dx+dx;
    258     }
    259     count &= 3;
    260 
    261     uint16_t* xx = (uint16_t*)dst;
    262     for (i = count; i > 0; --i) {
    263         *xx++ = SkToU16(fx >> 16); fx += dx;
    264     }
    265 }
    266 
    267 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
    268     if (count & 1) {
    269         SkASSERT((fx >> (16 + 14)) == 0);
    270         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    271         fx += dx;
    272     }
    273     while ((count -= 2) >= 0) {
    274         SkASSERT((fx >> (16 + 14)) == 0);
    275         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    276         fx += dx;
    277 
    278         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    279         fx += dx;
    280     }
    281 }
    282 
    283 ///////////////////////////////////////////////////////////////////////////////
    284 // stores the same as SCALE, but is cheaper to compute. Also since there is no
    285 // scale, we don't need/have a FILTER version
    286 
    287 static void fill_sequential(uint16_t xptr[], int start, int count) {
    288 #if 1
    289     if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
    290         *xptr++ = start++;
    291         count -= 1;
    292     }
    293     if (count > 3) {
    294         uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
    295         uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
    296         uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
    297         start += count & ~3;
    298         int qcount = count >> 2;
    299         do {
    300             *xxptr++ = pattern0;
    301             pattern0 += 0x40004;
    302             *xxptr++ = pattern1;
    303             pattern1 += 0x40004;
    304         } while (--qcount != 0);
    305         xptr = reinterpret_cast<uint16_t*>(xxptr);
    306         count &= 3;
    307     }
    308     while (--count >= 0) {
    309         *xptr++ = start++;
    310     }
    311 #else
    312     for (int i = 0; i < count; i++) {
    313         *xptr++ = start++;
    314     }
    315 #endif
    316 }
    317 
    318 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
    319                                    int x, int y) {
    320     SkPoint pt;
    321     s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
    322                SkIntToScalar(y) + SK_ScalarHalf, &pt);
    323     **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
    324                            s.fBitmap->height());
    325     *xy += 1;   // bump the ptr
    326     // return our starting X position
    327     return SkScalarToFixed(pt.fX) >> 16;
    328 }
    329 
    330 static void clampx_nofilter_trans(const SkBitmapProcState& s,
    331                                   uint32_t xy[], int count, int x, int y) {
    332     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    333 
    334     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    335     const int width = s.fBitmap->width();
    336     if (1 == width) {
    337         // all of the following X values must be 0
    338         memset(xy, 0, count * sizeof(uint16_t));
    339         return;
    340     }
    341 
    342     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    343     int n;
    344 
    345     // fill before 0 as needed
    346     if (xpos < 0) {
    347         n = -xpos;
    348         if (n > count) {
    349             n = count;
    350         }
    351         memset(xptr, 0, n * sizeof(uint16_t));
    352         count -= n;
    353         if (0 == count) {
    354             return;
    355         }
    356         xptr += n;
    357         xpos = 0;
    358     }
    359 
    360     // fill in 0..width-1 if needed
    361     if (xpos < width) {
    362         n = width - xpos;
    363         if (n > count) {
    364             n = count;
    365         }
    366         fill_sequential(xptr, xpos, n);
    367         count -= n;
    368         if (0 == count) {
    369             return;
    370         }
    371         xptr += n;
    372     }
    373 
    374     // fill the remaining with the max value
    375     sk_memset16(xptr, width - 1, count);
    376 }
    377 
    378 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
    379                                    uint32_t xy[], int count, int x, int y) {
    380     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    381 
    382     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    383     const int width = s.fBitmap->width();
    384     if (1 == width) {
    385         // all of the following X values must be 0
    386         memset(xy, 0, count * sizeof(uint16_t));
    387         return;
    388     }
    389 
    390     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    391     int start = sk_int_mod(xpos, width);
    392     int n = width - start;
    393     if (n > count) {
    394         n = count;
    395     }
    396     fill_sequential(xptr, start, n);
    397     xptr += n;
    398     count -= n;
    399 
    400     while (count >= width) {
    401         fill_sequential(xptr, 0, width);
    402         xptr += width;
    403         count -= width;
    404     }
    405 
    406     if (count > 0) {
    407         fill_sequential(xptr, 0, count);
    408     }
    409 }
    410 
    411 static void fill_backwards(uint16_t xptr[], int pos, int count) {
    412     for (int i = 0; i < count; i++) {
    413         SkASSERT(pos >= 0);
    414         xptr[i] = pos--;
    415     }
    416 }
    417 
    418 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
    419                                    uint32_t xy[], int count, int x, int y) {
    420     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    421 
    422     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    423     const int width = s.fBitmap->width();
    424     if (1 == width) {
    425         // all of the following X values must be 0
    426         memset(xy, 0, count * sizeof(uint16_t));
    427         return;
    428     }
    429 
    430     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    431     // need to know our start, and our initial phase (forward or backward)
    432     bool forward;
    433     int n;
    434     int start = sk_int_mod(xpos, 2 * width);
    435     if (start >= width) {
    436         start = width + ~(start - width);
    437         forward = false;
    438         n = start + 1;  // [start .. 0]
    439     } else {
    440         forward = true;
    441         n = width - start;  // [start .. width)
    442     }
    443     if (n > count) {
    444         n = count;
    445     }
    446     if (forward) {
    447         fill_sequential(xptr, start, n);
    448     } else {
    449         fill_backwards(xptr, start, n);
    450     }
    451     forward = !forward;
    452     xptr += n;
    453     count -= n;
    454 
    455     while (count >= width) {
    456         if (forward) {
    457             fill_sequential(xptr, 0, width);
    458         } else {
    459             fill_backwards(xptr, width - 1, width);
    460         }
    461         forward = !forward;
    462         xptr += width;
    463         count -= width;
    464     }
    465 
    466     if (count > 0) {
    467         if (forward) {
    468             fill_sequential(xptr, 0, count);
    469         } else {
    470             fill_backwards(xptr, width - 1, count);
    471         }
    472     }
    473 }
    474 
    475 ///////////////////////////////////////////////////////////////////////////////
    476 
    477 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
    478 //    test_int_tileprocs();
    479     // check for our special case when there is no scale/affine/perspective
    480     if (trivial_matrix && SkPaint::kNone_FilterLevel == fFilterLevel) {
    481         fIntTileProcY = choose_int_tile_proc(fTileModeY);
    482         switch (fTileModeX) {
    483             case SkShader::kClamp_TileMode:
    484                 return clampx_nofilter_trans;
    485             case SkShader::kRepeat_TileMode:
    486                 return repeatx_nofilter_trans;
    487             case SkShader::kMirror_TileMode:
    488                 return mirrorx_nofilter_trans;
    489         }
    490     }
    491 
    492     int index = 0;
    493     if (fFilterLevel != SkPaint::kNone_FilterLevel) {
    494         index = 1;
    495     }
    496     if (fInvType & SkMatrix::kPerspective_Mask) {
    497         index += 4;
    498     } else if (fInvType & SkMatrix::kAffine_Mask) {
    499         index += 2;
    500     }
    501 
    502     if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
    503         // clamp gets special version of filterOne
    504         fFilterOneX = SK_Fixed1;
    505         fFilterOneY = SK_Fixed1;
    506         return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
    507     }
    508 
    509     // all remaining procs use this form for filterOne
    510     fFilterOneX = SK_Fixed1 / fBitmap->width();
    511     fFilterOneY = SK_Fixed1 / fBitmap->height();
    512 
    513     if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
    514         return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
    515     }
    516 
    517     fTileProcX = choose_tile_proc(fTileModeX);
    518     fTileProcY = choose_tile_proc(fTileModeY);
    519     fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
    520     fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
    521     return GeneralXY_Procs[index];
    522 }
    523