Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2008 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
// NOTE: The Motorola copyright below was added in 2009, but there is no record of Motorola contributions to this file.
      9 
     10 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
     11  *
     12  * Use of this source code is governed by a BSD-style license that can be
     13  * found in the LICENSE file.
     14  */
     15 
     16 #include "SkBitmapProcState.h"
     17 #include "SkPerspIter.h"
     18 #include "SkShader.h"
     19 #include "SkUtils.h"
     20 #include "SkUtilsArm.h"
     21 #include "SkBitmapProcState_utils.h"
     22 
     23 /*  returns 0...(n-1) given any x (positive or negative).
     24 
     25     As an example, if n (which is always positive) is 5...
     26 
     27           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
     28     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
     29  */
     30 static inline int sk_int_mod(int x, int n) {
     31     SkASSERT(n > 0);
     32     if ((unsigned)x >= (unsigned)n) {
     33         if (x < 0) {
     34             x = n + ~(~x % n);
     35         } else {
     36             x = x % n;
     37         }
     38     }
     39     return x;
     40 }
     41 
     42 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     43 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     44 
     45 #include "SkBitmapProcState_matrix_template.h"
     46 
     47 ///////////////////////////////////////////////////////////////////////////////
     48 
     49 // Compile neon code paths if needed
     50 #if !SK_ARM_NEON_IS_NONE
     51 
     52 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
     53 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
     54 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];
     55 
     56 #endif // !SK_ARM_NEON_IS_NONE
     57 
     58 // Compile non-neon code path if needed
     59 #if !SK_ARM_NEON_IS_ALWAYS
     60 #define MAKENAME(suffix)        ClampX_ClampY ## suffix
     61 #define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
     62 #define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
     63 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
     64 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
     65 #define CHECK_FOR_DECAL
     66 #include "SkBitmapProcState_matrix.h"
     67 
     68 struct ClampTileProcs {
     69     static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
     70         return SkClampMax(fx >> 16, max);
     71     }
     72     static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
     73         return SkClampMax(fy >> 16, max);
     74     }
     75 };
     76 
     77 // Referenced in opts_check_x86.cpp
     78 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[],
     79                                   int count, int x, int y) {
     80     return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y);
     81 }
     82 void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[],
     83                                   int count, int x, int y) {
     84     return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y);
     85 }
     86 
     87 static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = {
     88     // only clamp lives in the right coord space to check for decal
     89     ClampX_ClampY_nofilter_scale,
     90     ClampX_ClampY_filter_scale,
     91     ClampX_ClampY_nofilter_affine,
     92     ClampX_ClampY_filter_affine,
     93     NoFilterProc_Persp<ClampTileProcs>,
     94     ClampX_ClampY_filter_persp
     95 };
     96 
     97 #define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
     98 #define TILEX_PROCF(fx, max)    SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1))
     99 #define TILEY_PROCF(fy, max)    SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1))
    100 #define TILEX_LOW_BITS(fx, max) (((unsigned)((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
    101 #define TILEY_LOW_BITS(fy, max) (((unsigned)((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
    102 #include "SkBitmapProcState_matrix.h"
    103 
    104 struct RepeatTileProcs {
    105     static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) {
    106         SkASSERT(max < 65535);
    107         return SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1));
    108     }
    109     static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) {
    110         SkASSERT(max < 65535);
    111         return SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1));
    112     }
    113 };
    114 
    115 static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = {
    116     NoFilterProc_Scale<RepeatTileProcs, false>,
    117     RepeatX_RepeatY_filter_scale,
    118     NoFilterProc_Affine<RepeatTileProcs>,
    119     RepeatX_RepeatY_filter_affine,
    120     NoFilterProc_Persp<RepeatTileProcs>,
    121     RepeatX_RepeatY_filter_persp
    122 };
    123 #endif
    124 
    125 #define MAKENAME(suffix)        GeneralXY ## suffix
    126 #define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
    127                                 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
    128                                 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
    129                                 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
    130 #define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
    131 #define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
    132 #define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
    133 #define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
    134 #define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
    135 #define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
    136 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
    137 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
    138 #include "SkBitmapProcState_matrix.h"
    139 
    140 struct GeneralTileProcs {
    141     static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) {
    142         return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1));
    143     }
    144     static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) {
    145         return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1));
    146     }
    147 };
    148 
    149 static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = {
    150     NoFilterProc_Scale<GeneralTileProcs, false>,
    151     GeneralXY_filter_scale,
    152     NoFilterProc_Affine<GeneralTileProcs>,
    153     GeneralXY_filter_affine,
    154     NoFilterProc_Persp<GeneralTileProcs>,
    155     GeneralXY_filter_persp
    156 };
    157 
    158 ///////////////////////////////////////////////////////////////////////////////
    159 
    160 static inline U16CPU fixed_clamp(SkFixed x) {
    161     if (x < 0) {
    162         x = 0;
    163     }
    164     if (x >> 16) {
    165         x = 0xFFFF;
    166     }
    167     return x;
    168 }
    169 
    170 static inline U16CPU fixed_repeat(SkFixed x) {
    171     return x & 0xFFFF;
    172 }
    173 
    174 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
    175 // See http://code.google.com/p/skia/issues/detail?id=472
    176 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
    177 #pragma optimize("", off)
    178 #endif
    179 
    180 static inline U16CPU fixed_mirror(SkFixed x) {
    181     SkFixed s = SkLeftShift(x, 15) >> 31;
    182     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
    183     return (x ^ s) & 0xFFFF;
    184 }
    185 
    186 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
    187 #pragma optimize("", on)
    188 #endif
    189 
    190 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) {
    191     if (SkShader::kClamp_TileMode == m) {
    192         return fixed_clamp;
    193     }
    194     if (SkShader::kRepeat_TileMode == m) {
    195         return fixed_repeat;
    196     }
    197     SkASSERT(SkShader::kMirror_TileMode == m);
    198     return fixed_mirror;
    199 }
    200 
    201 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
    202     return (x >> 12) & 0xF;
    203 }
    204 
    205 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
    206     return ((x * scale) >> 12) & 0xF;
    207 }
    208 
    209 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
    210     if (SkShader::kClamp_TileMode == m) {
    211         return fixed_clamp_lowbits;
    212     } else {
    213         SkASSERT(SkShader::kMirror_TileMode == m ||
    214                  SkShader::kRepeat_TileMode == m);
    215         // mirror and repeat have the same behavior for the low bits.
    216         return fixed_repeat_or_mirrow_lowbits;
    217     }
    218 }
    219 
    220 static inline U16CPU int_clamp(int x, int n) {
    221     if (x >= n) {
    222         x = n - 1;
    223     }
    224     if (x < 0) {
    225         x = 0;
    226     }
    227     return x;
    228 }
    229 
    230 static inline U16CPU int_repeat(int x, int n) {
    231     return sk_int_mod(x, n);
    232 }
    233 
    234 static inline U16CPU int_mirror(int x, int n) {
    235     x = sk_int_mod(x, 2 * n);
    236     if (x >= n) {
    237         x = n + ~(x - n);
    238     }
    239     return x;
    240 }
    241 
    242 #if 0
    243 static void test_int_tileprocs() {
    244     for (int i = -8; i <= 8; i++) {
    245         SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
    246     }
    247 }
    248 #endif
    249 
    250 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
    251     if (SkShader::kClamp_TileMode == tm)
    252         return int_clamp;
    253     if (SkShader::kRepeat_TileMode == tm)
    254         return int_repeat;
    255     SkASSERT(SkShader::kMirror_TileMode == tm);
    256     return int_mirror;
    257 }
    258 
    259 //////////////////////////////////////////////////////////////////////////////
    260 
    261 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
    262     int i;
    263 
    264     for (i = (count >> 2); i > 0; --i) {
    265         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    266         fx += dx+dx;
    267         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    268         fx += dx+dx;
    269     }
    270     count &= 3;
    271 
    272     uint16_t* xx = (uint16_t*)dst;
    273     for (i = count; i > 0; --i) {
    274         *xx++ = SkToU16(fx >> 16); fx += dx;
    275     }
    276 }
    277 
    278 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) {
    279     if (count & 1) {
    280         SkASSERT((fx >> (16 + 14)) == 0);
    281         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    282         fx += dx;
    283     }
    284     while ((count -= 2) >= 0) {
    285         SkASSERT((fx >> (16 + 14)) == 0);
    286         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    287         fx += dx;
    288 
    289         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    290         fx += dx;
    291     }
    292 }
    293 
    294 ///////////////////////////////////////////////////////////////////////////////
    295 // stores the same as SCALE, but is cheaper to compute. Also since there is no
    296 // scale, we don't need/have a FILTER version
    297 
    298 static void fill_sequential(uint16_t xptr[], int start, int count) {
    299 #if 1
    300     if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
    301         *xptr++ = start++;
    302         count -= 1;
    303     }
    304     if (count > 3) {
    305         uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
    306         uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
    307         uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
    308         start += count & ~3;
    309         int qcount = count >> 2;
    310         do {
    311             *xxptr++ = pattern0;
    312             pattern0 += 0x40004;
    313             *xxptr++ = pattern1;
    314             pattern1 += 0x40004;
    315         } while (--qcount != 0);
    316         xptr = reinterpret_cast<uint16_t*>(xxptr);
    317         count &= 3;
    318     }
    319     while (--count >= 0) {
    320         *xptr++ = start++;
    321     }
    322 #else
    323     for (int i = 0; i < count; i++) {
    324         *xptr++ = start++;
    325     }
    326 #endif
    327 }
    328 
    329 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
    330                                    int x, int y) {
    331     const SkBitmapProcStateAutoMapper mapper(s, x, y);
    332     **xy = s.fIntTileProcY(mapper.intY(), s.fPixmap.height());
    333     *xy += 1;   // bump the ptr
    334     // return our starting X position
    335     return mapper.intX();
    336 }
    337 
    338 static void clampx_nofilter_trans(const SkBitmapProcState& s,
    339                                   uint32_t xy[], int count, int x, int y) {
    340     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    341 
    342     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    343     const int width = s.fPixmap.width();
    344     if (1 == width) {
    345         // all of the following X values must be 0
    346         memset(xy, 0, count * sizeof(uint16_t));
    347         return;
    348     }
    349 
    350     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    351     int n;
    352 
    353     // fill before 0 as needed
    354     if (xpos < 0) {
    355         n = -xpos;
    356         if (n > count) {
    357             n = count;
    358         }
    359         memset(xptr, 0, n * sizeof(uint16_t));
    360         count -= n;
    361         if (0 == count) {
    362             return;
    363         }
    364         xptr += n;
    365         xpos = 0;
    366     }
    367 
    368     // fill in 0..width-1 if needed
    369     if (xpos < width) {
    370         n = width - xpos;
    371         if (n > count) {
    372             n = count;
    373         }
    374         fill_sequential(xptr, xpos, n);
    375         count -= n;
    376         if (0 == count) {
    377             return;
    378         }
    379         xptr += n;
    380     }
    381 
    382     // fill the remaining with the max value
    383     sk_memset16(xptr, width - 1, count);
    384 }
    385 
    386 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
    387                                    uint32_t xy[], int count, int x, int y) {
    388     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    389 
    390     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    391     const int width = s.fPixmap.width();
    392     if (1 == width) {
    393         // all of the following X values must be 0
    394         memset(xy, 0, count * sizeof(uint16_t));
    395         return;
    396     }
    397 
    398     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    399     int start = sk_int_mod(xpos, width);
    400     int n = width - start;
    401     if (n > count) {
    402         n = count;
    403     }
    404     fill_sequential(xptr, start, n);
    405     xptr += n;
    406     count -= n;
    407 
    408     while (count >= width) {
    409         fill_sequential(xptr, 0, width);
    410         xptr += width;
    411         count -= width;
    412     }
    413 
    414     if (count > 0) {
    415         fill_sequential(xptr, 0, count);
    416     }
    417 }
    418 
    419 static void fill_backwards(uint16_t xptr[], int pos, int count) {
    420     for (int i = 0; i < count; i++) {
    421         SkASSERT(pos >= 0);
    422         xptr[i] = pos--;
    423     }
    424 }
    425 
    426 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
    427                                    uint32_t xy[], int count, int x, int y) {
    428     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    429 
    430     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    431     const int width = s.fPixmap.width();
    432     if (1 == width) {
    433         // all of the following X values must be 0
    434         memset(xy, 0, count * sizeof(uint16_t));
    435         return;
    436     }
    437 
    438     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    439     // need to know our start, and our initial phase (forward or backward)
    440     bool forward;
    441     int n;
    442     int start = sk_int_mod(xpos, 2 * width);
    443     if (start >= width) {
    444         start = width + ~(start - width);
    445         forward = false;
    446         n = start + 1;  // [start .. 0]
    447     } else {
    448         forward = true;
    449         n = width - start;  // [start .. width)
    450     }
    451     if (n > count) {
    452         n = count;
    453     }
    454     if (forward) {
    455         fill_sequential(xptr, start, n);
    456     } else {
    457         fill_backwards(xptr, start, n);
    458     }
    459     forward = !forward;
    460     xptr += n;
    461     count -= n;
    462 
    463     while (count >= width) {
    464         if (forward) {
    465             fill_sequential(xptr, 0, width);
    466         } else {
    467             fill_backwards(xptr, width - 1, width);
    468         }
    469         forward = !forward;
    470         xptr += width;
    471         count -= width;
    472     }
    473 
    474     if (count > 0) {
    475         if (forward) {
    476             fill_sequential(xptr, 0, count);
    477         } else {
    478             fill_backwards(xptr, width - 1, count);
    479         }
    480     }
    481 }
    482 
    483 ///////////////////////////////////////////////////////////////////////////////
    484 
    485 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
    486 //    test_int_tileprocs();
    487     // check for our special case when there is no scale/affine/perspective
    488     if (trivial_matrix && kNone_SkFilterQuality == fFilterLevel) {
    489         fIntTileProcY = choose_int_tile_proc(fTileModeY);
    490         switch (fTileModeX) {
    491             case SkShader::kClamp_TileMode:
    492                 return clampx_nofilter_trans;
    493             case SkShader::kRepeat_TileMode:
    494                 return repeatx_nofilter_trans;
    495             case SkShader::kMirror_TileMode:
    496                 return mirrorx_nofilter_trans;
    497         }
    498     }
    499 
    500     int index = 0;
    501     if (fFilterLevel != kNone_SkFilterQuality) {
    502         index = 1;
    503     }
    504     if (fInvType & SkMatrix::kPerspective_Mask) {
    505         index += 4;
    506     } else if (fInvType & SkMatrix::kAffine_Mask) {
    507         index += 2;
    508     }
    509 
    510     if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) {
    511         // clamp gets special version of filterOne
    512         fFilterOneX = SK_Fixed1;
    513         fFilterOneY = SK_Fixed1;
    514         return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
    515     }
    516 
    517     // all remaining procs use this form for filterOne
    518     fFilterOneX = SK_Fixed1 / fPixmap.width();
    519     fFilterOneY = SK_Fixed1 / fPixmap.height();
    520 
    521     if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) {
    522         return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
    523     }
    524 
    525     fTileProcX = choose_tile_proc(fTileModeX);
    526     fTileProcY = choose_tile_proc(fTileModeY);
    527     fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
    528     fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
    529     return GeneralXY_Procs[index];
    530 }
    531