Home | History | Annotate | Download | only in core
      1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
      2  *
      3  * Use of this source code is governed by a BSD-style license that can be
      4  * found in the LICENSE file.
      5  */
      6 
      7 #include "SkBitmapProcState.h"
      8 #include "SkPerspIter.h"
      9 #include "SkShader.h"
     10 #include "SkUtils.h"
     11 #include "SkUtilsArm.h"
     12 
     13 // Helper to ensure that when we shift down, we do it w/o sign-extension
     14 // so the caller doesn't have to manually mask off the top 16 bits
     15 //
     16 static unsigned SK_USHIFT16(unsigned x) {
     17     return x >> 16;
     18 }
     19 
     20 /*  returns 0...(n-1) given any x (positive or negative).
     21 
     22     As an example, if n (which is always positive) is 5...
     23 
     24           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
     25     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
     26  */
     27 static inline int sk_int_mod(int x, int n) {
     28     SkASSERT(n > 0);
     29     if ((unsigned)x >= (unsigned)n) {
     30         if (x < 0) {
     31             x = n + ~(~x % n);
     32         } else {
     33             x = x % n;
     34         }
     35     }
     36     return x;
     37 }
     38 
     39 /*
     40  *  The decal_ functions require that
     41  *  1. dx > 0
     42  *  2. [fx, fx+dx, fx+2dx, fx+3dx, ... fx+(count-1)dx] are all <= maxX
     43  *
     44  *  In addition, we use SkFractionalInt to keep more fractional precision than
     45  *  just SkFixed, so we will abort the decal_ call if dx is very small, since
     46  *  the decal_ function just operates on SkFixed. If that were changed, we could
     47  *  skip the very_small test here.
     48  */
     49 static inline bool can_truncate_to_fixed_for_decal(SkFractionalInt frX,
     50                                                    SkFractionalInt frDx,
     51                                                    int count, unsigned max) {
     52     SkFixed dx = SkFractionalIntToFixed(frDx);
     53 
     54     // if decal_ kept SkFractionalInt precision, this would just be dx <= 0
     55     // I just made up the 1/256. Just don't want to perceive accumulated error
     56     // if we truncate frDx and lose its low bits.
     57     if (dx <= SK_Fixed1 / 256) {
     58         return false;
     59     }
     60 
     61     // We cast to unsigned so we don't have to check for negative values, which
     62     // will now appear as very large positive values, and thus fail our test!
     63     SkFixed fx = SkFractionalIntToFixed(frX);
     64     return (unsigned)SkFixedFloorToInt(fx) <= max &&
     65            (unsigned)SkFixedFloorToInt(fx + dx * (count - 1)) < max;
     66 }
     67 
// Forward declarations of the decal helpers defined further down in this
// file. Both fill dst[] from a starting SkFixed x (fx) that is advanced by dx
// for each of 'count' samples.
// NOTE(review): they are declared ahead of the SkBitmapProcState_matrix.h
// inclusions below, presumably because that header's CHECK_FOR_DECAL path
// calls them -- confirm against the header.
void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     70 
// Compile neon code paths if needed
#if !SK_ARM_NEON_IS_NONE

// These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp
// They are the NEON counterparts of the ClampX_ClampY_Procs and
// RepeatX_RepeatY_Procs tables that chooseMatrixProc() selects from.
// NOTE(review): the selection itself goes through SK_ARM_NEON_WRAP, which is
// not defined in this file -- confirm how it picks between the two variants.
extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[];
extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[];

#endif // !SK_ARM_NEON_IS_NONE
     79 
// Compile non-neon code path if needed
#if !SK_ARM_NEON_IS_ALWAYS
// First inclusion of SkBitmapProcState_matrix.h: clamp tiling on both axes.
// The header is parameterized by the macros defined just before it; MAKENAME
// pastes the "ClampX_ClampY" prefix onto whatever suffix the header supplies
// (chooseMatrixProc() later indexes a ClampX_ClampY_Procs table).
// NOTE(review): the header is not visible here; since the macros are
// redefined below without #undef, it presumably #undefs them itself -- confirm.
#define MAKENAME(suffix)        ClampX_ClampY ## suffix
// Integer coordinate: drop the 16 fractional bits, then clamp with SkClampMax.
#define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
#define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
// Top 4 bits of the 16-bit fraction ('max' is unused for clamp).
#define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
// Only the clamp variant defines CHECK_FOR_DECAL.
// NOTE(review): presumably this enables the decal_* fast path inside the
// header -- confirm.
#define CHECK_FOR_DECAL
#include "SkBitmapProcState_matrix.h"

// Second inclusion: repeat tiling on both axes.
#define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
// Keep only the fractional 16 bits of the coordinate, scale by the dimension
// (max + 1) and take the integer part, giving an index in 0..max.
#define TILEX_PROCF(fx, max)    SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1))
// Same scaled product, but extracting bits 12..15 (the top 4 fractional bits).
#define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
#endif
     97 
// Third inclusion of SkBitmapProcState_matrix.h: the general case, where the
// tile behaviour of each axis is chosen at run time through function pointers
// stored on the SkBitmapProcState (see chooseMatrixProc(), which fills in
// fTileProcX/Y and fTileLowBitsProcX/Y before returning GeneralXY_Procs[]).
// Unlike the two specializations above, this one is compiled unconditionally.
#define MAKENAME(suffix)        GeneralXY ## suffix
// PREAMBLE copies the four tile procs out of the state into locals; the
// (void) casts reference each local so none is reported as unused.
#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \
                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \
                                SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY
// Parameter-list / argument-list fragments (note the leading comma).
// NOTE(review): presumably the header uses these to forward the procs to its
// helper functions -- confirm against the header.
#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX
#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY
#define PREAMBLE_ARG_X          , tileProcX, tileLowBitsProcX
#define PREAMBLE_ARG_Y          , tileProcY, tileLowBitsProcY
// The tile proc's result is scaled by the dimension (max + 1) and the integer
// part is taken via SK_USHIFT16.
#define TILEX_PROCF(fx, max)    SK_USHIFT16(tileProcX(fx) * ((max) + 1))
#define TILEY_PROCF(fy, max)    SK_USHIFT16(tileProcY(fy) * ((max) + 1))
// The low-bits procs receive the raw coordinate and the dimension (max + 1).
#define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1)
#define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1)
#include "SkBitmapProcState_matrix.h"
    112 
    113 static inline U16CPU fixed_clamp(SkFixed x)
    114 {
    115 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
    116     if (x < 0)
    117         x = 0;
    118     if (x >> 16)
    119         x = 0xFFFF;
    120 #else
    121     if (x >> 16)
    122     {
    123 #if 0   // is this faster?
    124         x = (~x >> 31) & 0xFFFF;
    125 #else
    126         if (x < 0)
    127             x = 0;
    128         else
    129             x = 0xFFFF;
    130 #endif
    131     }
    132 #endif
    133     return x;
    134 }
    135 
    136 static inline U16CPU fixed_repeat(SkFixed x)
    137 {
    138     return x & 0xFFFF;
    139 }
    140 
    141 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly.
    142 // See http://code.google.com/p/skia/issues/detail?id=472
    143 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
    144 #pragma optimize("", off)
    145 #endif
    146 
    147 static inline U16CPU fixed_mirror(SkFixed x)
    148 {
    149     SkFixed s = x << 15 >> 31;
    150     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
    151     return (x ^ s) & 0xFFFF;
    152 }
    153 
    154 #if defined(_MSC_VER) && (_MSC_VER >= 1600)
    155 #pragma optimize("", on)
    156 #endif
    157 
    158 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m)
    159 {
    160     if (SkShader::kClamp_TileMode == m)
    161         return fixed_clamp;
    162     if (SkShader::kRepeat_TileMode == m)
    163         return fixed_repeat;
    164     SkASSERT(SkShader::kMirror_TileMode == m);
    165     return fixed_mirror;
    166 }
    167 
    168 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) {
    169     return (x >> 12) & 0xF;
    170 }
    171 
    172 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) {
    173     return ((x * scale) >> 12) & 0xF;
    174 }
    175 
    176 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) {
    177     if (SkShader::kClamp_TileMode == m) {
    178         return fixed_clamp_lowbits;
    179     } else {
    180         SkASSERT(SkShader::kMirror_TileMode == m ||
    181                  SkShader::kRepeat_TileMode == m);
    182         // mirror and repeat have the same behavior for the low bits.
    183         return fixed_repeat_or_mirrow_lowbits;
    184     }
    185 }
    186 
    187 static inline U16CPU int_clamp(int x, int n) {
    188 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
    189     if (x >= n)
    190         x = n - 1;
    191     if (x < 0)
    192         x = 0;
    193 #else
    194     if ((unsigned)x >= (unsigned)n) {
    195         if (x < 0) {
    196             x = 0;
    197         } else {
    198             x = n - 1;
    199         }
    200     }
    201 #endif
    202     return x;
    203 }
    204 
    205 static inline U16CPU int_repeat(int x, int n) {
    206     return sk_int_mod(x, n);
    207 }
    208 
    209 static inline U16CPU int_mirror(int x, int n) {
    210     x = sk_int_mod(x, 2 * n);
    211     if (x >= n) {
    212         x = n + ~(x - n);
    213     }
    214     return x;
    215 }
    216 
    217 #if 0
    218 static void test_int_tileprocs() {
    219     for (int i = -8; i <= 8; i++) {
    220         SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
    221     }
    222 }
    223 #endif
    224 
    225 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
    226     if (SkShader::kClamp_TileMode == tm)
    227         return int_clamp;
    228     if (SkShader::kRepeat_TileMode == tm)
    229         return int_repeat;
    230     SkASSERT(SkShader::kMirror_TileMode == tm);
    231     return int_mirror;
    232 }
    233 
    234 //////////////////////////////////////////////////////////////////////////////
    235 
    236 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
    237 {
    238     int i;
    239 
    240     for (i = (count >> 2); i > 0; --i)
    241     {
    242         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    243         fx += dx+dx;
    244         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    245         fx += dx+dx;
    246     }
    247     count &= 3;
    248 
    249     uint16_t* xx = (uint16_t*)dst;
    250     for (i = count; i > 0; --i) {
    251         *xx++ = SkToU16(fx >> 16); fx += dx;
    252     }
    253 }
    254 
    255 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
    256 {
    257 
    258 
    259     if (count & 1)
    260     {
    261         SkASSERT((fx >> (16 + 14)) == 0);
    262         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    263         fx += dx;
    264     }
    265     while ((count -= 2) >= 0)
    266     {
    267         SkASSERT((fx >> (16 + 14)) == 0);
    268         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    269         fx += dx;
    270 
    271         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    272         fx += dx;
    273     }
    274 }
    275 
    276 ///////////////////////////////////////////////////////////////////////////////
    277 // stores the same as SCALE, but is cheaper to compute. Also since there is no
    278 // scale, we don't need/have a FILTER version
    279 
    280 static void fill_sequential(uint16_t xptr[], int start, int count) {
    281 #if 1
    282     if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
    283         *xptr++ = start++;
    284         count -= 1;
    285     }
    286     if (count > 3) {
    287         uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
    288         uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
    289         uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
    290         start += count & ~3;
    291         int qcount = count >> 2;
    292         do {
    293             *xxptr++ = pattern0;
    294             pattern0 += 0x40004;
    295             *xxptr++ = pattern1;
    296             pattern1 += 0x40004;
    297         } while (--qcount != 0);
    298         xptr = reinterpret_cast<uint16_t*>(xxptr);
    299         count &= 3;
    300     }
    301     while (--count >= 0) {
    302         *xptr++ = start++;
    303     }
    304 #else
    305     for (int i = 0; i < count; i++) {
    306         *xptr++ = start++;
    307     }
    308 #endif
    309 }
    310 
    311 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
    312                                    int x, int y) {
    313     SkPoint pt;
    314     s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
    315                SkIntToScalar(y) + SK_ScalarHalf, &pt);
    316     **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
    317                            s.fBitmap->height());
    318     *xy += 1;   // bump the ptr
    319     // return our starting X position
    320     return SkScalarToFixed(pt.fX) >> 16;
    321 }
    322 
    323 static void clampx_nofilter_trans(const SkBitmapProcState& s,
    324                                   uint32_t xy[], int count, int x, int y) {
    325     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    326 
    327     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    328     const int width = s.fBitmap->width();
    329     if (1 == width) {
    330         // all of the following X values must be 0
    331         memset(xy, 0, count * sizeof(uint16_t));
    332         return;
    333     }
    334 
    335     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    336     int n;
    337 
    338     // fill before 0 as needed
    339     if (xpos < 0) {
    340         n = -xpos;
    341         if (n > count) {
    342             n = count;
    343         }
    344         memset(xptr, 0, n * sizeof(uint16_t));
    345         count -= n;
    346         if (0 == count) {
    347             return;
    348         }
    349         xptr += n;
    350         xpos = 0;
    351     }
    352 
    353     // fill in 0..width-1 if needed
    354     if (xpos < width) {
    355         n = width - xpos;
    356         if (n > count) {
    357             n = count;
    358         }
    359         fill_sequential(xptr, xpos, n);
    360         count -= n;
    361         if (0 == count) {
    362             return;
    363         }
    364         xptr += n;
    365     }
    366 
    367     // fill the remaining with the max value
    368     sk_memset16(xptr, width - 1, count);
    369 }
    370 
    371 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
    372                                    uint32_t xy[], int count, int x, int y) {
    373     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    374 
    375     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    376     const int width = s.fBitmap->width();
    377     if (1 == width) {
    378         // all of the following X values must be 0
    379         memset(xy, 0, count * sizeof(uint16_t));
    380         return;
    381     }
    382 
    383     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    384     int start = sk_int_mod(xpos, width);
    385     int n = width - start;
    386     if (n > count) {
    387         n = count;
    388     }
    389     fill_sequential(xptr, start, n);
    390     xptr += n;
    391     count -= n;
    392 
    393     while (count >= width) {
    394         fill_sequential(xptr, 0, width);
    395         xptr += width;
    396         count -= width;
    397     }
    398 
    399     if (count > 0) {
    400         fill_sequential(xptr, 0, count);
    401     }
    402 }
    403 
    404 static void fill_backwards(uint16_t xptr[], int pos, int count) {
    405     for (int i = 0; i < count; i++) {
    406         SkASSERT(pos >= 0);
    407         xptr[i] = pos--;
    408     }
    409 }
    410 
    411 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
    412                                    uint32_t xy[], int count, int x, int y) {
    413     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    414 
    415     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    416     const int width = s.fBitmap->width();
    417     if (1 == width) {
    418         // all of the following X values must be 0
    419         memset(xy, 0, count * sizeof(uint16_t));
    420         return;
    421     }
    422 
    423     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    424     // need to know our start, and our initial phase (forward or backward)
    425     bool forward;
    426     int n;
    427     int start = sk_int_mod(xpos, 2 * width);
    428     if (start >= width) {
    429         start = width + ~(start - width);
    430         forward = false;
    431         n = start + 1;  // [start .. 0]
    432     } else {
    433         forward = true;
    434         n = width - start;  // [start .. width)
    435     }
    436     if (n > count) {
    437         n = count;
    438     }
    439     if (forward) {
    440         fill_sequential(xptr, start, n);
    441     } else {
    442         fill_backwards(xptr, start, n);
    443     }
    444     forward = !forward;
    445     xptr += n;
    446     count -= n;
    447 
    448     while (count >= width) {
    449         if (forward) {
    450             fill_sequential(xptr, 0, width);
    451         } else {
    452             fill_backwards(xptr, width - 1, width);
    453         }
    454         forward = !forward;
    455         xptr += width;
    456         count -= width;
    457     }
    458 
    459     if (count > 0) {
    460         if (forward) {
    461             fill_sequential(xptr, 0, count);
    462         } else {
    463             fill_backwards(xptr, width - 1, count);
    464         }
    465     }
    466 }
    467 
    468 ///////////////////////////////////////////////////////////////////////////////
    469 
    470 SkBitmapProcState::MatrixProc
    471 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
    472 //    test_int_tileprocs();
    473     // check for our special case when there is no scale/affine/perspective
    474     if (trivial_matrix) {
    475         SkASSERT(!fDoFilter);
    476         fIntTileProcY = choose_int_tile_proc(fTileModeY);
    477         switch (fTileModeX) {
    478             case SkShader::kClamp_TileMode:
    479                 return clampx_nofilter_trans;
    480             case SkShader::kRepeat_TileMode:
    481                 return repeatx_nofilter_trans;
    482             case SkShader::kMirror_TileMode:
    483                 return mirrorx_nofilter_trans;
    484         }
    485     }
    486 
    487     int index = 0;
    488     if (fDoFilter) {
    489         index = 1;
    490     }
    491     if (fInvType & SkMatrix::kPerspective_Mask) {
    492         index += 4;
    493     } else if (fInvType & SkMatrix::kAffine_Mask) {
    494         index += 2;
    495     }
    496 
    497     if (SkShader::kClamp_TileMode == fTileModeX &&
    498         SkShader::kClamp_TileMode == fTileModeY)
    499     {
    500         // clamp gets special version of filterOne
    501         fFilterOneX = SK_Fixed1;
    502         fFilterOneY = SK_Fixed1;
    503         return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index];
    504     }
    505 
    506     // all remaining procs use this form for filterOne
    507     fFilterOneX = SK_Fixed1 / fBitmap->width();
    508     fFilterOneY = SK_Fixed1 / fBitmap->height();
    509 
    510     if (SkShader::kRepeat_TileMode == fTileModeX &&
    511         SkShader::kRepeat_TileMode == fTileModeY)
    512     {
    513         return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index];
    514     }
    515 
    516     fTileProcX = choose_tile_proc(fTileModeX);
    517     fTileProcY = choose_tile_proc(fTileModeY);
    518     fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX);
    519     fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY);
    520     return GeneralXY_Procs[index];
    521 }
    522