Home | History | Annotate | Download | only in core
      1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola */
      2 
      3 #include "SkBitmapProcState.h"
      4 #include "SkPerspIter.h"
      5 #include "SkShader.h"
      6 #include "SkUtils.h"
      7 
      8 /*  returns 0...(n-1) given any x (positive or negative).
      9 
     10     As an example, if n (which is always positive) is 5...
     11 
     12           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
     13     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
     14  */
     15 static inline int sk_int_mod(int x, int n) {
     16     SkASSERT(n > 0);
     17     if ((unsigned)x >= (unsigned)n) {
     18         if (x < 0) {
     19             x = n + ~(~x % n);
     20         } else {
     21             x = x % n;
     22         }
     23     }
     24     return x;
     25 }
     26 
     27 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     28 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     29 
     30 #define MAKENAME(suffix)        ClampX_ClampY ## suffix
     31 #define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
     32 #define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
     33 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
     34 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
     35 #define CHECK_FOR_DECAL
     36 #if	defined(__ARM_HAVE_NEON)
     37     #include "SkBitmapProcState_matrix_clamp.h"
     38 #else
     39     #include "SkBitmapProcState_matrix.h"
     40 #endif
     41 
     42 #define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
     43 #define TILEX_PROCF(fx, max)    (((fx) & 0xFFFF) * ((max) + 1) >> 16)
     44 #define TILEY_PROCF(fy, max)    (((fy) & 0xFFFF) * ((max) + 1) >> 16)
     45 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
     46 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
     47 #if	defined(__ARM_HAVE_NEON)
     48     #include "SkBitmapProcState_matrix_repeat.h"
     49 #else
     50     #include "SkBitmapProcState_matrix.h"
     51 #endif
     52 
// Macro set for the general case, where X and Y may use different tile
// modes. The tiling is done through the per-axis function pointers stored in
// the state (fTileProcX / fTileProcY) rather than being baked into the macro.
//   PREAMBLE          - copies the two tile procs out of the state into locals
//   PREAMBLE_PARAM_?  - extra trailing parameter carrying one tile proc
//   PREAMBLE_ARG_?    - the matching extra trailing argument
//   TILE?_PROCF       - tile proc result times (max + 1), integer part
//   TILE?_LOW_BITS    - the same product, the 4 bits directly below the
//                       integer part
// NOTE(review): how the header uses the PREAMBLE_* hooks is not visible
// here -- confirm against SkBitmapProcState_matrix.h.
#define MAKENAME(suffix)        GeneralXY ## suffix
#define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; \
                                SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY
#define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX
#define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY
#define PREAMBLE_ARG_X          , tileProcX
#define PREAMBLE_ARG_Y          , tileProcY
#define TILEX_PROCF(fx, max)    (tileProcX(fx) * ((max) + 1) >> 16)
#define TILEY_PROCF(fy, max)    (tileProcY(fy) * ((max) + 1) >> 16)
#define TILEX_LOW_BITS(fx, max) ((tileProcX(fx) * ((max) + 1) >> 12) & 0xF)
#define TILEY_LOW_BITS(fy, max) ((tileProcY(fy) * ((max) + 1) >> 12) & 0xF)
#include "SkBitmapProcState_matrix.h"
     65 
     66 static inline U16CPU fixed_clamp(SkFixed x)
     67 {
     68 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
     69     if (x >> 16)
     70         x = 0xFFFF;
     71     if (x < 0)
     72         x = 0;
     73 #else
     74     if (x >> 16)
     75     {
     76         if (x < 0)
     77             x = 0;
     78         else
     79             x = 0xFFFF;
     80     }
     81 #endif
     82     return x;
     83 }
     84 
     85 static inline U16CPU fixed_repeat(SkFixed x)
     86 {
     87     return x & 0xFFFF;
     88 }
     89 
     90 static inline U16CPU fixed_mirror(SkFixed x)
     91 {
     92     SkFixed s = x << 15 >> 31;
     93     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
     94     return (x ^ s) & 0xFFFF;
     95 }
     96 
     97 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m)
     98 {
     99     if (SkShader::kClamp_TileMode == m)
    100         return fixed_clamp;
    101     if (SkShader::kRepeat_TileMode == m)
    102         return fixed_repeat;
    103     SkASSERT(SkShader::kMirror_TileMode == m);
    104     return fixed_mirror;
    105 }
    106 
    107 static inline U16CPU int_clamp(int x, int n) {
    108 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
    109     if (x >= n)
    110         x = n - 1;
    111     if (x < 0)
    112         x = 0;
    113 #else
    114     if ((unsigned)x >= (unsigned)n) {
    115         if (x < 0) {
    116             x = 0;
    117         } else {
    118             x = n - 1;
    119         }
    120     }
    121 #endif
    122     return x;
    123 }
    124 
    125 static inline U16CPU int_repeat(int x, int n) {
    126     return sk_int_mod(x, n);
    127 }
    128 
    129 static inline U16CPU int_mirror(int x, int n) {
    130     x = sk_int_mod(x, 2 * n);
    131     if (x >= n) {
    132         x = n + ~(x - n);
    133     }
    134     return x;
    135 }
    136 
    137 #if 0
    138 static void test_int_tileprocs() {
    139     for (int i = -8; i <= 8; i++) {
    140         SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
    141     }
    142 }
    143 #endif
    144 
    145 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
    146     if (SkShader::kClamp_TileMode == tm)
    147         return int_clamp;
    148     if (SkShader::kRepeat_TileMode == tm)
    149         return int_repeat;
    150     SkASSERT(SkShader::kMirror_TileMode == tm);
    151     return int_mirror;
    152 }
    153 
    154 //////////////////////////////////////////////////////////////////////////////
    155 
    156 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
    157 {
    158     int i;
    159 
    160 #if	defined(__ARM_HAVE_NEON)
    161     if (count >= 8) {
    162         /* SkFixed is 16.16 fixed point */
    163         SkFixed dx2 = dx+dx;
    164         SkFixed dx4 = dx2+dx2;
    165         SkFixed dx8 = dx4+dx4;
    166 
    167         /* now build fx/fx+dx/fx+2dx/fx+3dx */
    168         SkFixed fx1, fx2, fx3;
    169         int32x2_t lower, upper;
    170         int32x4_t lbase, hbase;
    171         uint16_t *dst16 = (uint16_t *)dst;
    172 
    173         fx1 = fx+dx;
    174         fx2 = fx1+dx;
    175         fx3 = fx2+dx;
    176 
    177         /* avoid an 'lbase unitialized' warning */
    178         lbase = vdupq_n_s32(fx);
    179         lbase = vsetq_lane_s32(fx1, lbase, 1);
    180         lbase = vsetq_lane_s32(fx2, lbase, 2);
    181         lbase = vsetq_lane_s32(fx3, lbase, 3);
    182         hbase = vaddq_s32(lbase, vdupq_n_s32(dx4));
    183 
    184         /* take upper 16 of each, store, and bump everything */
    185         do {
    186             int32x4_t lout, hout;
    187             uint16x8_t hi16;
    188 
    189             lout = lbase;
    190             hout = hbase;
    191             /* gets hi's of all louts then hi's of all houts */
    192             asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout));
    193             hi16 = vreinterpretq_u16_s32(hout);
    194             vst1q_u16(dst16, hi16);
    195 
    196             /* on to the next */
    197             lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8));
    198             hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8));
    199             dst16 += 8;
    200             count -= 8;
    201             fx += dx8;
    202         } while (count >= 8);
    203         dst = (uint32_t *) dst16;
    204     }
    205 #else
    206     for (i = (count >> 2); i > 0; --i)
    207     {
    208         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    209         fx += dx+dx;
    210         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    211         fx += dx+dx;
    212     }
    213     count &= 3;
    214 #endif
    215 
    216     uint16_t* xx = (uint16_t*)dst;
    217     for (i = count; i > 0; --i) {
    218         *xx++ = SkToU16(fx >> 16); fx += dx;
    219     }
    220 }
    221 
    222 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
    223 {
    224 
    225 #if	defined(__ARM_HAVE_NEON)
    226     if (count >= 8) {
    227         int32x4_t wide_fx;
    228         int32x4_t wide_fx2;
    229         int32x4_t wide_dx8 = vdupq_n_s32(dx*8);
    230 
    231         wide_fx = vdupq_n_s32(fx);
    232         wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
    233         wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
    234         wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
    235 
    236         wide_fx2 = vaddq_s32(wide_fx, vdupq_n_s32(dx+dx+dx+dx));
    237 
    238         while (count >= 8) {
    239             int32x4_t wide_out;
    240             int32x4_t wide_out2;
    241 
    242             wide_out = vshlq_n_s32(vshrq_n_s32(wide_fx, 12), 14);
    243             wide_out = vorrq_s32(wide_out,
    244             vaddq_s32(vshrq_n_s32(wide_fx,16), vdupq_n_s32(1)));
    245 
    246             wide_out2 = vshlq_n_s32(vshrq_n_s32(wide_fx2, 12), 14);
    247             wide_out2 = vorrq_s32(wide_out2,
    248             vaddq_s32(vshrq_n_s32(wide_fx2,16), vdupq_n_s32(1)));
    249 
    250             vst1q_u32(dst, vreinterpretq_u32_s32(wide_out));
    251             vst1q_u32(dst+4, vreinterpretq_u32_s32(wide_out2));
    252 
    253             dst += 8;
    254             fx += dx*8;
    255             wide_fx = vaddq_s32(wide_fx, wide_dx8);
    256             wide_fx2 = vaddq_s32(wide_fx2, wide_dx8);
    257             count -= 8;
    258         }
    259     }
    260 #endif
    261 
    262     if (count & 1)
    263     {
    264         SkASSERT((fx >> (16 + 14)) == 0);
    265         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    266         fx += dx;
    267     }
    268     while ((count -= 2) >= 0)
    269     {
    270         SkASSERT((fx >> (16 + 14)) == 0);
    271         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    272         fx += dx;
    273 
    274         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    275         fx += dx;
    276     }
    277 }
    278 
    279 ///////////////////////////////////////////////////////////////////////////////
    280 // stores the same as SCALE, but is cheaper to compute. Also since there is no
    281 // scale, we don't need/have a FILTER version
    282 
    283 static void fill_sequential(uint16_t xptr[], int start, int count) {
    284 #if 1
    285     if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
    286         *xptr++ = start++;
    287         count -= 1;
    288     }
    289     if (count > 3) {
    290         uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
    291         uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
    292         uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
    293         start += count & ~3;
    294         int qcount = count >> 2;
    295         do {
    296             *xxptr++ = pattern0;
    297             pattern0 += 0x40004;
    298             *xxptr++ = pattern1;
    299             pattern1 += 0x40004;
    300         } while (--qcount != 0);
    301         xptr = reinterpret_cast<uint16_t*>(xxptr);
    302         count &= 3;
    303     }
    304     while (--count >= 0) {
    305         *xptr++ = start++;
    306     }
    307 #else
    308     for (int i = 0; i < count; i++) {
    309         *xptr++ = start++;
    310     }
    311 #endif
    312 }
    313 
    314 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
    315                                    int x, int y) {
    316     SkPoint pt;
    317     s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
    318                SkIntToScalar(y) + SK_ScalarHalf, &pt);
    319     **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
    320                            s.fBitmap->height());
    321     *xy += 1;   // bump the ptr
    322     // return our starting X position
    323     return SkScalarToFixed(pt.fX) >> 16;
    324 }
    325 
    326 static void clampx_nofilter_trans(const SkBitmapProcState& s,
    327                                   uint32_t xy[], int count, int x, int y) {
    328     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    329 
    330     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    331     const int width = s.fBitmap->width();
    332     if (1 == width) {
    333         // all of the following X values must be 0
    334         memset(xy, 0, count * sizeof(uint16_t));
    335         return;
    336     }
    337 
    338     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    339     int n;
    340 
    341     // fill before 0 as needed
    342     if (xpos < 0) {
    343         n = -xpos;
    344         if (n > count) {
    345             n = count;
    346         }
    347         memset(xptr, 0, n * sizeof(uint16_t));
    348         count -= n;
    349         if (0 == count) {
    350             return;
    351         }
    352         xptr += n;
    353         xpos = 0;
    354     }
    355 
    356     // fill in 0..width-1 if needed
    357     if (xpos < width) {
    358         n = width - xpos;
    359         if (n > count) {
    360             n = count;
    361         }
    362         fill_sequential(xptr, xpos, n);
    363         count -= n;
    364         if (0 == count) {
    365             return;
    366         }
    367         xptr += n;
    368     }
    369 
    370     // fill the remaining with the max value
    371     sk_memset16(xptr, width - 1, count);
    372 }
    373 
    374 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
    375                                    uint32_t xy[], int count, int x, int y) {
    376     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    377 
    378     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    379     const int width = s.fBitmap->width();
    380     if (1 == width) {
    381         // all of the following X values must be 0
    382         memset(xy, 0, count * sizeof(uint16_t));
    383         return;
    384     }
    385 
    386     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    387     int start = sk_int_mod(xpos, width);
    388     int n = width - start;
    389     if (n > count) {
    390         n = count;
    391     }
    392     fill_sequential(xptr, start, n);
    393     xptr += n;
    394     count -= n;
    395 
    396     while (count >= width) {
    397         fill_sequential(xptr, 0, width);
    398         xptr += width;
    399         count -= width;
    400     }
    401 
    402     if (count > 0) {
    403         fill_sequential(xptr, 0, count);
    404     }
    405 }
    406 
    407 static void fill_backwards(uint16_t xptr[], int pos, int count) {
    408     for (int i = 0; i < count; i++) {
    409         SkASSERT(pos >= 0);
    410         xptr[i] = pos--;
    411     }
    412 }
    413 
    414 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
    415                                    uint32_t xy[], int count, int x, int y) {
    416     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    417 
    418     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    419     const int width = s.fBitmap->width();
    420     if (1 == width) {
    421         // all of the following X values must be 0
    422         memset(xy, 0, count * sizeof(uint16_t));
    423         return;
    424     }
    425 
    426     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    427     // need to know our start, and our initial phase (forward or backward)
    428     bool forward;
    429     int n;
    430     int start = sk_int_mod(xpos, 2 * width);
    431     if (start >= width) {
    432         start = width + ~(start - width);
    433         forward = false;
    434         n = start + 1;  // [start .. 0]
    435     } else {
    436         forward = true;
    437         n = width - start;  // [start .. width)
    438     }
    439     if (n > count) {
    440         n = count;
    441     }
    442     if (forward) {
    443         fill_sequential(xptr, start, n);
    444     } else {
    445         fill_backwards(xptr, start, n);
    446     }
    447     forward = !forward;
    448     xptr += n;
    449     count -= n;
    450 
    451     while (count >= width) {
    452         if (forward) {
    453             fill_sequential(xptr, 0, width);
    454         } else {
    455             fill_backwards(xptr, width - 1, width);
    456         }
    457         forward = !forward;
    458         xptr += width;
    459         count -= width;
    460     }
    461 
    462     if (count > 0) {
    463         if (forward) {
    464             fill_sequential(xptr, 0, count);
    465         } else {
    466             fill_backwards(xptr, width - 1, count);
    467         }
    468     }
    469 }
    470 
    471 ///////////////////////////////////////////////////////////////////////////////
    472 
    473 SkBitmapProcState::MatrixProc
    474 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
    475 //    test_int_tileprocs();
    476     // check for our special case when there is no scale/affine/perspective
    477     if (trivial_matrix) {
    478         SkASSERT(!fDoFilter);
    479         fIntTileProcY = choose_int_tile_proc(fTileModeY);
    480         switch (fTileModeX) {
    481             case SkShader::kClamp_TileMode:
    482                 return clampx_nofilter_trans;
    483             case SkShader::kRepeat_TileMode:
    484                 return repeatx_nofilter_trans;
    485             case SkShader::kMirror_TileMode:
    486                 return mirrorx_nofilter_trans;
    487         }
    488     }
    489 
    490     int index = 0;
    491     if (fDoFilter) {
    492         index = 1;
    493     }
    494     if (fInvType & SkMatrix::kPerspective_Mask) {
    495         index += 4;
    496     } else if (fInvType & SkMatrix::kAffine_Mask) {
    497         index += 2;
    498     }
    499 
    500     if (SkShader::kClamp_TileMode == fTileModeX &&
    501         SkShader::kClamp_TileMode == fTileModeY)
    502     {
    503         // clamp gets special version of filterOne
    504         fFilterOneX = SK_Fixed1;
    505         fFilterOneY = SK_Fixed1;
    506         return ClampX_ClampY_Procs[index];
    507     }
    508 
    509     // all remaining procs use this form for filterOne
    510     fFilterOneX = SK_Fixed1 / fBitmap->width();
    511     fFilterOneY = SK_Fixed1 / fBitmap->height();
    512 
    513     if (SkShader::kRepeat_TileMode == fTileModeX &&
    514         SkShader::kRepeat_TileMode == fTileModeY)
    515     {
    516         return RepeatX_RepeatY_Procs[index];
    517     }
    518 
    519     fTileProcX = choose_tile_proc(fTileModeX);
    520     fTileProcY = choose_tile_proc(fTileModeY);
    521     return GeneralXY_Procs[index];
    522 }
    523 
    524