Home | History | Annotate | Download | only in core
      1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola
      2  *
      3  * Use of this source code is governed by a BSD-style license that can be
      4  * found in the LICENSE file.
      5  */
      6 
      7 #include "SkBitmapProcState.h"
      8 #include "SkPerspIter.h"
      9 #include "SkShader.h"
     10 #include "SkUtils.h"
     11 
     12 /*  returns 0...(n-1) given any x (positive or negative).
     13 
     14     As an example, if n (which is always positive) is 5...
     15 
     16           x: -8 -7 -6 -5 -4 -3 -2 -1  0  1  2  3  4  5  6  7  8
     17     returns:  2  3  4  0  1  2  3  4  0  1  2  3  4  0  1  2  3
     18  */
     19 static inline int sk_int_mod(int x, int n) {
     20     SkASSERT(n > 0);
     21     if ((unsigned)x >= (unsigned)n) {
     22         if (x < 0) {
     23             x = n + ~(~x % n);
     24         } else {
     25             x = x % n;
     26         }
     27     }
     28     return x;
     29 }
     30 
     31 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     32 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count);
     33 
     34 #define MAKENAME(suffix)        ClampX_ClampY ## suffix
     35 #define TILEX_PROCF(fx, max)    SkClampMax((fx) >> 16, max)
     36 #define TILEY_PROCF(fy, max)    SkClampMax((fy) >> 16, max)
     37 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF)
     38 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF)
     39 #define CHECK_FOR_DECAL
     40 #if	defined(__ARM_HAVE_NEON)
     41     #include "SkBitmapProcState_matrix_clamp.h"
     42 #else
     43     #include "SkBitmapProcState_matrix.h"
     44 #endif
     45 
     46 #define MAKENAME(suffix)        RepeatX_RepeatY ## suffix
     47 #define TILEX_PROCF(fx, max)    (((fx) & 0xFFFF) * ((max) + 1) >> 16)
     48 #define TILEY_PROCF(fy, max)    (((fy) & 0xFFFF) * ((max) + 1) >> 16)
     49 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
     50 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF)
     51 #if	defined(__ARM_HAVE_NEON)
     52     #include "SkBitmapProcState_matrix_repeat.h"
     53 #else
     54     #include "SkBitmapProcState_matrix.h"
     55 #endif
     56 
     57 #define MAKENAME(suffix)        GeneralXY ## suffix
     58 #define PREAMBLE(state)         SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; \
     59                                 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY
     60 #define PREAMBLE_PARAM_X        , SkBitmapProcState::FixedTileProc tileProcX
     61 #define PREAMBLE_PARAM_Y        , SkBitmapProcState::FixedTileProc tileProcY
     62 #define PREAMBLE_ARG_X          , tileProcX
     63 #define PREAMBLE_ARG_Y          , tileProcY
     64 #define TILEX_PROCF(fx, max)    (tileProcX(fx) * ((max) + 1) >> 16)
     65 #define TILEY_PROCF(fy, max)    (tileProcY(fy) * ((max) + 1) >> 16)
     66 #define TILEX_LOW_BITS(fx, max) ((tileProcX(fx) * ((max) + 1) >> 12) & 0xF)
     67 #define TILEY_LOW_BITS(fy, max) ((tileProcY(fy) * ((max) + 1) >> 12) & 0xF)
     68 #include "SkBitmapProcState_matrix.h"
     69 
     70 static inline U16CPU fixed_clamp(SkFixed x)
     71 {
     72 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
     73     if (x >> 16)
     74         x = 0xFFFF;
     75     if (x < 0)
     76         x = 0;
     77 #else
     78     if (x >> 16)
     79     {
     80         if (x < 0)
     81             x = 0;
     82         else
     83             x = 0xFFFF;
     84     }
     85 #endif
     86     return x;
     87 }
     88 
     89 static inline U16CPU fixed_repeat(SkFixed x)
     90 {
     91     return x & 0xFFFF;
     92 }
     93 
     94 static inline U16CPU fixed_mirror(SkFixed x)
     95 {
     96     SkFixed s = x << 15 >> 31;
     97     // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval
     98     return (x ^ s) & 0xFFFF;
     99 }
    100 
    101 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m)
    102 {
    103     if (SkShader::kClamp_TileMode == m)
    104         return fixed_clamp;
    105     if (SkShader::kRepeat_TileMode == m)
    106         return fixed_repeat;
    107     SkASSERT(SkShader::kMirror_TileMode == m);
    108     return fixed_mirror;
    109 }
    110 
    111 static inline U16CPU int_clamp(int x, int n) {
    112 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR
    113     if (x >= n)
    114         x = n - 1;
    115     if (x < 0)
    116         x = 0;
    117 #else
    118     if ((unsigned)x >= (unsigned)n) {
    119         if (x < 0) {
    120             x = 0;
    121         } else {
    122             x = n - 1;
    123         }
    124     }
    125 #endif
    126     return x;
    127 }
    128 
    129 static inline U16CPU int_repeat(int x, int n) {
    130     return sk_int_mod(x, n);
    131 }
    132 
    133 static inline U16CPU int_mirror(int x, int n) {
    134     x = sk_int_mod(x, 2 * n);
    135     if (x >= n) {
    136         x = n + ~(x - n);
    137     }
    138     return x;
    139 }
    140 
    141 #if 0
    142 static void test_int_tileprocs() {
    143     for (int i = -8; i <= 8; i++) {
    144         SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3));
    145     }
    146 }
    147 #endif
    148 
    149 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) {
    150     if (SkShader::kClamp_TileMode == tm)
    151         return int_clamp;
    152     if (SkShader::kRepeat_TileMode == tm)
    153         return int_repeat;
    154     SkASSERT(SkShader::kMirror_TileMode == tm);
    155     return int_mirror;
    156 }
    157 
    158 //////////////////////////////////////////////////////////////////////////////
    159 
    160 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
    161 {
    162     int i;
    163 
    164 #if	defined(__ARM_HAVE_NEON)
    165     if (count >= 8) {
    166         /* SkFixed is 16.16 fixed point */
    167         SkFixed dx2 = dx+dx;
    168         SkFixed dx4 = dx2+dx2;
    169         SkFixed dx8 = dx4+dx4;
    170 
    171         /* now build fx/fx+dx/fx+2dx/fx+3dx */
    172         SkFixed fx1, fx2, fx3;
    173         int32x2_t lower, upper;
    174         int32x4_t lbase, hbase;
    175         uint16_t *dst16 = (uint16_t *)dst;
    176 
    177         fx1 = fx+dx;
    178         fx2 = fx1+dx;
    179         fx3 = fx2+dx;
    180 
    181         /* avoid an 'lbase unitialized' warning */
    182         lbase = vdupq_n_s32(fx);
    183         lbase = vsetq_lane_s32(fx1, lbase, 1);
    184         lbase = vsetq_lane_s32(fx2, lbase, 2);
    185         lbase = vsetq_lane_s32(fx3, lbase, 3);
    186         hbase = vaddq_s32(lbase, vdupq_n_s32(dx4));
    187 
    188         /* take upper 16 of each, store, and bump everything */
    189         do {
    190             int32x4_t lout, hout;
    191             uint16x8_t hi16;
    192 
    193             lout = lbase;
    194             hout = hbase;
    195             /* gets hi's of all louts then hi's of all houts */
    196             asm ("vuzpq.16 %q0, %q1" : "+w" (lout), "+w" (hout));
    197             hi16 = vreinterpretq_u16_s32(hout);
    198             vst1q_u16(dst16, hi16);
    199 
    200             /* on to the next */
    201             lbase = vaddq_s32 (lbase, vdupq_n_s32(dx8));
    202             hbase = vaddq_s32 (hbase, vdupq_n_s32(dx8));
    203             dst16 += 8;
    204             count -= 8;
    205             fx += dx8;
    206         } while (count >= 8);
    207         dst = (uint32_t *) dst16;
    208     }
    209 #else
    210     for (i = (count >> 2); i > 0; --i)
    211     {
    212         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    213         fx += dx+dx;
    214         *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16);
    215         fx += dx+dx;
    216     }
    217     count &= 3;
    218 #endif
    219 
    220     uint16_t* xx = (uint16_t*)dst;
    221     for (i = count; i > 0; --i) {
    222         *xx++ = SkToU16(fx >> 16); fx += dx;
    223     }
    224 }
    225 
    226 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count)
    227 {
    228 
    229 #if	defined(__ARM_HAVE_NEON)
    230     if (count >= 8) {
    231         int32x4_t wide_fx;
    232         int32x4_t wide_fx2;
    233         int32x4_t wide_dx8 = vdupq_n_s32(dx*8);
    234 
    235         wide_fx = vdupq_n_s32(fx);
    236         wide_fx = vsetq_lane_s32(fx+dx, wide_fx, 1);
    237         wide_fx = vsetq_lane_s32(fx+dx+dx, wide_fx, 2);
    238         wide_fx = vsetq_lane_s32(fx+dx+dx+dx, wide_fx, 3);
    239 
    240         wide_fx2 = vaddq_s32(wide_fx, vdupq_n_s32(dx+dx+dx+dx));
    241 
    242         while (count >= 8) {
    243             int32x4_t wide_out;
    244             int32x4_t wide_out2;
    245 
    246             wide_out = vshlq_n_s32(vshrq_n_s32(wide_fx, 12), 14);
    247             wide_out = vorrq_s32(wide_out,
    248             vaddq_s32(vshrq_n_s32(wide_fx,16), vdupq_n_s32(1)));
    249 
    250             wide_out2 = vshlq_n_s32(vshrq_n_s32(wide_fx2, 12), 14);
    251             wide_out2 = vorrq_s32(wide_out2,
    252             vaddq_s32(vshrq_n_s32(wide_fx2,16), vdupq_n_s32(1)));
    253 
    254             vst1q_u32(dst, vreinterpretq_u32_s32(wide_out));
    255             vst1q_u32(dst+4, vreinterpretq_u32_s32(wide_out2));
    256 
    257             dst += 8;
    258             fx += dx*8;
    259             wide_fx = vaddq_s32(wide_fx, wide_dx8);
    260             wide_fx2 = vaddq_s32(wide_fx2, wide_dx8);
    261             count -= 8;
    262         }
    263     }
    264 #endif
    265 
    266     if (count & 1)
    267     {
    268         SkASSERT((fx >> (16 + 14)) == 0);
    269         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    270         fx += dx;
    271     }
    272     while ((count -= 2) >= 0)
    273     {
    274         SkASSERT((fx >> (16 + 14)) == 0);
    275         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    276         fx += dx;
    277 
    278         *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1);
    279         fx += dx;
    280     }
    281 }
    282 
    283 ///////////////////////////////////////////////////////////////////////////////
    284 // stores the same as SCALE, but is cheaper to compute. Also since there is no
    285 // scale, we don't need/have a FILTER version
    286 
    287 static void fill_sequential(uint16_t xptr[], int start, int count) {
    288 #if 1
    289     if (reinterpret_cast<intptr_t>(xptr) & 0x2) {
    290         *xptr++ = start++;
    291         count -= 1;
    292     }
    293     if (count > 3) {
    294         uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr);
    295         uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1);
    296         uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3);
    297         start += count & ~3;
    298         int qcount = count >> 2;
    299         do {
    300             *xxptr++ = pattern0;
    301             pattern0 += 0x40004;
    302             *xxptr++ = pattern1;
    303             pattern1 += 0x40004;
    304         } while (--qcount != 0);
    305         xptr = reinterpret_cast<uint16_t*>(xxptr);
    306         count &= 3;
    307     }
    308     while (--count >= 0) {
    309         *xptr++ = start++;
    310     }
    311 #else
    312     for (int i = 0; i < count; i++) {
    313         *xptr++ = start++;
    314     }
    315 #endif
    316 }
    317 
    318 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy,
    319                                    int x, int y) {
    320     SkPoint pt;
    321     s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf,
    322                SkIntToScalar(y) + SK_ScalarHalf, &pt);
    323     **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16,
    324                            s.fBitmap->height());
    325     *xy += 1;   // bump the ptr
    326     // return our starting X position
    327     return SkScalarToFixed(pt.fX) >> 16;
    328 }
    329 
    330 static void clampx_nofilter_trans(const SkBitmapProcState& s,
    331                                   uint32_t xy[], int count, int x, int y) {
    332     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    333 
    334     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    335     const int width = s.fBitmap->width();
    336     if (1 == width) {
    337         // all of the following X values must be 0
    338         memset(xy, 0, count * sizeof(uint16_t));
    339         return;
    340     }
    341 
    342     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    343     int n;
    344 
    345     // fill before 0 as needed
    346     if (xpos < 0) {
    347         n = -xpos;
    348         if (n > count) {
    349             n = count;
    350         }
    351         memset(xptr, 0, n * sizeof(uint16_t));
    352         count -= n;
    353         if (0 == count) {
    354             return;
    355         }
    356         xptr += n;
    357         xpos = 0;
    358     }
    359 
    360     // fill in 0..width-1 if needed
    361     if (xpos < width) {
    362         n = width - xpos;
    363         if (n > count) {
    364             n = count;
    365         }
    366         fill_sequential(xptr, xpos, n);
    367         count -= n;
    368         if (0 == count) {
    369             return;
    370         }
    371         xptr += n;
    372     }
    373 
    374     // fill the remaining with the max value
    375     sk_memset16(xptr, width - 1, count);
    376 }
    377 
    378 static void repeatx_nofilter_trans(const SkBitmapProcState& s,
    379                                    uint32_t xy[], int count, int x, int y) {
    380     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    381 
    382     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    383     const int width = s.fBitmap->width();
    384     if (1 == width) {
    385         // all of the following X values must be 0
    386         memset(xy, 0, count * sizeof(uint16_t));
    387         return;
    388     }
    389 
    390     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    391     int start = sk_int_mod(xpos, width);
    392     int n = width - start;
    393     if (n > count) {
    394         n = count;
    395     }
    396     fill_sequential(xptr, start, n);
    397     xptr += n;
    398     count -= n;
    399 
    400     while (count >= width) {
    401         fill_sequential(xptr, 0, width);
    402         xptr += width;
    403         count -= width;
    404     }
    405 
    406     if (count > 0) {
    407         fill_sequential(xptr, 0, count);
    408     }
    409 }
    410 
    411 static void fill_backwards(uint16_t xptr[], int pos, int count) {
    412     for (int i = 0; i < count; i++) {
    413         SkASSERT(pos >= 0);
    414         xptr[i] = pos--;
    415     }
    416 }
    417 
    418 static void mirrorx_nofilter_trans(const SkBitmapProcState& s,
    419                                    uint32_t xy[], int count, int x, int y) {
    420     SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0);
    421 
    422     int xpos = nofilter_trans_preamble(s, &xy, x, y);
    423     const int width = s.fBitmap->width();
    424     if (1 == width) {
    425         // all of the following X values must be 0
    426         memset(xy, 0, count * sizeof(uint16_t));
    427         return;
    428     }
    429 
    430     uint16_t* xptr = reinterpret_cast<uint16_t*>(xy);
    431     // need to know our start, and our initial phase (forward or backward)
    432     bool forward;
    433     int n;
    434     int start = sk_int_mod(xpos, 2 * width);
    435     if (start >= width) {
    436         start = width + ~(start - width);
    437         forward = false;
    438         n = start + 1;  // [start .. 0]
    439     } else {
    440         forward = true;
    441         n = width - start;  // [start .. width)
    442     }
    443     if (n > count) {
    444         n = count;
    445     }
    446     if (forward) {
    447         fill_sequential(xptr, start, n);
    448     } else {
    449         fill_backwards(xptr, start, n);
    450     }
    451     forward = !forward;
    452     xptr += n;
    453     count -= n;
    454 
    455     while (count >= width) {
    456         if (forward) {
    457             fill_sequential(xptr, 0, width);
    458         } else {
    459             fill_backwards(xptr, width - 1, width);
    460         }
    461         forward = !forward;
    462         xptr += width;
    463         count -= width;
    464     }
    465 
    466     if (count > 0) {
    467         if (forward) {
    468             fill_sequential(xptr, 0, count);
    469         } else {
    470             fill_backwards(xptr, width - 1, count);
    471         }
    472     }
    473 }
    474 
    475 ///////////////////////////////////////////////////////////////////////////////
    476 
    477 SkBitmapProcState::MatrixProc
    478 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) {
    479 //    test_int_tileprocs();
    480     // check for our special case when there is no scale/affine/perspective
    481     if (trivial_matrix) {
    482         SkASSERT(!fDoFilter);
    483         fIntTileProcY = choose_int_tile_proc(fTileModeY);
    484         switch (fTileModeX) {
    485             case SkShader::kClamp_TileMode:
    486                 return clampx_nofilter_trans;
    487             case SkShader::kRepeat_TileMode:
    488                 return repeatx_nofilter_trans;
    489             case SkShader::kMirror_TileMode:
    490                 return mirrorx_nofilter_trans;
    491         }
    492     }
    493 
    494     int index = 0;
    495     if (fDoFilter) {
    496         index = 1;
    497     }
    498     if (fInvType & SkMatrix::kPerspective_Mask) {
    499         index += 4;
    500     } else if (fInvType & SkMatrix::kAffine_Mask) {
    501         index += 2;
    502     }
    503 
    504     if (SkShader::kClamp_TileMode == fTileModeX &&
    505         SkShader::kClamp_TileMode == fTileModeY)
    506     {
    507         // clamp gets special version of filterOne
    508         fFilterOneX = SK_Fixed1;
    509         fFilterOneY = SK_Fixed1;
    510         return ClampX_ClampY_Procs[index];
    511     }
    512 
    513     // all remaining procs use this form for filterOne
    514     fFilterOneX = SK_Fixed1 / fBitmap->width();
    515     fFilterOneY = SK_Fixed1 / fBitmap->height();
    516 
    517     if (SkShader::kRepeat_TileMode == fTileModeX &&
    518         SkShader::kRepeat_TileMode == fTileModeY)
    519     {
    520         return RepeatX_RepeatY_Procs[index];
    521     }
    522 
    523     fTileProcX = choose_tile_proc(fTileModeX);
    524     fTileProcY = choose_tile_proc(fTileModeY);
    525     return GeneralXY_Procs[index];
    526 }
    527 
    528