1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola 2 * 3 * Use of this source code is governed by a BSD-style license that can be 4 * found in the LICENSE file. 5 */ 6 7 #include "SkBitmapProcState.h" 8 #include "SkPerspIter.h" 9 #include "SkShader.h" 10 #include "SkUtils.h" 11 #include "SkUtilsArm.h" 12 #include "SkBitmapProcState_utils.h" 13 14 /* returns 0...(n-1) given any x (positive or negative). 15 16 As an example, if n (which is always positive) is 5... 17 18 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 19 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 20 */ 21 static inline int sk_int_mod(int x, int n) { 22 SkASSERT(n > 0); 23 if ((unsigned)x >= (unsigned)n) { 24 if (x < 0) { 25 x = n + ~(~x % n); 26 } else { 27 x = x % n; 28 } 29 } 30 return x; 31 } 32 33 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 34 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 35 36 // Compile neon code paths if needed 37 #if !SK_ARM_NEON_IS_NONE 38 39 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp 40 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; 41 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; 42 43 #endif // !SK_ARM_NEON_IS_NONE 44 45 // Compile non-neon code path if needed 46 #if !SK_ARM_NEON_IS_ALWAYS 47 #define MAKENAME(suffix) ClampX_ClampY ## suffix 48 #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) 49 #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) 50 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) 51 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) 52 #define CHECK_FOR_DECAL 53 #include "SkBitmapProcState_matrix.h" 54 55 #define MAKENAME(suffix) RepeatX_RepeatY ## suffix 56 #define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1)) 57 #define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1)) 58 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 59 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 60 #include "SkBitmapProcState_matrix.h" 61 #endif 62 63 #define MAKENAME(suffix) GeneralXY ## suffix 64 #define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \ 65 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \ 66 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \ 67 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY 68 #define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX 69 #define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY 70 #define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX 71 #define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY 72 #define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1)) 73 #define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1)) 74 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1) 75 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1) 76 #include "SkBitmapProcState_matrix.h" 77 78 static inline U16CPU fixed_clamp(SkFixed x) 79 { 80 if (x < 0) { 81 x = 0; 82 } 83 if (x >> 16) { 84 x = 0xFFFF; 85 } 86 return x; 87 } 88 89 static inline U16CPU fixed_repeat(SkFixed x) 90 { 91 return x & 0xFFFF; 92 } 93 94 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly. 95 // See http://code.google.com/p/skia/issues/detail?id=472 96 #if defined(_MSC_VER) && (_MSC_VER >= 1600) 97 #pragma optimize("", off) 98 #endif 99 100 static inline U16CPU fixed_mirror(SkFixed x) 101 { 102 SkFixed s = x << 15 >> 31; 103 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval 104 return (x ^ s) & 0xFFFF; 105 } 106 107 #if defined(_MSC_VER) && (_MSC_VER >= 1600) 108 #pragma optimize("", on) 109 #endif 110 111 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) 112 { 113 if (SkShader::kClamp_TileMode == m) 114 return fixed_clamp; 115 if (SkShader::kRepeat_TileMode == m) 116 return fixed_repeat; 117 SkASSERT(SkShader::kMirror_TileMode == m); 118 return fixed_mirror; 119 } 120 121 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) { 122 return (x >> 12) & 0xF; 123 } 124 125 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) { 126 return ((x * scale) >> 12) & 0xF; 127 } 128 129 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) { 130 if (SkShader::kClamp_TileMode == m) { 131 return fixed_clamp_lowbits; 132 } else { 133 SkASSERT(SkShader::kMirror_TileMode == m || 134 SkShader::kRepeat_TileMode == m); 135 // mirror and repeat have the same behavior for the low bits. 136 return fixed_repeat_or_mirrow_lowbits; 137 } 138 } 139 140 static inline U16CPU int_clamp(int x, int n) { 141 if (x >= n) { 142 x = n - 1; 143 } 144 if (x < 0) { 145 x = 0; 146 } 147 return x; 148 } 149 150 static inline U16CPU int_repeat(int x, int n) { 151 return sk_int_mod(x, n); 152 } 153 154 static inline U16CPU int_mirror(int x, int n) { 155 x = sk_int_mod(x, 2 * n); 156 if (x >= n) { 157 x = n + ~(x - n); 158 } 159 return x; 160 } 161 162 #if 0 163 static void test_int_tileprocs() { 164 for (int i = -8; i <= 8; i++) { 165 SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3)); 166 } 167 } 168 #endif 169 170 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) { 171 if (SkShader::kClamp_TileMode == tm) 172 return int_clamp; 173 if (SkShader::kRepeat_TileMode == tm) 174 return int_repeat; 175 SkASSERT(SkShader::kMirror_TileMode == tm); 176 return int_mirror; 177 } 178 179 ////////////////////////////////////////////////////////////////////////////// 180 181 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) 182 { 183 int i; 184 185 for (i = (count >> 2); i > 0; --i) 186 { 187 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 188 fx += dx+dx; 189 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 190 fx += dx+dx; 191 } 192 count &= 3; 193 194 uint16_t* xx = (uint16_t*)dst; 195 for (i = count; i > 0; --i) { 196 *xx++ = SkToU16(fx >> 16); fx += dx; 197 } 198 } 199 200 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) 201 { 202 203 204 if (count & 1) 205 { 206 SkASSERT((fx >> (16 + 14)) == 0); 207 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 208 fx += dx; 209 } 210 while ((count -= 2) >= 0) 211 { 212 SkASSERT((fx >> (16 + 14)) == 0); 213 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 214 fx += dx; 215 216 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 217 fx += dx; 218 } 219 } 220 221 /////////////////////////////////////////////////////////////////////////////// 222 // stores the same as SCALE, but is cheaper to compute. Also since there is no 223 // scale, we don't need/have a FILTER version 224 225 static void fill_sequential(uint16_t xptr[], int start, int count) { 226 #if 1 227 if (reinterpret_cast<intptr_t>(xptr) & 0x2) { 228 *xptr++ = start++; 229 count -= 1; 230 } 231 if (count > 3) { 232 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr); 233 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1); 234 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3); 235 start += count & ~3; 236 int qcount = count >> 2; 237 do { 238 *xxptr++ = pattern0; 239 pattern0 += 0x40004; 240 *xxptr++ = pattern1; 241 pattern1 += 0x40004; 242 } while (--qcount != 0); 243 xptr = reinterpret_cast<uint16_t*>(xxptr); 244 count &= 3; 245 } 246 while (--count >= 0) { 247 *xptr++ = start++; 248 } 249 #else 250 for (int i = 0; i < count; i++) { 251 *xptr++ = start++; 252 } 253 #endif 254 } 255 256 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy, 257 int x, int y) { 258 SkPoint pt; 259 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, 260 SkIntToScalar(y) + SK_ScalarHalf, &pt); 261 **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16, 262 s.fBitmap->height()); 263 *xy += 1; // bump the ptr 264 // return our starting X position 265 return SkScalarToFixed(pt.fX) >> 16; 266 } 267 268 static void clampx_nofilter_trans(const SkBitmapProcState& s, 269 uint32_t xy[], int count, int x, int y) { 270 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 271 272 int xpos = nofilter_trans_preamble(s, &xy, x, y); 273 const int width = s.fBitmap->width(); 274 if (1 == width) { 275 // all of the following X values must be 0 276 memset(xy, 0, count * sizeof(uint16_t)); 277 return; 278 } 279 280 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 281 int n; 282 283 // fill before 0 as needed 284 if (xpos < 0) { 285 n = -xpos; 286 if (n > count) { 287 n = count; 288 } 289 memset(xptr, 0, n * sizeof(uint16_t)); 290 count -= n; 291 if (0 == count) { 292 return; 293 } 294 xptr += n; 295 xpos = 0; 296 } 297 298 // fill in 0..width-1 if needed 299 if (xpos < width) { 300 n = width - xpos; 301 if (n > count) { 302 n = count; 303 } 304 fill_sequential(xptr, xpos, n); 305 count -= n; 306 if (0 == count) { 307 return; 308 } 309 xptr += n; 310 } 311 312 // fill the remaining with the max value 313 sk_memset16(xptr, width - 1, count); 314 } 315 316 static void repeatx_nofilter_trans(const SkBitmapProcState& s, 317 uint32_t xy[], int count, int x, int y) { 318 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 319 320 int xpos = nofilter_trans_preamble(s, &xy, x, y); 321 const int width = s.fBitmap->width(); 322 if (1 == width) { 323 // all of the following X values must be 0 324 memset(xy, 0, count * sizeof(uint16_t)); 325 return; 326 } 327 328 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 329 int start = sk_int_mod(xpos, width); 330 int n = width - start; 331 if (n > count) { 332 n = count; 333 } 334 fill_sequential(xptr, start, n); 335 xptr += n; 336 count -= n; 337 338 while (count >= width) { 339 fill_sequential(xptr, 0, width); 340 xptr += width; 341 count -= width; 342 } 343 344 if (count > 0) { 345 fill_sequential(xptr, 0, count); 346 } 347 } 348 349 static void fill_backwards(uint16_t xptr[], int pos, int count) { 350 for (int i = 0; i < count; i++) { 351 SkASSERT(pos >= 0); 352 xptr[i] = pos--; 353 } 354 } 355 356 static void mirrorx_nofilter_trans(const SkBitmapProcState& s, 357 uint32_t xy[], int count, int x, int y) { 358 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 359 360 int xpos = nofilter_trans_preamble(s, &xy, x, y); 361 const int width = s.fBitmap->width(); 362 if (1 == width) { 363 // all of the following X values must be 0 364 memset(xy, 0, count * sizeof(uint16_t)); 365 return; 366 } 367 368 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 369 // need to know our start, and our initial phase (forward or backward) 370 bool forward; 371 int n; 372 int start = sk_int_mod(xpos, 2 * width); 373 if (start >= width) { 374 start = width + ~(start - width); 375 forward = false; 376 n = start + 1; // [start .. 0] 377 } else { 378 forward = true; 379 n = width - start; // [start .. width) 380 } 381 if (n > count) { 382 n = count; 383 } 384 if (forward) { 385 fill_sequential(xptr, start, n); 386 } else { 387 fill_backwards(xptr, start, n); 388 } 389 forward = !forward; 390 xptr += n; 391 count -= n; 392 393 while (count >= width) { 394 if (forward) { 395 fill_sequential(xptr, 0, width); 396 } else { 397 fill_backwards(xptr, width - 1, width); 398 } 399 forward = !forward; 400 xptr += width; 401 count -= width; 402 } 403 404 if (count > 0) { 405 if (forward) { 406 fill_sequential(xptr, 0, count); 407 } else { 408 fill_backwards(xptr, width - 1, count); 409 } 410 } 411 } 412 413 /////////////////////////////////////////////////////////////////////////////// 414 415 SkBitmapProcState::MatrixProc 416 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { 417 // test_int_tileprocs(); 418 // check for our special case when there is no scale/affine/perspective 419 if (trivial_matrix) { 420 SkASSERT(SkPaint::kNone_FilterLevel == fFilterLevel); 421 fIntTileProcY = choose_int_tile_proc(fTileModeY); 422 switch (fTileModeX) { 423 case SkShader::kClamp_TileMode: 424 return clampx_nofilter_trans; 425 case SkShader::kRepeat_TileMode: 426 return repeatx_nofilter_trans; 427 case SkShader::kMirror_TileMode: 428 return mirrorx_nofilter_trans; 429 } 430 } 431 432 int index = 0; 433 if (fFilterLevel != SkPaint::kNone_FilterLevel) { 434 index = 1; 435 } 436 if (fInvType & SkMatrix::kPerspective_Mask) { 437 index += 4; 438 } else if (fInvType & SkMatrix::kAffine_Mask) { 439 index += 2; 440 } 441 442 if (SkShader::kClamp_TileMode == fTileModeX && 443 SkShader::kClamp_TileMode == fTileModeY) 444 { 445 // clamp gets special version of filterOne 446 fFilterOneX = SK_Fixed1; 447 fFilterOneY = SK_Fixed1; 448 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index]; 449 } 450 451 // all remaining procs use this form for filterOne 452 fFilterOneX = SK_Fixed1 / fBitmap->width(); 453 fFilterOneY = SK_Fixed1 / fBitmap->height(); 454 455 if (SkShader::kRepeat_TileMode == fTileModeX && 456 SkShader::kRepeat_TileMode == fTileModeY) 457 { 458 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index]; 459 } 460 461 fTileProcX = choose_tile_proc(fTileModeX); 462 fTileProcY = choose_tile_proc(fTileModeY); 463 fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX); 464 fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY); 465 return GeneralXY_Procs[index]; 466 } 467