1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola 2 * 3 * Use of this source code is governed by a BSD-style license that can be 4 * found in the LICENSE file. 5 */ 6 7 #include "SkBitmapProcState.h" 8 #include "SkPerspIter.h" 9 #include "SkShader.h" 10 #include "SkUtils.h" 11 #include "SkUtilsArm.h" 12 #include "SkBitmapProcState_utils.h" 13 14 /* returns 0...(n-1) given any x (positive or negative). 15 16 As an example, if n (which is always positive) is 5... 17 18 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 19 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 20 */ 21 static inline int sk_int_mod(int x, int n) { 22 SkASSERT(n > 0); 23 if ((unsigned)x >= (unsigned)n) { 24 if (x < 0) { 25 x = n + ~(~x % n); 26 } else { 27 x = x % n; 28 } 29 } 30 return x; 31 } 32 33 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 34 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 35 36 #include "SkBitmapProcState_matrix_template.h" 37 38 /////////////////////////////////////////////////////////////////////////////// 39 40 // Compile neon code paths if needed 41 #if !SK_ARM_NEON_IS_NONE 42 43 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp 44 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; 45 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; 46 47 #endif // !SK_ARM_NEON_IS_NONE 48 49 // Compile non-neon code path if needed 50 #if !SK_ARM_NEON_IS_ALWAYS 51 #define MAKENAME(suffix) ClampX_ClampY ## suffix 52 #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) 53 #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) 54 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) 55 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) 56 #define CHECK_FOR_DECAL 57 #include "SkBitmapProcState_matrix.h" 58 59 struct ClampTileProcs { 60 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) { 61 return SkClampMax(fx >> 16, max); 62 } 63 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) { 64 return SkClampMax(fy >> 16, max); 65 } 66 }; 67 68 // Referenced in opts_check_x86.cpp 69 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[], 70 int count, int x, int y) { 71 return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y); 72 } 73 void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[], 74 int count, int x, int y) { 75 return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y); 76 } 77 78 static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = { 79 // only clamp lives in the right coord space to check for decal 80 ClampX_ClampY_nofilter_scale, 81 ClampX_ClampY_filter_scale, 82 ClampX_ClampY_nofilter_affine, 83 ClampX_ClampY_filter_affine, 84 NoFilterProc_Persp<ClampTileProcs>, 85 ClampX_ClampY_filter_persp 86 }; 87 88 #define MAKENAME(suffix) RepeatX_RepeatY ## suffix 89 #define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1)) 90 #define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1)) 91 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 92 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 93 #include "SkBitmapProcState_matrix.h" 94 95 struct RepeatTileProcs { 96 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) { 97 return SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1)); 98 } 99 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) { 100 return SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1)); 101 } 102 }; 103 104 static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = { 105 NoFilterProc_Scale<RepeatTileProcs, false>, 106 RepeatX_RepeatY_filter_scale, 107 NoFilterProc_Affine<RepeatTileProcs>, 108 RepeatX_RepeatY_filter_affine, 109 NoFilterProc_Persp<RepeatTileProcs>, 110 RepeatX_RepeatY_filter_persp 111 }; 112 #endif 113 114 #define MAKENAME(suffix) GeneralXY ## suffix 115 #define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \ 116 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \ 117 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \ 118 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY 119 #define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX 120 #define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY 121 #define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX 122 #define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY 123 #define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1)) 124 #define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1)) 125 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1) 126 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1) 127 #include "SkBitmapProcState_matrix.h" 128 129 struct GeneralTileProcs { 130 static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) { 131 return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1)); 132 } 133 static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) { 134 return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1)); 135 } 136 }; 137 138 static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = { 139 NoFilterProc_Scale<GeneralTileProcs, false>, 140 GeneralXY_filter_scale, 141 NoFilterProc_Affine<GeneralTileProcs>, 142 GeneralXY_filter_affine, 143 NoFilterProc_Persp<GeneralTileProcs>, 144 GeneralXY_filter_persp 145 }; 146 147 /////////////////////////////////////////////////////////////////////////////// 148 149 static inline U16CPU fixed_clamp(SkFixed x) { 150 if (x < 0) { 151 x = 0; 152 } 153 if (x >> 16) { 154 x = 0xFFFF; 155 } 156 return x; 157 } 158 159 static inline U16CPU fixed_repeat(SkFixed x) { 160 return x & 0xFFFF; 161 } 162 163 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly. 164 // See http://code.google.com/p/skia/issues/detail?id=472 165 #if defined(_MSC_VER) && (_MSC_VER >= 1600) 166 #pragma optimize("", off) 167 #endif 168 169 static inline U16CPU fixed_mirror(SkFixed x) { 170 SkFixed s = x << 15 >> 31; 171 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval 172 return (x ^ s) & 0xFFFF; 173 } 174 175 #if defined(_MSC_VER) && (_MSC_VER >= 1600) 176 #pragma optimize("", on) 177 #endif 178 179 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) { 180 if (SkShader::kClamp_TileMode == m) { 181 return fixed_clamp; 182 } 183 if (SkShader::kRepeat_TileMode == m) { 184 return fixed_repeat; 185 } 186 SkASSERT(SkShader::kMirror_TileMode == m); 187 return fixed_mirror; 188 } 189 190 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) { 191 return (x >> 12) & 0xF; 192 } 193 194 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) { 195 return ((x * scale) >> 12) & 0xF; 196 } 197 198 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) { 199 if (SkShader::kClamp_TileMode == m) { 200 return fixed_clamp_lowbits; 201 } else { 202 SkASSERT(SkShader::kMirror_TileMode == m || 203 SkShader::kRepeat_TileMode == m); 204 // mirror and repeat have the same behavior for the low bits. 205 return fixed_repeat_or_mirrow_lowbits; 206 } 207 } 208 209 static inline U16CPU int_clamp(int x, int n) { 210 if (x >= n) { 211 x = n - 1; 212 } 213 if (x < 0) { 214 x = 0; 215 } 216 return x; 217 } 218 219 static inline U16CPU int_repeat(int x, int n) { 220 return sk_int_mod(x, n); 221 } 222 223 static inline U16CPU int_mirror(int x, int n) { 224 x = sk_int_mod(x, 2 * n); 225 if (x >= n) { 226 x = n + ~(x - n); 227 } 228 return x; 229 } 230 231 #if 0 232 static void test_int_tileprocs() { 233 for (int i = -8; i <= 8; i++) { 234 SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3)); 235 } 236 } 237 #endif 238 239 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) { 240 if (SkShader::kClamp_TileMode == tm) 241 return int_clamp; 242 if (SkShader::kRepeat_TileMode == tm) 243 return int_repeat; 244 SkASSERT(SkShader::kMirror_TileMode == tm); 245 return int_mirror; 246 } 247 248 ////////////////////////////////////////////////////////////////////////////// 249 250 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { 251 int i; 252 253 for (i = (count >> 2); i > 0; --i) { 254 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 255 fx += dx+dx; 256 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 257 fx += dx+dx; 258 } 259 count &= 3; 260 261 uint16_t* xx = (uint16_t*)dst; 262 for (i = count; i > 0; --i) { 263 *xx++ = SkToU16(fx >> 16); fx += dx; 264 } 265 } 266 267 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { 268 if (count & 1) { 269 SkASSERT((fx >> (16 + 14)) == 0); 270 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 271 fx += dx; 272 } 273 while ((count -= 2) >= 0) { 274 SkASSERT((fx >> (16 + 14)) == 0); 275 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 276 fx += dx; 277 278 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 279 fx += dx; 280 } 281 } 282 283 /////////////////////////////////////////////////////////////////////////////// 284 // stores the same as SCALE, but is cheaper to compute. Also since there is no 285 // scale, we don't need/have a FILTER version 286 287 static void fill_sequential(uint16_t xptr[], int start, int count) { 288 #if 1 289 if (reinterpret_cast<intptr_t>(xptr) & 0x2) { 290 *xptr++ = start++; 291 count -= 1; 292 } 293 if (count > 3) { 294 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr); 295 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1); 296 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3); 297 start += count & ~3; 298 int qcount = count >> 2; 299 do { 300 *xxptr++ = pattern0; 301 pattern0 += 0x40004; 302 *xxptr++ = pattern1; 303 pattern1 += 0x40004; 304 } while (--qcount != 0); 305 xptr = reinterpret_cast<uint16_t*>(xxptr); 306 count &= 3; 307 } 308 while (--count >= 0) { 309 *xptr++ = start++; 310 } 311 #else 312 for (int i = 0; i < count; i++) { 313 *xptr++ = start++; 314 } 315 #endif 316 } 317 318 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy, 319 int x, int y) { 320 SkPoint pt; 321 s.fInvProc(s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, 322 SkIntToScalar(y) + SK_ScalarHalf, &pt); 323 **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16, 324 s.fBitmap->height()); 325 *xy += 1; // bump the ptr 326 // return our starting X position 327 return SkScalarToFixed(pt.fX) >> 16; 328 } 329 330 static void clampx_nofilter_trans(const SkBitmapProcState& s, 331 uint32_t xy[], int count, int x, int y) { 332 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 333 334 int xpos = nofilter_trans_preamble(s, &xy, x, y); 335 const int width = s.fBitmap->width(); 336 if (1 == width) { 337 // all of the following X values must be 0 338 memset(xy, 0, count * sizeof(uint16_t)); 339 return; 340 } 341 342 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 343 int n; 344 345 // fill before 0 as needed 346 if (xpos < 0) { 347 n = -xpos; 348 if (n > count) { 349 n = count; 350 } 351 memset(xptr, 0, n * sizeof(uint16_t)); 352 count -= n; 353 if (0 == count) { 354 return; 355 } 356 xptr += n; 357 xpos = 0; 358 } 359 360 // fill in 0..width-1 if needed 361 if (xpos < width) { 362 n = width - xpos; 363 if (n > count) { 364 n = count; 365 } 366 fill_sequential(xptr, xpos, n); 367 count -= n; 368 if (0 == count) { 369 return; 370 } 371 xptr += n; 372 } 373 374 // fill the remaining with the max value 375 sk_memset16(xptr, width - 1, count); 376 } 377 378 static void repeatx_nofilter_trans(const SkBitmapProcState& s, 379 uint32_t xy[], int count, int x, int y) { 380 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 381 382 int xpos = nofilter_trans_preamble(s, &xy, x, y); 383 const int width = s.fBitmap->width(); 384 if (1 == width) { 385 // all of the following X values must be 0 386 memset(xy, 0, count * sizeof(uint16_t)); 387 return; 388 } 389 390 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 391 int start = sk_int_mod(xpos, width); 392 int n = width - start; 393 if (n > count) { 394 n = count; 395 } 396 fill_sequential(xptr, start, n); 397 xptr += n; 398 count -= n; 399 400 while (count >= width) { 401 fill_sequential(xptr, 0, width); 402 xptr += width; 403 count -= width; 404 } 405 406 if (count > 0) { 407 fill_sequential(xptr, 0, count); 408 } 409 } 410 411 static void fill_backwards(uint16_t xptr[], int pos, int count) { 412 for (int i = 0; i < count; i++) { 413 SkASSERT(pos >= 0); 414 xptr[i] = pos--; 415 } 416 } 417 418 static void mirrorx_nofilter_trans(const SkBitmapProcState& s, 419 uint32_t xy[], int count, int x, int y) { 420 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 421 422 int xpos = nofilter_trans_preamble(s, &xy, x, y); 423 const int width = s.fBitmap->width(); 424 if (1 == width) { 425 // all of the following X values must be 0 426 memset(xy, 0, count * sizeof(uint16_t)); 427 return; 428 } 429 430 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 431 // need to know our start, and our initial phase (forward or backward) 432 bool forward; 433 int n; 434 int start = sk_int_mod(xpos, 2 * width); 435 if (start >= width) { 436 start = width + ~(start - width); 437 forward = false; 438 n = start + 1; // [start .. 0] 439 } else { 440 forward = true; 441 n = width - start; // [start .. width) 442 } 443 if (n > count) { 444 n = count; 445 } 446 if (forward) { 447 fill_sequential(xptr, start, n); 448 } else { 449 fill_backwards(xptr, start, n); 450 } 451 forward = !forward; 452 xptr += n; 453 count -= n; 454 455 while (count >= width) { 456 if (forward) { 457 fill_sequential(xptr, 0, width); 458 } else { 459 fill_backwards(xptr, width - 1, width); 460 } 461 forward = !forward; 462 xptr += width; 463 count -= width; 464 } 465 466 if (count > 0) { 467 if (forward) { 468 fill_sequential(xptr, 0, count); 469 } else { 470 fill_backwards(xptr, width - 1, count); 471 } 472 } 473 } 474 475 /////////////////////////////////////////////////////////////////////////////// 476 477 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { 478 // test_int_tileprocs(); 479 // check for our special case when there is no scale/affine/perspective 480 if (trivial_matrix && kNone_SkFilterQuality == fFilterLevel) { 481 fIntTileProcY = choose_int_tile_proc(fTileModeY); 482 switch (fTileModeX) { 483 case SkShader::kClamp_TileMode: 484 return clampx_nofilter_trans; 485 case SkShader::kRepeat_TileMode: 486 return repeatx_nofilter_trans; 487 case SkShader::kMirror_TileMode: 488 return mirrorx_nofilter_trans; 489 } 490 } 491 492 int index = 0; 493 if (fFilterLevel != kNone_SkFilterQuality) { 494 index = 1; 495 } 496 if (fInvType & SkMatrix::kPerspective_Mask) { 497 index += 4; 498 } else if (fInvType & SkMatrix::kAffine_Mask) { 499 index += 2; 500 } 501 502 if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) { 503 // clamp gets special version of filterOne 504 fFilterOneX = SK_Fixed1; 505 fFilterOneY = SK_Fixed1; 506 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index]; 507 } 508 509 // all remaining procs use this form for filterOne 510 fFilterOneX = SK_Fixed1 / fBitmap->width(); 511 fFilterOneY = SK_Fixed1 / fBitmap->height(); 512 513 if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) { 514 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index]; 515 } 516 517 fTileProcX = choose_tile_proc(fTileModeX); 518 fTileProcY = choose_tile_proc(fTileModeY); 519 fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX); 520 fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY); 521 return GeneralXY_Procs[index]; 522 } 523