1 /* NEON optimized code (C) COPYRIGHT 2009 Motorola 2 * 3 * Use of this source code is governed by a BSD-style license that can be 4 * found in the LICENSE file. 5 */ 6 7 #include "SkBitmapProcState.h" 8 #include "SkPerspIter.h" 9 #include "SkShader.h" 10 #include "SkUtils.h" 11 #include "SkUtilsArm.h" 12 13 // Helper to ensure that when we shift down, we do it w/o sign-extension 14 // so the caller doesn't have to manually mask off the top 16 bits 15 // 16 static unsigned SK_USHIFT16(unsigned x) { 17 return x >> 16; 18 } 19 20 /* returns 0...(n-1) given any x (positive or negative). 21 22 As an example, if n (which is always positive) is 5... 23 24 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 25 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 26 */ 27 static inline int sk_int_mod(int x, int n) { 28 SkASSERT(n > 0); 29 if ((unsigned)x >= (unsigned)n) { 30 if (x < 0) { 31 x = n + ~(~x % n); 32 } else { 33 x = x % n; 34 } 35 } 36 return x; 37 } 38 39 /* 40 * The decal_ functions require that 41 * 1. dx > 0 42 * 2. [fx, fx+dx, fx+2dx, fx+3dx, ... fx+(count-1)dx] are all <= maxX 43 * 44 * In addition, we use SkFractionalInt to keep more fractional precision than 45 * just SkFixed, so we will abort the decal_ call if dx is very small, since 46 * the decal_ function just operates on SkFixed. If that were changed, we could 47 * skip the very_small test here. 48 */ 49 static inline bool can_truncate_to_fixed_for_decal(SkFractionalInt frX, 50 SkFractionalInt frDx, 51 int count, unsigned max) { 52 SkFixed dx = SkFractionalIntToFixed(frDx); 53 54 // if decal_ kept SkFractionalInt precision, this would just be dx <= 0 55 // I just made up the 1/256. Just don't want to perceive accumulated error 56 // if we truncate frDx and lose its low bits. 57 if (dx <= SK_Fixed1 / 256) { 58 return false; 59 } 60 61 // We cast to unsigned so we don't have to check for negative values, which 62 // will now appear as very large positive values, and thus fail our test! 63 SkFixed fx = SkFractionalIntToFixed(frX); 64 return (unsigned)SkFixedFloorToInt(fx) <= max && 65 (unsigned)SkFixedFloorToInt(fx + dx * (count - 1)) < max; 66 } 67 68 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 69 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 70 71 // Compile neon code paths if needed 72 #if !SK_ARM_NEON_IS_NONE 73 74 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp 75 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; 76 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; 77 78 #endif // !SK_ARM_NEON_IS_NONE 79 80 // Compile non-neon code path if needed 81 #if !SK_ARM_NEON_IS_ALWAYS 82 #define MAKENAME(suffix) ClampX_ClampY ## suffix 83 #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) 84 #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) 85 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) 86 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) 87 #define CHECK_FOR_DECAL 88 #include "SkBitmapProcState_matrix.h" 89 90 #define MAKENAME(suffix) RepeatX_RepeatY ## suffix 91 #define TILEX_PROCF(fx, max) SK_USHIFT16(((fx) & 0xFFFF) * ((max) + 1)) 92 #define TILEY_PROCF(fy, max) SK_USHIFT16(((fy) & 0xFFFF) * ((max) + 1)) 93 #define TILEX_LOW_BITS(fx, max) ((((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 94 #define TILEY_LOW_BITS(fy, max) ((((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 95 #include "SkBitmapProcState_matrix.h" 96 #endif 97 98 #define MAKENAME(suffix) GeneralXY ## suffix 99 #define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \ 100 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \ 101 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \ 102 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY 103 #define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX 104 #define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY 105 #define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX 106 #define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY 107 #define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1)) 108 #define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1)) 109 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1) 110 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1) 111 #include "SkBitmapProcState_matrix.h" 112 113 static inline U16CPU fixed_clamp(SkFixed x) 114 { 115 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR 116 if (x < 0) 117 x = 0; 118 if (x >> 16) 119 x = 0xFFFF; 120 #else 121 if (x >> 16) 122 { 123 #if 0 // is this faster? 124 x = (~x >> 31) & 0xFFFF; 125 #else 126 if (x < 0) 127 x = 0; 128 else 129 x = 0xFFFF; 130 #endif 131 } 132 #endif 133 return x; 134 } 135 136 static inline U16CPU fixed_repeat(SkFixed x) 137 { 138 return x & 0xFFFF; 139 } 140 141 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly. 142 // See http://code.google.com/p/skia/issues/detail?id=472 143 #if defined(_MSC_VER) && (_MSC_VER >= 1600) 144 #pragma optimize("", off) 145 #endif 146 147 static inline U16CPU fixed_mirror(SkFixed x) 148 { 149 SkFixed s = x << 15 >> 31; 150 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval 151 return (x ^ s) & 0xFFFF; 152 } 153 154 #if defined(_MSC_VER) && (_MSC_VER >= 1600) 155 #pragma optimize("", on) 156 #endif 157 158 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) 159 { 160 if (SkShader::kClamp_TileMode == m) 161 return fixed_clamp; 162 if (SkShader::kRepeat_TileMode == m) 163 return fixed_repeat; 164 SkASSERT(SkShader::kMirror_TileMode == m); 165 return fixed_mirror; 166 } 167 168 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) { 169 return (x >> 12) & 0xF; 170 } 171 172 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) { 173 return ((x * scale) >> 12) & 0xF; 174 } 175 176 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) { 177 if (SkShader::kClamp_TileMode == m) { 178 return fixed_clamp_lowbits; 179 } else { 180 SkASSERT(SkShader::kMirror_TileMode == m || 181 SkShader::kRepeat_TileMode == m); 182 // mirror and repeat have the same behavior for the low bits. 183 return fixed_repeat_or_mirrow_lowbits; 184 } 185 } 186 187 static inline U16CPU int_clamp(int x, int n) { 188 #ifdef SK_CPU_HAS_CONDITIONAL_INSTR 189 if (x >= n) 190 x = n - 1; 191 if (x < 0) 192 x = 0; 193 #else 194 if ((unsigned)x >= (unsigned)n) { 195 if (x < 0) { 196 x = 0; 197 } else { 198 x = n - 1; 199 } 200 } 201 #endif 202 return x; 203 } 204 205 static inline U16CPU int_repeat(int x, int n) { 206 return sk_int_mod(x, n); 207 } 208 209 static inline U16CPU int_mirror(int x, int n) { 210 x = sk_int_mod(x, 2 * n); 211 if (x >= n) { 212 x = n + ~(x - n); 213 } 214 return x; 215 } 216 217 #if 0 218 static void test_int_tileprocs() { 219 for (int i = -8; i <= 8; i++) { 220 SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3)); 221 } 222 } 223 #endif 224 225 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) { 226 if (SkShader::kClamp_TileMode == tm) 227 return int_clamp; 228 if (SkShader::kRepeat_TileMode == tm) 229 return int_repeat; 230 SkASSERT(SkShader::kMirror_TileMode == tm); 231 return int_mirror; 232 } 233 234 ////////////////////////////////////////////////////////////////////////////// 235 236 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) 237 { 238 int i; 239 240 for (i = (count >> 2); i > 0; --i) 241 { 242 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 243 fx += dx+dx; 244 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 245 fx += dx+dx; 246 } 247 count &= 3; 248 249 uint16_t* xx = (uint16_t*)dst; 250 for (i = count; i > 0; --i) { 251 *xx++ = SkToU16(fx >> 16); fx += dx; 252 } 253 } 254 255 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) 256 { 257 258 259 if (count & 1) 260 { 261 SkASSERT((fx >> (16 + 14)) == 0); 262 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 263 fx += dx; 264 } 265 while ((count -= 2) >= 0) 266 { 267 SkASSERT((fx >> (16 + 14)) == 0); 268 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 269 fx += dx; 270 271 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 272 fx += dx; 273 } 274 } 275 276 /////////////////////////////////////////////////////////////////////////////// 277 // stores the same as SCALE, but is cheaper to compute. Also since there is no 278 // scale, we don't need/have a FILTER version 279 280 static void fill_sequential(uint16_t xptr[], int start, int count) { 281 #if 1 282 if (reinterpret_cast<intptr_t>(xptr) & 0x2) { 283 *xptr++ = start++; 284 count -= 1; 285 } 286 if (count > 3) { 287 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr); 288 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1); 289 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3); 290 start += count & ~3; 291 int qcount = count >> 2; 292 do { 293 *xxptr++ = pattern0; 294 pattern0 += 0x40004; 295 *xxptr++ = pattern1; 296 pattern1 += 0x40004; 297 } while (--qcount != 0); 298 xptr = reinterpret_cast<uint16_t*>(xxptr); 299 count &= 3; 300 } 301 while (--count >= 0) { 302 *xptr++ = start++; 303 } 304 #else 305 for (int i = 0; i < count; i++) { 306 *xptr++ = start++; 307 } 308 #endif 309 } 310 311 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy, 312 int x, int y) { 313 SkPoint pt; 314 s.fInvProc(*s.fInvMatrix, SkIntToScalar(x) + SK_ScalarHalf, 315 SkIntToScalar(y) + SK_ScalarHalf, &pt); 316 **xy = s.fIntTileProcY(SkScalarToFixed(pt.fY) >> 16, 317 s.fBitmap->height()); 318 *xy += 1; // bump the ptr 319 // return our starting X position 320 return SkScalarToFixed(pt.fX) >> 16; 321 } 322 323 static void clampx_nofilter_trans(const SkBitmapProcState& s, 324 uint32_t xy[], int count, int x, int y) { 325 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 326 327 int xpos = nofilter_trans_preamble(s, &xy, x, y); 328 const int width = s.fBitmap->width(); 329 if (1 == width) { 330 // all of the following X values must be 0 331 memset(xy, 0, count * sizeof(uint16_t)); 332 return; 333 } 334 335 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 336 int n; 337 338 // fill before 0 as needed 339 if (xpos < 0) { 340 n = -xpos; 341 if (n > count) { 342 n = count; 343 } 344 memset(xptr, 0, n * sizeof(uint16_t)); 345 count -= n; 346 if (0 == count) { 347 return; 348 } 349 xptr += n; 350 xpos = 0; 351 } 352 353 // fill in 0..width-1 if needed 354 if (xpos < width) { 355 n = width - xpos; 356 if (n > count) { 357 n = count; 358 } 359 fill_sequential(xptr, xpos, n); 360 count -= n; 361 if (0 == count) { 362 return; 363 } 364 xptr += n; 365 } 366 367 // fill the remaining with the max value 368 sk_memset16(xptr, width - 1, count); 369 } 370 371 static void repeatx_nofilter_trans(const SkBitmapProcState& s, 372 uint32_t xy[], int count, int x, int y) { 373 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 374 375 int xpos = nofilter_trans_preamble(s, &xy, x, y); 376 const int width = s.fBitmap->width(); 377 if (1 == width) { 378 // all of the following X values must be 0 379 memset(xy, 0, count * sizeof(uint16_t)); 380 return; 381 } 382 383 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 384 int start = sk_int_mod(xpos, width); 385 int n = width - start; 386 if (n > count) { 387 n = count; 388 } 389 fill_sequential(xptr, start, n); 390 xptr += n; 391 count -= n; 392 393 while (count >= width) { 394 fill_sequential(xptr, 0, width); 395 xptr += width; 396 count -= width; 397 } 398 399 if (count > 0) { 400 fill_sequential(xptr, 0, count); 401 } 402 } 403 404 static void fill_backwards(uint16_t xptr[], int pos, int count) { 405 for (int i = 0; i < count; i++) { 406 SkASSERT(pos >= 0); 407 xptr[i] = pos--; 408 } 409 } 410 411 static void mirrorx_nofilter_trans(const SkBitmapProcState& s, 412 uint32_t xy[], int count, int x, int y) { 413 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 414 415 int xpos = nofilter_trans_preamble(s, &xy, x, y); 416 const int width = s.fBitmap->width(); 417 if (1 == width) { 418 // all of the following X values must be 0 419 memset(xy, 0, count * sizeof(uint16_t)); 420 return; 421 } 422 423 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 424 // need to know our start, and our initial phase (forward or backward) 425 bool forward; 426 int n; 427 int start = sk_int_mod(xpos, 2 * width); 428 if (start >= width) { 429 start = width + ~(start - width); 430 forward = false; 431 n = start + 1; // [start .. 0] 432 } else { 433 forward = true; 434 n = width - start; // [start .. width) 435 } 436 if (n > count) { 437 n = count; 438 } 439 if (forward) { 440 fill_sequential(xptr, start, n); 441 } else { 442 fill_backwards(xptr, start, n); 443 } 444 forward = !forward; 445 xptr += n; 446 count -= n; 447 448 while (count >= width) { 449 if (forward) { 450 fill_sequential(xptr, 0, width); 451 } else { 452 fill_backwards(xptr, width - 1, width); 453 } 454 forward = !forward; 455 xptr += width; 456 count -= width; 457 } 458 459 if (count > 0) { 460 if (forward) { 461 fill_sequential(xptr, 0, count); 462 } else { 463 fill_backwards(xptr, width - 1, count); 464 } 465 } 466 } 467 468 /////////////////////////////////////////////////////////////////////////////// 469 470 SkBitmapProcState::MatrixProc 471 SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { 472 // test_int_tileprocs(); 473 // check for our special case when there is no scale/affine/perspective 474 if (trivial_matrix) { 475 SkASSERT(!fDoFilter); 476 fIntTileProcY = choose_int_tile_proc(fTileModeY); 477 switch (fTileModeX) { 478 case SkShader::kClamp_TileMode: 479 return clampx_nofilter_trans; 480 case SkShader::kRepeat_TileMode: 481 return repeatx_nofilter_trans; 482 case SkShader::kMirror_TileMode: 483 return mirrorx_nofilter_trans; 484 } 485 } 486 487 int index = 0; 488 if (fDoFilter) { 489 index = 1; 490 } 491 if (fInvType & SkMatrix::kPerspective_Mask) { 492 index += 4; 493 } else if (fInvType & SkMatrix::kAffine_Mask) { 494 index += 2; 495 } 496 497 if (SkShader::kClamp_TileMode == fTileModeX && 498 SkShader::kClamp_TileMode == fTileModeY) 499 { 500 // clamp gets special version of filterOne 501 fFilterOneX = SK_Fixed1; 502 fFilterOneY = SK_Fixed1; 503 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index]; 504 } 505 506 // all remaining procs use this form for filterOne 507 fFilterOneX = SK_Fixed1 / fBitmap->width(); 508 fFilterOneY = SK_Fixed1 / fBitmap->height(); 509 510 if (SkShader::kRepeat_TileMode == fTileModeX && 511 SkShader::kRepeat_TileMode == fTileModeY) 512 { 513 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index]; 514 } 515 516 fTileProcX = choose_tile_proc(fTileModeX); 517 fTileProcY = choose_tile_proc(fTileModeY); 518 fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX); 519 fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY); 520 return GeneralXY_Procs[index]; 521 } 522