1 /* 2 * Copyright 2008 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 // The copyright below was added in 2009, but I see no record of moto contributions...? 9 10 /* NEON optimized code (C) COPYRIGHT 2009 Motorola 11 * 12 * Use of this source code is governed by a BSD-style license that can be 13 * found in the LICENSE file. 14 */ 15 16 #include "SkBitmapProcState.h" 17 #include "SkPerspIter.h" 18 #include "SkShader.h" 19 #include "SkUtils.h" 20 #include "SkUtilsArm.h" 21 #include "SkBitmapProcState_utils.h" 22 23 /* returns 0...(n-1) given any x (positive or negative). 24 25 As an example, if n (which is always positive) is 5... 26 27 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 28 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 29 */ 30 static inline int sk_int_mod(int x, int n) { 31 SkASSERT(n > 0); 32 if ((unsigned)x >= (unsigned)n) { 33 if (x < 0) { 34 x = n + ~(~x % n); 35 } else { 36 x = x % n; 37 } 38 } 39 return x; 40 } 41 42 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 43 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 44 45 #include "SkBitmapProcState_matrix_template.h" 46 47 /////////////////////////////////////////////////////////////////////////////// 48 49 // Compile neon code paths if needed 50 #if !SK_ARM_NEON_IS_NONE 51 52 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp 53 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; 54 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; 55 56 #endif // !SK_ARM_NEON_IS_NONE 57 58 // Compile non-neon code path if needed 59 #if !SK_ARM_NEON_IS_ALWAYS 60 #define MAKENAME(suffix) ClampX_ClampY ## suffix 61 #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) 62 #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) 63 #define TILEX_LOW_BITS(fx, max) (((fx) >> 12) & 0xF) 64 #define TILEY_LOW_BITS(fy, max) (((fy) >> 12) & 0xF) 65 #define CHECK_FOR_DECAL 66 #include "SkBitmapProcState_matrix.h" 67 68 struct ClampTileProcs { 69 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) { 70 return SkClampMax(fx >> 16, max); 71 } 72 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) { 73 return SkClampMax(fy >> 16, max); 74 } 75 }; 76 77 // Referenced in opts_check_x86.cpp 78 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[], 79 int count, int x, int y) { 80 return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y); 81 } 82 void ClampX_ClampY_nofilter_affine(const SkBitmapProcState& s, uint32_t xy[], 83 int count, int x, int y) { 84 return NoFilterProc_Affine<ClampTileProcs>(s, xy, count, x, y); 85 } 86 87 static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = { 88 // only clamp lives in the right coord space to check for decal 89 ClampX_ClampY_nofilter_scale, 90 ClampX_ClampY_filter_scale, 91 ClampX_ClampY_nofilter_affine, 92 ClampX_ClampY_filter_affine, 93 NoFilterProc_Persp<ClampTileProcs>, 94 ClampX_ClampY_filter_persp 95 }; 96 97 #define MAKENAME(suffix) RepeatX_RepeatY ## suffix 98 #define TILEX_PROCF(fx, max) SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1)) 99 #define TILEY_PROCF(fy, max) SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1)) 100 #define TILEX_LOW_BITS(fx, max) (((unsigned)((fx) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 101 #define TILEY_LOW_BITS(fy, max) (((unsigned)((fy) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 102 #include "SkBitmapProcState_matrix.h" 103 104 struct RepeatTileProcs { 105 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) { 106 SkASSERT(max < 65535); 107 return SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1)); 108 } 109 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) { 110 SkASSERT(max < 65535); 111 return SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1)); 112 } 113 }; 114 115 static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = { 116 NoFilterProc_Scale<RepeatTileProcs, false>, 117 RepeatX_RepeatY_filter_scale, 118 NoFilterProc_Affine<RepeatTileProcs>, 119 RepeatX_RepeatY_filter_affine, 120 NoFilterProc_Persp<RepeatTileProcs>, 121 RepeatX_RepeatY_filter_persp 122 }; 123 #endif 124 125 #define MAKENAME(suffix) GeneralXY ## suffix 126 #define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \ 127 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; \ 128 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX = (state).fTileLowBitsProcX; (void) tileLowBitsProcX; \ 129 SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY = (state).fTileLowBitsProcY; (void) tileLowBitsProcY 130 #define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcX 131 #define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY, SkBitmapProcState::FixedTileLowBitsProc tileLowBitsProcY 132 #define PREAMBLE_ARG_X , tileProcX, tileLowBitsProcX 133 #define PREAMBLE_ARG_Y , tileProcY, tileLowBitsProcY 134 #define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1)) 135 #define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1)) 136 #define TILEX_LOW_BITS(fx, max) tileLowBitsProcX(fx, (max) + 1) 137 #define TILEY_LOW_BITS(fy, max) tileLowBitsProcY(fy, (max) + 1) 138 #include "SkBitmapProcState_matrix.h" 139 140 struct GeneralTileProcs { 141 static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) { 142 return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1)); 143 } 144 static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) { 145 return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1)); 146 } 147 }; 148 149 static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = { 150 NoFilterProc_Scale<GeneralTileProcs, false>, 151 GeneralXY_filter_scale, 152 NoFilterProc_Affine<GeneralTileProcs>, 153 GeneralXY_filter_affine, 154 NoFilterProc_Persp<GeneralTileProcs>, 155 GeneralXY_filter_persp 156 }; 157 158 /////////////////////////////////////////////////////////////////////////////// 159 160 static inline U16CPU fixed_clamp(SkFixed x) { 161 if (x < 0) { 162 x = 0; 163 } 164 if (x >> 16) { 165 x = 0xFFFF; 166 } 167 return x; 168 } 169 170 static inline U16CPU fixed_repeat(SkFixed x) { 171 return x & 0xFFFF; 172 } 173 174 // Visual Studio 2010 (MSC_VER=1600) optimizes bit-shift code incorrectly. 175 // See http://code.google.com/p/skia/issues/detail?id=472 176 #if defined(_MSC_VER) && (_MSC_VER >= 1600) 177 #pragma optimize("", off) 178 #endif 179 180 static inline U16CPU fixed_mirror(SkFixed x) { 181 SkFixed s = SkLeftShift(x, 15) >> 31; 182 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval 183 return (x ^ s) & 0xFFFF; 184 } 185 186 #if defined(_MSC_VER) && (_MSC_VER >= 1600) 187 #pragma optimize("", on) 188 #endif 189 190 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) { 191 if (SkShader::kClamp_TileMode == m) { 192 return fixed_clamp; 193 } 194 if (SkShader::kRepeat_TileMode == m) { 195 return fixed_repeat; 196 } 197 SkASSERT(SkShader::kMirror_TileMode == m); 198 return fixed_mirror; 199 } 200 201 static inline U16CPU fixed_clamp_lowbits(SkFixed x, int) { 202 return (x >> 12) & 0xF; 203 } 204 205 static inline U16CPU fixed_repeat_or_mirrow_lowbits(SkFixed x, int scale) { 206 return ((x * scale) >> 12) & 0xF; 207 } 208 209 static SkBitmapProcState::FixedTileLowBitsProc choose_tile_lowbits_proc(unsigned m) { 210 if (SkShader::kClamp_TileMode == m) { 211 return fixed_clamp_lowbits; 212 } else { 213 SkASSERT(SkShader::kMirror_TileMode == m || 214 SkShader::kRepeat_TileMode == m); 215 // mirror and repeat have the same behavior for the low bits. 216 return fixed_repeat_or_mirrow_lowbits; 217 } 218 } 219 220 static inline U16CPU int_clamp(int x, int n) { 221 if (x >= n) { 222 x = n - 1; 223 } 224 if (x < 0) { 225 x = 0; 226 } 227 return x; 228 } 229 230 static inline U16CPU int_repeat(int x, int n) { 231 return sk_int_mod(x, n); 232 } 233 234 static inline U16CPU int_mirror(int x, int n) { 235 x = sk_int_mod(x, 2 * n); 236 if (x >= n) { 237 x = n + ~(x - n); 238 } 239 return x; 240 } 241 242 #if 0 243 static void test_int_tileprocs() { 244 for (int i = -8; i <= 8; i++) { 245 SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3)); 246 } 247 } 248 #endif 249 250 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) { 251 if (SkShader::kClamp_TileMode == tm) 252 return int_clamp; 253 if (SkShader::kRepeat_TileMode == tm) 254 return int_repeat; 255 SkASSERT(SkShader::kMirror_TileMode == tm); 256 return int_mirror; 257 } 258 259 ////////////////////////////////////////////////////////////////////////////// 260 261 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { 262 int i; 263 264 for (i = (count >> 2); i > 0; --i) { 265 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 266 fx += dx+dx; 267 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 268 fx += dx+dx; 269 } 270 count &= 3; 271 272 uint16_t* xx = (uint16_t*)dst; 273 for (i = count; i > 0; --i) { 274 *xx++ = SkToU16(fx >> 16); fx += dx; 275 } 276 } 277 278 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { 279 if (count & 1) { 280 SkASSERT((fx >> (16 + 14)) == 0); 281 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 282 fx += dx; 283 } 284 while ((count -= 2) >= 0) { 285 SkASSERT((fx >> (16 + 14)) == 0); 286 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 287 fx += dx; 288 289 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 290 fx += dx; 291 } 292 } 293 294 /////////////////////////////////////////////////////////////////////////////// 295 // stores the same as SCALE, but is cheaper to compute. Also since there is no 296 // scale, we don't need/have a FILTER version 297 298 static void fill_sequential(uint16_t xptr[], int start, int count) { 299 #if 1 300 if (reinterpret_cast<intptr_t>(xptr) & 0x2) { 301 *xptr++ = start++; 302 count -= 1; 303 } 304 if (count > 3) { 305 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr); 306 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1); 307 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3); 308 start += count & ~3; 309 int qcount = count >> 2; 310 do { 311 *xxptr++ = pattern0; 312 pattern0 += 0x40004; 313 *xxptr++ = pattern1; 314 pattern1 += 0x40004; 315 } while (--qcount != 0); 316 xptr = reinterpret_cast<uint16_t*>(xxptr); 317 count &= 3; 318 } 319 while (--count >= 0) { 320 *xptr++ = start++; 321 } 322 #else 323 for (int i = 0; i < count; i++) { 324 *xptr++ = start++; 325 } 326 #endif 327 } 328 329 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy, 330 int x, int y) { 331 const SkBitmapProcStateAutoMapper mapper(s, x, y); 332 **xy = s.fIntTileProcY(mapper.intY(), s.fPixmap.height()); 333 *xy += 1; // bump the ptr 334 // return our starting X position 335 return mapper.intX(); 336 } 337 338 static void clampx_nofilter_trans(const SkBitmapProcState& s, 339 uint32_t xy[], int count, int x, int y) { 340 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 341 342 int xpos = nofilter_trans_preamble(s, &xy, x, y); 343 const int width = s.fPixmap.width(); 344 if (1 == width) { 345 // all of the following X values must be 0 346 memset(xy, 0, count * sizeof(uint16_t)); 347 return; 348 } 349 350 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 351 int n; 352 353 // fill before 0 as needed 354 if (xpos < 0) { 355 n = -xpos; 356 if (n > count) { 357 n = count; 358 } 359 memset(xptr, 0, n * sizeof(uint16_t)); 360 count -= n; 361 if (0 == count) { 362 return; 363 } 364 xptr += n; 365 xpos = 0; 366 } 367 368 // fill in 0..width-1 if needed 369 if (xpos < width) { 370 n = width - xpos; 371 if (n > count) { 372 n = count; 373 } 374 fill_sequential(xptr, xpos, n); 375 count -= n; 376 if (0 == count) { 377 return; 378 } 379 xptr += n; 380 } 381 382 // fill the remaining with the max value 383 sk_memset16(xptr, width - 1, count); 384 } 385 386 static void repeatx_nofilter_trans(const SkBitmapProcState& s, 387 uint32_t xy[], int count, int x, int y) { 388 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 389 390 int xpos = nofilter_trans_preamble(s, &xy, x, y); 391 const int width = s.fPixmap.width(); 392 if (1 == width) { 393 // all of the following X values must be 0 394 memset(xy, 0, count * sizeof(uint16_t)); 395 return; 396 } 397 398 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 399 int start = sk_int_mod(xpos, width); 400 int n = width - start; 401 if (n > count) { 402 n = count; 403 } 404 fill_sequential(xptr, start, n); 405 xptr += n; 406 count -= n; 407 408 while (count >= width) { 409 fill_sequential(xptr, 0, width); 410 xptr += width; 411 count -= width; 412 } 413 414 if (count > 0) { 415 fill_sequential(xptr, 0, count); 416 } 417 } 418 419 static void fill_backwards(uint16_t xptr[], int pos, int count) { 420 for (int i = 0; i < count; i++) { 421 SkASSERT(pos >= 0); 422 xptr[i] = pos--; 423 } 424 } 425 426 static void mirrorx_nofilter_trans(const SkBitmapProcState& s, 427 uint32_t xy[], int count, int x, int y) { 428 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 429 430 int xpos = nofilter_trans_preamble(s, &xy, x, y); 431 const int width = s.fPixmap.width(); 432 if (1 == width) { 433 // all of the following X values must be 0 434 memset(xy, 0, count * sizeof(uint16_t)); 435 return; 436 } 437 438 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 439 // need to know our start, and our initial phase (forward or backward) 440 bool forward; 441 int n; 442 int start = sk_int_mod(xpos, 2 * width); 443 if (start >= width) { 444 start = width + ~(start - width); 445 forward = false; 446 n = start + 1; // [start .. 0] 447 } else { 448 forward = true; 449 n = width - start; // [start .. width) 450 } 451 if (n > count) { 452 n = count; 453 } 454 if (forward) { 455 fill_sequential(xptr, start, n); 456 } else { 457 fill_backwards(xptr, start, n); 458 } 459 forward = !forward; 460 xptr += n; 461 count -= n; 462 463 while (count >= width) { 464 if (forward) { 465 fill_sequential(xptr, 0, width); 466 } else { 467 fill_backwards(xptr, width - 1, width); 468 } 469 forward = !forward; 470 xptr += width; 471 count -= width; 472 } 473 474 if (count > 0) { 475 if (forward) { 476 fill_sequential(xptr, 0, count); 477 } else { 478 fill_backwards(xptr, width - 1, count); 479 } 480 } 481 } 482 483 /////////////////////////////////////////////////////////////////////////////// 484 485 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { 486 // test_int_tileprocs(); 487 // check for our special case when there is no scale/affine/perspective 488 if (trivial_matrix && kNone_SkFilterQuality == fFilterLevel) { 489 fIntTileProcY = choose_int_tile_proc(fTileModeY); 490 switch (fTileModeX) { 491 case SkShader::kClamp_TileMode: 492 return clampx_nofilter_trans; 493 case SkShader::kRepeat_TileMode: 494 return repeatx_nofilter_trans; 495 case SkShader::kMirror_TileMode: 496 return mirrorx_nofilter_trans; 497 } 498 } 499 500 int index = 0; 501 if (fFilterLevel != kNone_SkFilterQuality) { 502 index = 1; 503 } 504 if (fInvType & SkMatrix::kPerspective_Mask) { 505 index += 4; 506 } else if (fInvType & SkMatrix::kAffine_Mask) { 507 index += 2; 508 } 509 510 if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) { 511 // clamp gets special version of filterOne 512 fFilterOneX = SK_Fixed1; 513 fFilterOneY = SK_Fixed1; 514 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index]; 515 } 516 517 // all remaining procs use this form for filterOne 518 fFilterOneX = SK_Fixed1 / fPixmap.width(); 519 fFilterOneY = SK_Fixed1 / fPixmap.height(); 520 521 if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) { 522 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index]; 523 } 524 525 fTileProcX = choose_tile_proc(fTileModeX); 526 fTileProcY = choose_tile_proc(fTileModeY); 527 fTileLowBitsProcX = choose_tile_lowbits_proc(fTileModeX); 528 fTileLowBitsProcY = choose_tile_lowbits_proc(fTileModeY); 529 return GeneralXY_Procs[index]; 530 } 531