1 /* 2 * Copyright 2008 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 // The copyright below was added in 2009, but I see no record of moto contributions...? 9 10 /* NEON optimized code (C) COPYRIGHT 2009 Motorola 11 * 12 * Use of this source code is governed by a BSD-style license that can be 13 * found in the LICENSE file. 14 */ 15 16 #include "SkBitmapProcState.h" 17 #include "SkShader.h" 18 #include "SkUtils.h" 19 #include "SkUtilsArm.h" 20 #include "SkBitmapProcState_utils.h" 21 22 /* returns 0...(n-1) given any x (positive or negative). 23 24 As an example, if n (which is always positive) is 5... 25 26 x: -8 -7 -6 -5 -4 -3 -2 -1 0 1 2 3 4 5 6 7 8 27 returns: 2 3 4 0 1 2 3 4 0 1 2 3 4 0 1 2 3 28 */ 29 static inline int sk_int_mod(int x, int n) { 30 SkASSERT(n > 0); 31 if ((unsigned)x >= (unsigned)n) { 32 if (x < 0) { 33 x = n + ~(~x % n); 34 } else { 35 x = x % n; 36 } 37 } 38 return x; 39 } 40 41 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 42 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count); 43 44 #include "SkBitmapProcState_matrix_template.h" 45 46 /////////////////////////////////////////////////////////////////////////////// 47 48 // Compile neon code paths if needed 49 #if defined(SK_ARM_HAS_NEON) 50 51 // These are defined in src/opts/SkBitmapProcState_matrixProcs_neon.cpp 52 extern const SkBitmapProcState::MatrixProc ClampX_ClampY_Procs_neon[]; 53 extern const SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs_neon[]; 54 55 #endif // defined(SK_ARM_HAS_NEON) 56 57 // Compile non-neon code path if needed 58 #if !defined(SK_ARM_HAS_NEON) 59 #define MAKENAME(suffix) ClampX_ClampY ## suffix 60 #define TILEX_PROCF(fx, max) SkClampMax((fx) >> 16, max) 61 #define TILEY_PROCF(fy, max) SkClampMax((fy) >> 16, max) 62 #define EXTRACT_LOW_BITS(v, max) (((v) >> 12) & 0xF) 63 #define CHECK_FOR_DECAL 64 #include "SkBitmapProcState_matrix.h" 65 66 struct ClampTileProcs { 67 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) { 68 return SkClampMax(fx >> 16, max); 69 } 70 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) { 71 return SkClampMax(fy >> 16, max); 72 } 73 }; 74 75 // Referenced in opts_check_x86.cpp 76 void ClampX_ClampY_nofilter_scale(const SkBitmapProcState& s, uint32_t xy[], 77 int count, int x, int y) { 78 return NoFilterProc_Scale<ClampTileProcs, true>(s, xy, count, x, y); 79 } 80 81 static SkBitmapProcState::MatrixProc ClampX_ClampY_Procs[] = { 82 // only clamp lives in the right coord space to check for decal 83 ClampX_ClampY_nofilter_scale, 84 ClampX_ClampY_filter_scale, 85 }; 86 87 #define MAKENAME(suffix) RepeatX_RepeatY ## suffix 88 #define TILEX_PROCF(fx, max) SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1)) 89 #define TILEY_PROCF(fy, max) SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1)) 90 #define EXTRACT_LOW_BITS(v, max) (((unsigned)((v) & 0xFFFF) * ((max) + 1) >> 12) & 0xF) 91 #include "SkBitmapProcState_matrix.h" 92 93 struct RepeatTileProcs { 94 static unsigned X(const SkBitmapProcState&, SkFixed fx, int max) { 95 SkASSERT(max < 65535); 96 return SK_USHIFT16((unsigned)((fx) & 0xFFFF) * ((max) + 1)); 97 } 98 static unsigned Y(const SkBitmapProcState&, SkFixed fy, int max) { 99 SkASSERT(max < 65535); 100 return SK_USHIFT16((unsigned)((fy) & 0xFFFF) * ((max) + 1)); 101 } 102 }; 103 104 static SkBitmapProcState::MatrixProc RepeatX_RepeatY_Procs[] = { 105 NoFilterProc_Scale<RepeatTileProcs, false>, 106 RepeatX_RepeatY_filter_scale, 107 }; 108 #endif 109 110 #define MAKENAME(suffix) GeneralXY ## suffix 111 #define PREAMBLE(state) SkBitmapProcState::FixedTileProc tileProcX = (state).fTileProcX; (void) tileProcX; \ 112 SkBitmapProcState::FixedTileProc tileProcY = (state).fTileProcY; (void) tileProcY; 113 #define PREAMBLE_PARAM_X , SkBitmapProcState::FixedTileProc tileProcX 114 #define PREAMBLE_PARAM_Y , SkBitmapProcState::FixedTileProc tileProcY 115 #define PREAMBLE_ARG_X , tileProcX 116 #define PREAMBLE_ARG_Y , tileProcY 117 #define TILEX_PROCF(fx, max) SK_USHIFT16(tileProcX(fx) * ((max) + 1)) 118 #define TILEY_PROCF(fy, max) SK_USHIFT16(tileProcY(fy) * ((max) + 1)) 119 #define EXTRACT_LOW_BITS(v, max) (((v * (max + 1)) >> 12) & 0xF) 120 #include "SkBitmapProcState_matrix.h" 121 122 struct GeneralTileProcs { 123 static unsigned X(const SkBitmapProcState& s, SkFixed fx, int max) { 124 return SK_USHIFT16(s.fTileProcX(fx) * ((max) + 1)); 125 } 126 static unsigned Y(const SkBitmapProcState& s, SkFixed fy, int max) { 127 return SK_USHIFT16(s.fTileProcY(fy) * ((max) + 1)); 128 } 129 }; 130 131 static SkBitmapProcState::MatrixProc GeneralXY_Procs[] = { 132 NoFilterProc_Scale<GeneralTileProcs, false>, 133 GeneralXY_filter_scale, 134 }; 135 136 /////////////////////////////////////////////////////////////////////////////// 137 138 static inline U16CPU fixed_clamp(SkFixed x) { 139 if (x < 0) { 140 x = 0; 141 } 142 if (x >> 16) { 143 x = 0xFFFF; 144 } 145 return x; 146 } 147 148 static inline U16CPU fixed_repeat(SkFixed x) { 149 return x & 0xFFFF; 150 } 151 152 static inline U16CPU fixed_mirror(SkFixed x) { 153 SkFixed s = SkLeftShift(x, 15) >> 31; 154 // s is FFFFFFFF if we're on an odd interval, or 0 if an even interval 155 return (x ^ s) & 0xFFFF; 156 } 157 158 static SkBitmapProcState::FixedTileProc choose_tile_proc(unsigned m) { 159 if (SkShader::kClamp_TileMode == m) { 160 return fixed_clamp; 161 } 162 if (SkShader::kRepeat_TileMode == m) { 163 return fixed_repeat; 164 } 165 SkASSERT(SkShader::kMirror_TileMode == m); 166 return fixed_mirror; 167 } 168 169 static inline U16CPU int_clamp(int x, int n) { 170 if (x >= n) { 171 x = n - 1; 172 } 173 if (x < 0) { 174 x = 0; 175 } 176 return x; 177 } 178 179 static inline U16CPU int_repeat(int x, int n) { 180 return sk_int_mod(x, n); 181 } 182 183 static inline U16CPU int_mirror(int x, int n) { 184 x = sk_int_mod(x, 2 * n); 185 if (x >= n) { 186 x = n + ~(x - n); 187 } 188 return x; 189 } 190 191 #if 0 192 static void test_int_tileprocs() { 193 for (int i = -8; i <= 8; i++) { 194 SkDebugf(" int_mirror(%2d, 3) = %d\n", i, int_mirror(i, 3)); 195 } 196 } 197 #endif 198 199 static SkBitmapProcState::IntTileProc choose_int_tile_proc(unsigned tm) { 200 if (SkShader::kClamp_TileMode == tm) 201 return int_clamp; 202 if (SkShader::kRepeat_TileMode == tm) 203 return int_repeat; 204 SkASSERT(SkShader::kMirror_TileMode == tm); 205 return int_mirror; 206 } 207 208 ////////////////////////////////////////////////////////////////////////////// 209 210 void decal_nofilter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { 211 int i; 212 213 for (i = (count >> 2); i > 0; --i) { 214 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 215 fx += dx+dx; 216 *dst++ = pack_two_shorts(fx >> 16, (fx + dx) >> 16); 217 fx += dx+dx; 218 } 219 count &= 3; 220 221 uint16_t* xx = (uint16_t*)dst; 222 for (i = count; i > 0; --i) { 223 *xx++ = SkToU16(fx >> 16); fx += dx; 224 } 225 } 226 227 void decal_filter_scale(uint32_t dst[], SkFixed fx, SkFixed dx, int count) { 228 if (count & 1) { 229 SkASSERT((fx >> (16 + 14)) == 0); 230 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 231 fx += dx; 232 } 233 while ((count -= 2) >= 0) { 234 SkASSERT((fx >> (16 + 14)) == 0); 235 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 236 fx += dx; 237 238 *dst++ = (fx >> 12 << 14) | ((fx >> 16) + 1); 239 fx += dx; 240 } 241 } 242 243 /////////////////////////////////////////////////////////////////////////////// 244 // stores the same as SCALE, but is cheaper to compute. Also since there is no 245 // scale, we don't need/have a FILTER version 246 247 static void fill_sequential(uint16_t xptr[], int start, int count) { 248 #if 1 249 if (reinterpret_cast<intptr_t>(xptr) & 0x2) { 250 *xptr++ = start++; 251 count -= 1; 252 } 253 if (count > 3) { 254 uint32_t* xxptr = reinterpret_cast<uint32_t*>(xptr); 255 uint32_t pattern0 = PACK_TWO_SHORTS(start + 0, start + 1); 256 uint32_t pattern1 = PACK_TWO_SHORTS(start + 2, start + 3); 257 start += count & ~3; 258 int qcount = count >> 2; 259 do { 260 *xxptr++ = pattern0; 261 pattern0 += 0x40004; 262 *xxptr++ = pattern1; 263 pattern1 += 0x40004; 264 } while (--qcount != 0); 265 xptr = reinterpret_cast<uint16_t*>(xxptr); 266 count &= 3; 267 } 268 while (--count >= 0) { 269 *xptr++ = start++; 270 } 271 #else 272 for (int i = 0; i < count; i++) { 273 *xptr++ = start++; 274 } 275 #endif 276 } 277 278 static int nofilter_trans_preamble(const SkBitmapProcState& s, uint32_t** xy, 279 int x, int y) { 280 const SkBitmapProcStateAutoMapper mapper(s, x, y); 281 **xy = s.fIntTileProcY(mapper.intY(), s.fPixmap.height()); 282 *xy += 1; // bump the ptr 283 // return our starting X position 284 return mapper.intX(); 285 } 286 287 static void clampx_nofilter_trans(const SkBitmapProcState& s, 288 uint32_t xy[], int count, int x, int y) { 289 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 290 291 int xpos = nofilter_trans_preamble(s, &xy, x, y); 292 const int width = s.fPixmap.width(); 293 if (1 == width) { 294 // all of the following X values must be 0 295 memset(xy, 0, count * sizeof(uint16_t)); 296 return; 297 } 298 299 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 300 int n; 301 302 // fill before 0 as needed 303 if (xpos < 0) { 304 n = -xpos; 305 if (n > count) { 306 n = count; 307 } 308 memset(xptr, 0, n * sizeof(uint16_t)); 309 count -= n; 310 if (0 == count) { 311 return; 312 } 313 xptr += n; 314 xpos = 0; 315 } 316 317 // fill in 0..width-1 if needed 318 if (xpos < width) { 319 n = width - xpos; 320 if (n > count) { 321 n = count; 322 } 323 fill_sequential(xptr, xpos, n); 324 count -= n; 325 if (0 == count) { 326 return; 327 } 328 xptr += n; 329 } 330 331 // fill the remaining with the max value 332 sk_memset16(xptr, width - 1, count); 333 } 334 335 static void repeatx_nofilter_trans(const SkBitmapProcState& s, 336 uint32_t xy[], int count, int x, int y) { 337 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 338 339 int xpos = nofilter_trans_preamble(s, &xy, x, y); 340 const int width = s.fPixmap.width(); 341 if (1 == width) { 342 // all of the following X values must be 0 343 memset(xy, 0, count * sizeof(uint16_t)); 344 return; 345 } 346 347 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 348 int start = sk_int_mod(xpos, width); 349 int n = width - start; 350 if (n > count) { 351 n = count; 352 } 353 fill_sequential(xptr, start, n); 354 xptr += n; 355 count -= n; 356 357 while (count >= width) { 358 fill_sequential(xptr, 0, width); 359 xptr += width; 360 count -= width; 361 } 362 363 if (count > 0) { 364 fill_sequential(xptr, 0, count); 365 } 366 } 367 368 static void fill_backwards(uint16_t xptr[], int pos, int count) { 369 for (int i = 0; i < count; i++) { 370 SkASSERT(pos >= 0); 371 xptr[i] = pos--; 372 } 373 } 374 375 static void mirrorx_nofilter_trans(const SkBitmapProcState& s, 376 uint32_t xy[], int count, int x, int y) { 377 SkASSERT((s.fInvType & ~SkMatrix::kTranslate_Mask) == 0); 378 379 int xpos = nofilter_trans_preamble(s, &xy, x, y); 380 const int width = s.fPixmap.width(); 381 if (1 == width) { 382 // all of the following X values must be 0 383 memset(xy, 0, count * sizeof(uint16_t)); 384 return; 385 } 386 387 uint16_t* xptr = reinterpret_cast<uint16_t*>(xy); 388 // need to know our start, and our initial phase (forward or backward) 389 bool forward; 390 int n; 391 int start = sk_int_mod(xpos, 2 * width); 392 if (start >= width) { 393 start = width + ~(start - width); 394 forward = false; 395 n = start + 1; // [start .. 0] 396 } else { 397 forward = true; 398 n = width - start; // [start .. width) 399 } 400 if (n > count) { 401 n = count; 402 } 403 if (forward) { 404 fill_sequential(xptr, start, n); 405 } else { 406 fill_backwards(xptr, start, n); 407 } 408 forward = !forward; 409 xptr += n; 410 count -= n; 411 412 while (count >= width) { 413 if (forward) { 414 fill_sequential(xptr, 0, width); 415 } else { 416 fill_backwards(xptr, width - 1, width); 417 } 418 forward = !forward; 419 xptr += width; 420 count -= width; 421 } 422 423 if (count > 0) { 424 if (forward) { 425 fill_sequential(xptr, 0, count); 426 } else { 427 fill_backwards(xptr, width - 1, count); 428 } 429 } 430 } 431 432 /////////////////////////////////////////////////////////////////////////////// 433 434 SkBitmapProcState::MatrixProc SkBitmapProcState::chooseMatrixProc(bool trivial_matrix) { 435 SkASSERT((fInvType & (SkMatrix::kAffine_Mask | SkMatrix::kPerspective_Mask)) == 0); 436 437 // test_int_tileprocs(); 438 // check for our special case when there is no scale/affine/perspective 439 if (trivial_matrix && kNone_SkFilterQuality == fFilterQuality) { 440 fIntTileProcY = choose_int_tile_proc(fTileModeY); 441 switch (fTileModeX) { 442 case SkShader::kClamp_TileMode: 443 return clampx_nofilter_trans; 444 case SkShader::kRepeat_TileMode: 445 return repeatx_nofilter_trans; 446 case SkShader::kMirror_TileMode: 447 return mirrorx_nofilter_trans; 448 case SkShader::kDecal_TileMode: 449 SkASSERT(false); // should never get here, handled by stages 450 return clampx_nofilter_trans; 451 } 452 } 453 454 int index = 0; 455 if (fFilterQuality != kNone_SkFilterQuality) { 456 index = 1; 457 } 458 459 if (SkShader::kClamp_TileMode == fTileModeX && SkShader::kClamp_TileMode == fTileModeY) { 460 // clamp gets special version of filterOne 461 fFilterOneX = SK_Fixed1; 462 fFilterOneY = SK_Fixed1; 463 return SK_ARM_NEON_WRAP(ClampX_ClampY_Procs)[index]; 464 } 465 466 // all remaining procs use this form for filterOne 467 fFilterOneX = SK_Fixed1 / fPixmap.width(); 468 fFilterOneY = SK_Fixed1 / fPixmap.height(); 469 470 if (SkShader::kRepeat_TileMode == fTileModeX && SkShader::kRepeat_TileMode == fTileModeY) { 471 return SK_ARM_NEON_WRAP(RepeatX_RepeatY_Procs)[index]; 472 } 473 474 fTileProcX = choose_tile_proc(fTileModeX); 475 fTileProcY = choose_tile_proc(fTileModeY); 476 return GeneralXY_Procs[index]; 477 } 478