1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ 2 /* 3 * Copyright 2000 SuSE, Inc. 4 * Copyright 2007 Red Hat, Inc. 5 * 6 * Permission to use, copy, modify, distribute, and sell this software and its 7 * documentation for any purpose is hereby granted without fee, provided that 8 * the above copyright notice appear in all copies and that both that 9 * copyright notice and this permission notice appear in supporting 10 * documentation, and that the name of SuSE not be used in advertising or 11 * publicity pertaining to distribution of the software without specific, 12 * written prior permission. SuSE makes no representations about the 13 * suitability of this software for any purpose. It is provided "as is" 14 * without express or implied warranty. 15 * 16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE 18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 22 * 23 * Author: Keith Packard, SuSE, Inc. 24 */ 25 26 #ifndef PIXMAN_FAST_PATH_H__ 27 #define PIXMAN_FAST_PATH_H__ 28 29 #include "pixman-private.h" 30 31 #define PIXMAN_REPEAT_COVER -1 32 33 /* Flags describing input parameters to fast path macro template. 34 * Turning on some flag values may indicate that 35 * "some property X is available so template can use this" or 36 * "some property X should be handled by template". 37 * 38 * FLAG_HAVE_SOLID_MASK 39 * Input mask is solid so template should handle this. 40 * 41 * FLAG_HAVE_NON_SOLID_MASK 42 * Input mask is bits mask so template should handle this. 43 * 44 * FLAG_HAVE_SOLID_MASK and FLAG_HAVE_NON_SOLID_MASK are mutually 45 * exclusive. 
(It's not allowed to turn both flags on) 46 */ 47 #define FLAG_NONE (0) 48 #define FLAG_HAVE_SOLID_MASK (1 << 1) 49 #define FLAG_HAVE_NON_SOLID_MASK (1 << 2) 50 51 /* To avoid too short repeated scanline function calls, extend source 52 * scanlines having width less than below constant value. 53 */ 54 #define REPEAT_NORMAL_MIN_WIDTH 64 55 56 static force_inline pixman_bool_t 57 repeat (pixman_repeat_t repeat, int *c, int size) 58 { 59 if (repeat == PIXMAN_REPEAT_NONE) 60 { 61 if (*c < 0 || *c >= size) 62 return FALSE; 63 } 64 else if (repeat == PIXMAN_REPEAT_NORMAL) 65 { 66 while (*c >= size) 67 *c -= size; 68 while (*c < 0) 69 *c += size; 70 } 71 else if (repeat == PIXMAN_REPEAT_PAD) 72 { 73 *c = CLIP (*c, 0, size - 1); 74 } 75 else /* REFLECT */ 76 { 77 *c = MOD (*c, size * 2); 78 if (*c >= size) 79 *c = size * 2 - *c - 1; 80 } 81 return TRUE; 82 } 83 84 static force_inline int 85 pixman_fixed_to_bilinear_weight (pixman_fixed_t x) 86 { 87 return (x >> (16 - BILINEAR_INTERPOLATION_BITS)) & 88 ((1 << BILINEAR_INTERPOLATION_BITS) - 1); 89 } 90 91 #if BILINEAR_INTERPOLATION_BITS <= 4 92 /* Inspired by Filter_32_opaque from Skia */ 93 static force_inline uint32_t 94 bilinear_interpolation (uint32_t tl, uint32_t tr, 95 uint32_t bl, uint32_t br, 96 int distx, int disty) 97 { 98 int distxy, distxiy, distixy, distixiy; 99 uint32_t lo, hi; 100 101 distx <<= (4 - BILINEAR_INTERPOLATION_BITS); 102 disty <<= (4 - BILINEAR_INTERPOLATION_BITS); 103 104 distxy = distx * disty; 105 distxiy = (distx << 4) - distxy; /* distx * (16 - disty) */ 106 distixy = (disty << 4) - distxy; /* disty * (16 - distx) */ 107 distixiy = 108 16 * 16 - (disty << 4) - 109 (distx << 4) + distxy; /* (16 - distx) * (16 - disty) */ 110 111 lo = (tl & 0xff00ff) * distixiy; 112 hi = ((tl >> 8) & 0xff00ff) * distixiy; 113 114 lo += (tr & 0xff00ff) * distxiy; 115 hi += ((tr >> 8) & 0xff00ff) * distxiy; 116 117 lo += (bl & 0xff00ff) * distixy; 118 hi += ((bl >> 8) & 0xff00ff) * distixy; 119 120 lo += (br & 
0xff00ff) * distxy; 121 hi += ((br >> 8) & 0xff00ff) * distxy; 122 123 return ((lo >> 8) & 0xff00ff) | (hi & ~0xff00ff); 124 } 125 126 #else 127 #if SIZEOF_LONG > 4 128 129 static force_inline uint32_t 130 bilinear_interpolation (uint32_t tl, uint32_t tr, 131 uint32_t bl, uint32_t br, 132 int distx, int disty) 133 { 134 uint64_t distxy, distxiy, distixy, distixiy; 135 uint64_t tl64, tr64, bl64, br64; 136 uint64_t f, r; 137 138 distx <<= (8 - BILINEAR_INTERPOLATION_BITS); 139 disty <<= (8 - BILINEAR_INTERPOLATION_BITS); 140 141 distxy = distx * disty; 142 distxiy = distx * (256 - disty); 143 distixy = (256 - distx) * disty; 144 distixiy = (256 - distx) * (256 - disty); 145 146 /* Alpha and Blue */ 147 tl64 = tl & 0xff0000ff; 148 tr64 = tr & 0xff0000ff; 149 bl64 = bl & 0xff0000ff; 150 br64 = br & 0xff0000ff; 151 152 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; 153 r = f & 0x0000ff0000ff0000ull; 154 155 /* Red and Green */ 156 tl64 = tl; 157 tl64 = ((tl64 << 16) & 0x000000ff00000000ull) | (tl64 & 0x0000ff00ull); 158 159 tr64 = tr; 160 tr64 = ((tr64 << 16) & 0x000000ff00000000ull) | (tr64 & 0x0000ff00ull); 161 162 bl64 = bl; 163 bl64 = ((bl64 << 16) & 0x000000ff00000000ull) | (bl64 & 0x0000ff00ull); 164 165 br64 = br; 166 br64 = ((br64 << 16) & 0x000000ff00000000ull) | (br64 & 0x0000ff00ull); 167 168 f = tl64 * distixiy + tr64 * distxiy + bl64 * distixy + br64 * distxy; 169 r |= ((f >> 16) & 0x000000ff00000000ull) | (f & 0xff000000ull); 170 171 return (uint32_t)(r >> 16); 172 } 173 174 #else 175 176 static force_inline uint32_t 177 bilinear_interpolation (uint32_t tl, uint32_t tr, 178 uint32_t bl, uint32_t br, 179 int distx, int disty) 180 { 181 int distxy, distxiy, distixy, distixiy; 182 uint32_t f, r; 183 184 distx <<= (8 - BILINEAR_INTERPOLATION_BITS); 185 disty <<= (8 - BILINEAR_INTERPOLATION_BITS); 186 187 distxy = distx * disty; 188 distxiy = (distx << 8) - distxy; /* distx * (256 - disty) */ 189 distixy = (disty << 8) - distxy; /* disty 
* (256 - distx) */ 190 distixiy = 191 256 * 256 - (disty << 8) - 192 (distx << 8) + distxy; /* (256 - distx) * (256 - disty) */ 193 194 /* Blue */ 195 r = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy 196 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; 197 198 /* Green */ 199 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy 200 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; 201 r |= f & 0xff000000; 202 203 tl >>= 16; 204 tr >>= 16; 205 bl >>= 16; 206 br >>= 16; 207 r >>= 16; 208 209 /* Red */ 210 f = (tl & 0x000000ff) * distixiy + (tr & 0x000000ff) * distxiy 211 + (bl & 0x000000ff) * distixy + (br & 0x000000ff) * distxy; 212 r |= f & 0x00ff0000; 213 214 /* Alpha */ 215 f = (tl & 0x0000ff00) * distixiy + (tr & 0x0000ff00) * distxiy 216 + (bl & 0x0000ff00) * distixy + (br & 0x0000ff00) * distxy; 217 r |= f & 0xff000000; 218 219 return r; 220 } 221 222 #endif 223 #endif // BILINEAR_INTERPOLATION_BITS <= 4 224 225 /* 226 * For each scanline fetched from source image with PAD repeat: 227 * - calculate how many pixels need to be padded on the left side 228 * - calculate how many pixels need to be padded on the right side 229 * - update width to only count pixels which are fetched from the image 230 * All this information is returned via 'width', 'left_pad', 'right_pad' 231 * arguments. The code is assuming that 'unit_x' is positive. 232 * 233 * Note: 64-bit math is used in order to avoid potential overflows, which 234 * is probably excessive in many cases. This particular function 235 * may need its own correctness test and performance tuning. 
236 */ 237 static force_inline void 238 pad_repeat_get_scanline_bounds (int32_t source_image_width, 239 pixman_fixed_t vx, 240 pixman_fixed_t unit_x, 241 int32_t * width, 242 int32_t * left_pad, 243 int32_t * right_pad) 244 { 245 int64_t max_vx = (int64_t) source_image_width << 16; 246 int64_t tmp; 247 if (vx < 0) 248 { 249 tmp = ((int64_t) unit_x - 1 - vx) / unit_x; 250 if (tmp > *width) 251 { 252 *left_pad = *width; 253 *width = 0; 254 } 255 else 256 { 257 *left_pad = (int32_t) tmp; 258 *width -= (int32_t) tmp; 259 } 260 } 261 else 262 { 263 *left_pad = 0; 264 } 265 tmp = ((int64_t) unit_x - 1 - vx + max_vx) / unit_x - *left_pad; 266 if (tmp < 0) 267 { 268 *right_pad = *width; 269 *width = 0; 270 } 271 else if (tmp >= *width) 272 { 273 *right_pad = 0; 274 } 275 else 276 { 277 *right_pad = *width - (int32_t) tmp; 278 *width = (int32_t) tmp; 279 } 280 } 281 282 /* A macroified version of specialized nearest scalers for some 283 * common 8888 and 565 formats. It supports SRC and OVER ops. 284 * 285 * There are two repeat versions, one that handles repeat normal, 286 * and one without repeat handling that only works if the src region 287 * used is completely covered by the pre-repeated source samples. 288 * 289 * The loops are unrolled to process two pixels per iteration for better 290 * performance on most CPU architectures (superscalar processors 291 * can issue several operations simultaneously, other processors can hide 292 * instructions latencies by pipelining operations). Unrolling more 293 * does not make much sense because the compiler will start running out 294 * of spare registers soon. 295 */ 296 297 #define GET_8888_ALPHA(s) ((s) >> 24) 298 /* This is not actually used since we don't have an OVER with 299 565 source, but it is needed to build. 
*/
#define GET_0565_ALPHA(s) 0xff
#define GET_x888_ALPHA(s) 0xff

/*
 * FAST_NEAREST_SCANLINE defines a force_inline function
 * 'scanline_func_name' that writes 'w' destination pixels, sampling 'src'
 * at pixman_fixed_to_int (vx) and advancing vx by unit_x per pixel.
 *
 *  - OP must be SRC or OVER; any other operator reaches abort().
 *  - For OVER, 'fully_transparent_src' makes the function return at once.
 *  - For repeat_mode NORMAL, vx is kept negative by subtracting
 *    src_width_fixed, so callers are expected to pass 'src' advanced by the
 *    image width and 'vx' reduced by src_width_fixed (this is what
 *    FAST_NEAREST_MAINLOOP_INT below does).
 *  - Pixels are handled two per iteration, with one trailing pixel when
 *    'w' is odd.
 */
#define FAST_NEAREST_SCANLINE(scanline_func_name, SRC_FORMAT, DST_FORMAT, \
                              src_type_t, dst_type_t, OP, repeat_mode) \
static force_inline void \
scanline_func_name (dst_type_t *dst, \
                    const src_type_t *src, \
                    int32_t w, \
                    pixman_fixed_t vx, \
                    pixman_fixed_t unit_x, \
                    pixman_fixed_t src_width_fixed, \
                    pixman_bool_t fully_transparent_src) \
{ \
    uint32_t d; \
    src_type_t s1, s2; \
    uint8_t a1, a2; \
    int x1, x2; \
 \
    if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER && fully_transparent_src) \
    { \
        return; \
    } \
 \
    if (PIXMAN_OP_ ## OP != PIXMAN_OP_SRC && PIXMAN_OP_ ## OP != PIXMAN_OP_OVER) \
    { \
        abort(); \
    } \
 \
    while ((w -= 2) >= 0) \
    { \
        /* first pixel of the pair */ \
        x1 = pixman_fixed_to_int (vx); \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* This works because we know that unit_x is positive */ \
            while (vx >= 0) \
            { \
                vx -= src_width_fixed; \
            } \
        } \
        s1 = src[x1]; \
 \
        /* second pixel of the pair */ \
        x2 = pixman_fixed_to_int (vx); \
        vx += unit_x; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            /* This works because we know that unit_x is positive */ \
            while (vx >= 0) \
            { \
                vx -= src_width_fixed; \
            } \
        } \
        s2 = src[x2]; \
 \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
        { \
            a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
            a2 = GET_ ## SRC_FORMAT ## _ALPHA(s2); \
 \
            if (a1 == 0xff) \
            { \
                /* opaque source: plain store */ \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
            } \
            else if (s1) \
            { \
                /* d = d * (255 - a1) + s1, computed in 8888 */ \
                d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
                s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
                a1 ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
                *dst = convert_8888_to_ ## DST_FORMAT (d); \
            } \
            dst++; \
 \
            if (a2 == 0xff) \
            { \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
            } \
            else if (s2) \
            { \
                d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
                s2 = convert_ ## SRC_FORMAT ## _to_8888 (s2); \
                a2 ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (d, a2, s2); \
                *dst = convert_8888_to_ ## DST_FORMAT (d); \
            } \
            dst++; \
        } \
        else /* PIXMAN_OP_SRC */ \
        { \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s2); \
        } \
    } \
 \
    /* w is -1 here when one pixel is left over, -2 otherwise */ \
    if (w & 1) \
    { \
        x1 = pixman_fixed_to_int (vx); \
        s1 = src[x1]; \
 \
        if (PIXMAN_OP_ ## OP == PIXMAN_OP_OVER) \
        { \
            a1 = GET_ ## SRC_FORMAT ## _ALPHA(s1); \
 \
            if (a1 == 0xff) \
            { \
                *dst = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
            } \
            else if (s1) \
            { \
                d = convert_ ## DST_FORMAT ## _to_8888 (*dst); \
                s1 = convert_ ## SRC_FORMAT ## _to_8888 (s1); \
                a1 ^= 0xff; \
                UN8x4_MUL_UN8_ADD_UN8x4 (d, a1, s1); \
                *dst = convert_8888_to_ ## DST_FORMAT (d); \
            } \
            dst++; \
        } \
        else /* PIXMAN_OP_SRC */ \
        { \
            *dst++ = convert_ ## SRC_FORMAT ## _to_ ## DST_FORMAT (s1); \
        } \
    } \
}

/*
 * FAST_NEAREST_MAINLOOP_INT defines the composite entry point
 * fast_composite_scaled_nearest<scale_func_name> (imp, info).
 *
 * It transforms the start position into source space, then calls
 * 'scanline_func (mask, dst, src, w, vx, unit_x, src_width_fixed, zero_src)'
 * once per zone of every destination row:
 *
 *  PAD    - left padding, image part, right padding.  The padding calls use
 *           vx = -pixman_fixed_e and unit_x = 0, so they keep reading
 *           src[-1] relative to the pointer passed in.
 *  NONE   - the same three zones, with padding read from a static zero
 *           pixel and flagged as fully transparent; rows outside the image
 *           are a single transparent call.
 *  others - one call for the whole row.
 *
 * For the image part 'src' is advanced by the image width and 'vx' reduced
 * by src_width_fixed, which matches the negative-vx convention of
 * FAST_NEAREST_SCANLINE.
 */
#define FAST_NEAREST_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                  dst_type_t, repeat_mode, have_mask, mask_is_solid) \
static void \
fast_composite_scaled_nearest ## scale_func_name (pixman_implementation_t *imp, \
                                                  pixman_composite_info_t *info) \
{ \
    PIXMAN_COMPOSITE_ARGS (info); \
    dst_type_t *dst_line; \
    mask_type_t *mask_line; \
    src_type_t *src_first_line; \
    int y; \
    pixman_fixed_t src_width_fixed = pixman_int_to_fixed (src_image->bits.width); \
    pixman_fixed_t max_vy; \
    pixman_vector_t v; \
    pixman_fixed_t vx, vy; \
    pixman_fixed_t unit_x, unit_y; \
    int32_t left_pad, right_pad; \
 \
    src_type_t *src; \
    dst_type_t *dst; \
    mask_type_t solid_mask; \
    const mask_type_t *mask = &solid_mask; \
    int src_stride, mask_stride, dst_stride; \
 \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \
    if (have_mask) \
    { \
        if (mask_is_solid) \
        { \
            solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \
        } \
        else \
        { \
            PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, mask_type_t, \
                                   mask_stride, mask_line, 1); \
        } \
    } \
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \
     * transformed from destination space to source space */ \
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \
 \
    /* reference point is the center of the pixel */ \
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \
    v.vector[2] = pixman_fixed_1; \
 \
    if (!pixman_transform_point_3d (src_image->common.transform, &v)) \
    { \
        return; \
    } \
 \
    unit_x = src_image->common.transform->matrix[0][0]; \
    unit_y = src_image->common.transform->matrix[1][1]; \
 \
    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */ \
    v.vector[0] -= pixman_fixed_e; \
    v.vector[1] -= pixman_fixed_e; \
 \
    vx = v.vector[0]; \
    vy = v.vector[1]; \
 \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
    { \
        max_vy = pixman_int_to_fixed (src_image->bits.height); \
 \
        /* Clamp repeating positions inside the actual samples */ \
        repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \
        repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
    } \
 \
    if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \
        PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
    { \
        /* split the row into left padding / image part / right padding */ \
        pad_repeat_get_scanline_bounds (src_image->bits.width, vx, unit_x, \
                                        &width, &left_pad, &right_pad); \
        vx += left_pad * unit_x; \
    } \
 \
    while (--height >= 0) \
    { \
        dst = dst_line; \
        dst_line += dst_stride; \
        if (have_mask && !mask_is_solid) \
        { \
            mask = mask_line; \
            mask_line += mask_stride; \
        } \
 \
        y = pixman_fixed_to_int (vy); \
        vy += unit_y; \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \
        { \
            repeat (PIXMAN_REPEAT_NORMAL, &vy, max_vy); \
        } \
 \
        if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \
        { \
            repeat (PIXMAN_REPEAT_PAD, &y, src_image->bits.height); \
            src = src_first_line + src_stride * y; \
            if (left_pad > 0) \
            { \
                /* src + 1 read at index -1 replicates the first pixel */ \
                scanline_func (mask, dst, \
                               src + 1, \
                               left_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
            } \
            if (width > 0) \
            { \
                scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
                               dst + left_pad, src + src_image->bits.width, width, \
                               vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
            } \
            if (right_pad > 0) \
            { \
                /* src + width read at index -1 replicates the last pixel */ \
                scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
                               dst + left_pad + width, src + src_image->bits.width, \
                               right_pad, -pixman_fixed_e, 0, src_width_fixed, FALSE); \
            } \
        } \
        else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \
        { \
            static const src_type_t zero[1] = { 0 }; \
            if (y < 0 || y >= src_image->bits.height) \
            { \
                /* the whole row is outside the image */ \
                scanline_func (mask, dst, zero + 1, left_pad + width + right_pad, \
                               -pixman_fixed_e, 0, src_width_fixed, TRUE); \
                continue; \
            } \
            src = src_first_line + src_stride * y; \
            if (left_pad > 0) \
            { \
                scanline_func (mask, dst, zero + 1, left_pad, \
                               -pixman_fixed_e, 0, src_width_fixed, TRUE); \
            } \
            if (width > 0) \
            { \
                scanline_func (mask + (mask_is_solid ? 0 : left_pad), \
                               dst + left_pad, src + src_image->bits.width, width, \
                               vx - src_width_fixed, unit_x, src_width_fixed, FALSE); \
            } \
            if (right_pad > 0) \
            { \
                scanline_func (mask + (mask_is_solid ? 0 : left_pad + width), \
                               dst + left_pad + width, zero + 1, right_pad, \
                               -pixman_fixed_e, 0, src_width_fixed, TRUE); \
            } \
        } \
        else \
        { \
            src = src_first_line + src_stride * y; \
            scanline_func (mask, dst, src + src_image->bits.width, width, vx - src_width_fixed, \
                           unit_x, src_width_fixed, FALSE); \
        } \
    } \
}

/* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */
#define FAST_NEAREST_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \
                                     dst_type_t, repeat_mode, have_mask, mask_is_solid) \
    FAST_NEAREST_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t, \
                              dst_type_t, repeat_mode, have_mask, mask_is_solid)

/*
 * Maskless main loop: wraps 'scanline_func' (which takes no mask argument)
 * in an adapter that accepts and ignores a 'const uint8_t *mask', then
 * instantiates FAST_NEAREST_MAINLOOP_INT with have_mask = FALSE.
 */
#define FAST_NEAREST_MAINLOOP_NOMASK(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                                     repeat_mode) \
    static force_inline void \
    scanline_func##scale_func_name##_wrapper ( \
                    const uint8_t *mask, \
                    dst_type_t *dst, \
                    const src_type_t *src, \
                    int32_t w, \
                    pixman_fixed_t vx, \
                    pixman_fixed_t unit_x, \
                    pixman_fixed_t max_vx, \
                    pixman_bool_t fully_transparent_src) \
    { \
        scanline_func (dst, src, w, vx, unit_x, max_vx, fully_transparent_src); \
    } \
    FAST_NEAREST_MAINLOOP_INT (scale_func_name, scanline_func##scale_func_name##_wrapper, \
                               src_type_t, uint8_t, dst_type_t, repeat_mode, FALSE, FALSE)

/* Same as FAST_NEAREST_MAINLOOP_NOMASK with '_' prepended to the name */
#define FAST_NEAREST_MAINLOOP(scale_func_name, scanline_func, src_type_t, dst_type_t, \
                              repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name, scanline_func, src_type_t, \
                                 dst_type_t, repeat_mode)

/*
 * Convenience: emits both the scanline function
 * scaled_nearest_scanline_<name>_<OP> and the maskless main loop
 * fast_composite_scaled_nearest_<name>_<OP> that drives it.
 */
#define FAST_NEAREST(scale_func_name, SRC_FORMAT, DST_FORMAT, \
                     src_type_t, dst_type_t, OP, repeat_mode) \
    FAST_NEAREST_SCANLINE(scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                          SRC_FORMAT, DST_FORMAT, src_type_t, dst_type_t, \
                          OP, repeat_mode) \
    FAST_NEAREST_MAINLOOP_NOMASK(_ ## scale_func_name ## _ ## OP, \
                                 scaled_nearest_scanline_ ## scale_func_name ## _ ## OP, \
                                 src_type_t, dst_type_t, repeat_mode)


/* Source flags shared by every scaled-nearest table entry below */
#define SCALED_NEAREST_FLAGS \
    (FAST_PATH_SCALE_TRANSFORM | \
     FAST_PATH_NO_ALPHA_MAP | \
     FAST_PATH_NEAREST_FILTER | \
     FAST_PATH_NO_ACCESSORS | \
     FAST_PATH_NARROW_FORMAT)

/*
 * The SIMPLE_NEAREST_* macros below each expand to one brace-enclosed
 * initializer listing, in order: operator, source format, source flags,
 * mask format, mask flags, destination format, destination flags and the
 * fast_composite_scaled_nearest_<func>_<repeat>_<op> function.
 * (Presumably an entry of a pixman fast path table -- confirm against
 * pixman-private.h.)
 */

/* --- no mask ------------------------------------------------------------ */

#define SIMPLE_NEAREST_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
    }

#define SIMPLE_NEAREST_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_NEAREST_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
    }

#define SIMPLE_NEAREST_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST), \
      PIXMAN_null, 0, \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
    }

/* --- a8 mask ------------------------------------------------------------ */

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NONE_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \
    }

#define SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST), \
      PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \
    }

/* --- solid mask --------------------------------------------------------- */

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_NORMAL_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _normal ## _ ## op, \
    }

#define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \
    { PIXMAN_OP_ ## op, \
      PIXMAN_ ## s, \
      (SCALED_NEAREST_FLAGS | \
       FAST_PATH_PAD_REPEAT | \
       FAST_PATH_X_UNIT_POSITIVE), \
      PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \
      PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \
      fast_composite_scaled_nearest_ ## func ## _pad ## _ ## op, \
    }

715 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ 716 { PIXMAN_OP_ ## op, \ 717 PIXMAN_ ## s, \ 718 (SCALED_NEAREST_FLAGS | \ 719 FAST_PATH_NONE_REPEAT | \ 720 FAST_PATH_X_UNIT_POSITIVE), \ 721 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 722 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 723 fast_composite_scaled_nearest_ ## func ## _none ## _ ## op, \ 724 } 725 726 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ 727 { PIXMAN_OP_ ## op, \ 728 PIXMAN_ ## s, \ 729 SCALED_NEAREST_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST, \ 730 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 731 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 732 fast_composite_scaled_nearest_ ## func ## _cover ## _ ## op, \ 733 } 734 735 /* Prefer the use of 'cover' variant, because it is faster */ 736 #define SIMPLE_NEAREST_FAST_PATH(op,s,d,func) \ 737 SIMPLE_NEAREST_FAST_PATH_COVER (op,s,d,func), \ 738 SIMPLE_NEAREST_FAST_PATH_NONE (op,s,d,func), \ 739 SIMPLE_NEAREST_FAST_PATH_PAD (op,s,d,func), \ 740 SIMPLE_NEAREST_FAST_PATH_NORMAL (op,s,d,func) 741 742 #define SIMPLE_NEAREST_A8_MASK_FAST_PATH(op,s,d,func) \ 743 SIMPLE_NEAREST_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ 744 SIMPLE_NEAREST_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ 745 SIMPLE_NEAREST_A8_MASK_FAST_PATH_PAD (op,s,d,func) 746 747 #define SIMPLE_NEAREST_SOLID_MASK_FAST_PATH(op,s,d,func) \ 748 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ 749 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ 750 SIMPLE_NEAREST_SOLID_MASK_FAST_PATH_PAD (op,s,d,func) 751 752 /*****************************************************************************/ 753 754 /* 755 * Identify 5 zones in each scanline for bilinear scaling. Depending on 756 * whether 2 pixels to be interpolated are fetched from the image itself, 757 * from the padding area around it or from both image and padding area. 
758 */ 759 static force_inline void 760 bilinear_pad_repeat_get_scanline_bounds (int32_t source_image_width, 761 pixman_fixed_t vx, 762 pixman_fixed_t unit_x, 763 int32_t * left_pad, 764 int32_t * left_tz, 765 int32_t * width, 766 int32_t * right_tz, 767 int32_t * right_pad) 768 { 769 int width1 = *width, left_pad1, right_pad1; 770 int width2 = *width, left_pad2, right_pad2; 771 772 pad_repeat_get_scanline_bounds (source_image_width, vx, unit_x, 773 &width1, &left_pad1, &right_pad1); 774 pad_repeat_get_scanline_bounds (source_image_width, vx + pixman_fixed_1, 775 unit_x, &width2, &left_pad2, &right_pad2); 776 777 *left_pad = left_pad2; 778 *left_tz = left_pad1 - left_pad2; 779 *right_tz = right_pad2 - right_pad1; 780 *right_pad = right_pad1; 781 *width -= *left_pad + *left_tz + *right_tz + *right_pad; 782 } 783 784 /* 785 * Main loop template for single pass bilinear scaling. It needs to be 786 * provided with 'scanline_func' which should do the compositing operation. 787 * The needed function has the following prototype: 788 * 789 * scanline_func (dst_type_t * dst, 790 * const mask_type_ * mask, 791 * const src_type_t * src_top, 792 * const src_type_t * src_bottom, 793 * int32_t width, 794 * int weight_top, 795 * int weight_bottom, 796 * pixman_fixed_t vx, 797 * pixman_fixed_t unit_x, 798 * pixman_fixed_t max_vx, 799 * pixman_bool_t zero_src) 800 * 801 * Where: 802 * dst - destination scanline buffer for storing results 803 * mask - mask buffer (or single value for solid mask) 804 * src_top, src_bottom - two source scanlines 805 * width - number of pixels to process 806 * weight_top - weight of the top row for interpolation 807 * weight_bottom - weight of the bottom row for interpolation 808 * vx - initial position for fetching the first pair of 809 * pixels from the source buffer 810 * unit_x - position increment needed to move to the next pair 811 * of pixels 812 * max_vx - image size as a fixed point value, can be used for 813 * implementing NORMAL repeat (when 
it is supported) 814 * zero_src - boolean hint variable, which is set to TRUE when 815 * all source pixels are fetched from zero padding 816 * zone for NONE repeat 817 * 818 * Note: normally the sum of 'weight_top' and 'weight_bottom' is equal to 819 * BILINEAR_INTERPOLATION_RANGE, but sometimes it may be less than that 820 * for NONE repeat when handling fuzzy antialiased top or bottom image 821 * edges. Also both top and bottom weight variables are guaranteed to 822 * have value, which is less than BILINEAR_INTERPOLATION_RANGE. 823 * For example, the weights can fit into unsigned byte or be used 824 * with 8-bit SIMD multiplication instructions for 8-bit interpolation 825 * precision. 826 */ 827 #define FAST_BILINEAR_MAINLOOP_INT(scale_func_name, scanline_func, src_type_t, mask_type_t, \ 828 dst_type_t, repeat_mode, flags) \ 829 static void \ 830 fast_composite_scaled_bilinear ## scale_func_name (pixman_implementation_t *imp, \ 831 pixman_composite_info_t *info) \ 832 { \ 833 PIXMAN_COMPOSITE_ARGS (info); \ 834 dst_type_t *dst_line; \ 835 mask_type_t *mask_line; \ 836 src_type_t *src_first_line; \ 837 int y1, y2; \ 838 pixman_fixed_t max_vx = INT32_MAX; /* suppress uninitialized variable warning */ \ 839 pixman_vector_t v; \ 840 pixman_fixed_t vx, vy; \ 841 pixman_fixed_t unit_x, unit_y; \ 842 int32_t left_pad, left_tz, right_tz, right_pad; \ 843 \ 844 dst_type_t *dst; \ 845 mask_type_t solid_mask; \ 846 const mask_type_t *mask = &solid_mask; \ 847 int src_stride, mask_stride, dst_stride; \ 848 \ 849 int src_width; \ 850 pixman_fixed_t src_width_fixed; \ 851 int max_x; \ 852 pixman_bool_t need_src_extension; \ 853 \ 854 PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, dst_type_t, dst_stride, dst_line, 1); \ 855 if (flags & FLAG_HAVE_SOLID_MASK) \ 856 { \ 857 solid_mask = _pixman_image_get_solid (imp, mask_image, dest_image->bits.format); \ 858 mask_stride = 0; \ 859 } \ 860 else if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 861 { \ 862 PIXMAN_IMAGE_GET_LINE 
(mask_image, mask_x, mask_y, mask_type_t, \ 863 mask_stride, mask_line, 1); \ 864 } \ 865 \ 866 /* pass in 0 instead of src_x and src_y because src_x and src_y need to be \ 867 * transformed from destination space to source space */ \ 868 PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, src_type_t, src_stride, src_first_line, 1); \ 869 \ 870 /* reference point is the center of the pixel */ \ 871 v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2; \ 872 v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2; \ 873 v.vector[2] = pixman_fixed_1; \ 874 \ 875 if (!pixman_transform_point_3d (src_image->common.transform, &v)) \ 876 return; \ 877 \ 878 unit_x = src_image->common.transform->matrix[0][0]; \ 879 unit_y = src_image->common.transform->matrix[1][1]; \ 880 \ 881 v.vector[0] -= pixman_fixed_1 / 2; \ 882 v.vector[1] -= pixman_fixed_1 / 2; \ 883 \ 884 vy = v.vector[1]; \ 885 \ 886 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD || \ 887 PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ 888 { \ 889 bilinear_pad_repeat_get_scanline_bounds (src_image->bits.width, v.vector[0], unit_x, \ 890 &left_pad, &left_tz, &width, &right_tz, &right_pad); \ 891 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ 892 { \ 893 /* PAD repeat does not need special handling for 'transition zones' and */ \ 894 /* they can be combined with 'padding zones' safely */ \ 895 left_pad += left_tz; \ 896 right_pad += right_tz; \ 897 left_tz = right_tz = 0; \ 898 } \ 899 v.vector[0] += left_pad * unit_x; \ 900 } \ 901 \ 902 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ 903 { \ 904 vx = v.vector[0]; \ 905 repeat (PIXMAN_REPEAT_NORMAL, &vx, pixman_int_to_fixed(src_image->bits.width)); \ 906 max_x = pixman_fixed_to_int (vx + (width - 1) * (int64_t)unit_x) + 1; \ 907 \ 908 if (src_image->bits.width < REPEAT_NORMAL_MIN_WIDTH) \ 909 { \ 910 src_width = 0; \ 911 \ 912 while (src_width < REPEAT_NORMAL_MIN_WIDTH && src_width <= max_x) \ 913 src_width += 
src_image->bits.width; \ 914 \ 915 need_src_extension = TRUE; \ 916 } \ 917 else \ 918 { \ 919 src_width = src_image->bits.width; \ 920 need_src_extension = FALSE; \ 921 } \ 922 \ 923 src_width_fixed = pixman_int_to_fixed (src_width); \ 924 } \ 925 \ 926 while (--height >= 0) \ 927 { \ 928 int weight1, weight2; \ 929 dst = dst_line; \ 930 dst_line += dst_stride; \ 931 vx = v.vector[0]; \ 932 if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 933 { \ 934 mask = mask_line; \ 935 mask_line += mask_stride; \ 936 } \ 937 \ 938 y1 = pixman_fixed_to_int (vy); \ 939 weight2 = pixman_fixed_to_bilinear_weight (vy); \ 940 if (weight2) \ 941 { \ 942 /* both weight1 and weight2 are smaller than BILINEAR_INTERPOLATION_RANGE */ \ 943 y2 = y1 + 1; \ 944 weight1 = BILINEAR_INTERPOLATION_RANGE - weight2; \ 945 } \ 946 else \ 947 { \ 948 /* set both top and bottom row to the same scanline and tweak weights */ \ 949 y2 = y1; \ 950 weight1 = weight2 = BILINEAR_INTERPOLATION_RANGE / 2; \ 951 } \ 952 vy += unit_y; \ 953 if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_PAD) \ 954 { \ 955 src_type_t *src1, *src2; \ 956 src_type_t buf1[2]; \ 957 src_type_t buf2[2]; \ 958 repeat (PIXMAN_REPEAT_PAD, &y1, src_image->bits.height); \ 959 repeat (PIXMAN_REPEAT_PAD, &y2, src_image->bits.height); \ 960 src1 = src_first_line + src_stride * y1; \ 961 src2 = src_first_line + src_stride * y2; \ 962 \ 963 if (left_pad > 0) \ 964 { \ 965 buf1[0] = buf1[1] = src1[0]; \ 966 buf2[0] = buf2[1] = src2[0]; \ 967 scanline_func (dst, mask, \ 968 buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, FALSE); \ 969 dst += left_pad; \ 970 if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 971 mask += left_pad; \ 972 } \ 973 if (width > 0) \ 974 { \ 975 scanline_func (dst, mask, \ 976 src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ 977 dst += width; \ 978 if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 979 mask += width; \ 980 } \ 981 if (right_pad > 0) \ 982 { \ 983 buf1[0] = buf1[1] = src1[src_image->bits.width - 1]; \ 984 buf2[0] = 
buf2[1] = src2[src_image->bits.width - 1]; \ 985 scanline_func (dst, mask, \ 986 buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, FALSE); \ 987 } \ 988 } \ 989 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NONE) \ 990 { \ 991 src_type_t *src1, *src2; \ 992 src_type_t buf1[2]; \ 993 src_type_t buf2[2]; \ 994 /* handle top/bottom zero padding by just setting weights to 0 if needed */ \ 995 if (y1 < 0) \ 996 { \ 997 weight1 = 0; \ 998 y1 = 0; \ 999 } \ 1000 if (y1 >= src_image->bits.height) \ 1001 { \ 1002 weight1 = 0; \ 1003 y1 = src_image->bits.height - 1; \ 1004 } \ 1005 if (y2 < 0) \ 1006 { \ 1007 weight2 = 0; \ 1008 y2 = 0; \ 1009 } \ 1010 if (y2 >= src_image->bits.height) \ 1011 { \ 1012 weight2 = 0; \ 1013 y2 = src_image->bits.height - 1; \ 1014 } \ 1015 src1 = src_first_line + src_stride * y1; \ 1016 src2 = src_first_line + src_stride * y2; \ 1017 \ 1018 if (left_pad > 0) \ 1019 { \ 1020 buf1[0] = buf1[1] = 0; \ 1021 buf2[0] = buf2[1] = 0; \ 1022 scanline_func (dst, mask, \ 1023 buf1, buf2, left_pad, weight1, weight2, 0, 0, 0, TRUE); \ 1024 dst += left_pad; \ 1025 if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 1026 mask += left_pad; \ 1027 } \ 1028 if (left_tz > 0) \ 1029 { \ 1030 buf1[0] = 0; \ 1031 buf1[1] = src1[0]; \ 1032 buf2[0] = 0; \ 1033 buf2[1] = src2[0]; \ 1034 scanline_func (dst, mask, \ 1035 buf1, buf2, left_tz, weight1, weight2, \ 1036 pixman_fixed_frac (vx), unit_x, 0, FALSE); \ 1037 dst += left_tz; \ 1038 if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 1039 mask += left_tz; \ 1040 vx += left_tz * unit_x; \ 1041 } \ 1042 if (width > 0) \ 1043 { \ 1044 scanline_func (dst, mask, \ 1045 src1, src2, width, weight1, weight2, vx, unit_x, 0, FALSE); \ 1046 dst += width; \ 1047 if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 1048 mask += width; \ 1049 vx += width * unit_x; \ 1050 } \ 1051 if (right_tz > 0) \ 1052 { \ 1053 buf1[0] = src1[src_image->bits.width - 1]; \ 1054 buf1[1] = 0; \ 1055 buf2[0] = src2[src_image->bits.width - 1]; \ 1056 buf2[1] = 0; \ 1057 
scanline_func (dst, mask, \ 1058 buf1, buf2, right_tz, weight1, weight2, \ 1059 pixman_fixed_frac (vx), unit_x, 0, FALSE); \ 1060 dst += right_tz; \ 1061 if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 1062 mask += right_tz; \ 1063 } \ 1064 if (right_pad > 0) \ 1065 { \ 1066 buf1[0] = buf1[1] = 0; \ 1067 buf2[0] = buf2[1] = 0; \ 1068 scanline_func (dst, mask, \ 1069 buf1, buf2, right_pad, weight1, weight2, 0, 0, 0, TRUE); \ 1070 } \ 1071 } \ 1072 else if (PIXMAN_REPEAT_ ## repeat_mode == PIXMAN_REPEAT_NORMAL) \ 1073 { \ 1074 int32_t num_pixels; \ 1075 int32_t width_remain; \ 1076 src_type_t * src_line_top; \ 1077 src_type_t * src_line_bottom; \ 1078 src_type_t buf1[2]; \ 1079 src_type_t buf2[2]; \ 1080 src_type_t extended_src_line0[REPEAT_NORMAL_MIN_WIDTH*2]; \ 1081 src_type_t extended_src_line1[REPEAT_NORMAL_MIN_WIDTH*2]; \ 1082 int i, j; \ 1083 \ 1084 repeat (PIXMAN_REPEAT_NORMAL, &y1, src_image->bits.height); \ 1085 repeat (PIXMAN_REPEAT_NORMAL, &y2, src_image->bits.height); \ 1086 src_line_top = src_first_line + src_stride * y1; \ 1087 src_line_bottom = src_first_line + src_stride * y2; \ 1088 \ 1089 if (need_src_extension) \ 1090 { \ 1091 for (i=0; i<src_width;) \ 1092 { \ 1093 for (j=0; j<src_image->bits.width; j++, i++) \ 1094 { \ 1095 extended_src_line0[i] = src_line_top[j]; \ 1096 extended_src_line1[i] = src_line_bottom[j]; \ 1097 } \ 1098 } \ 1099 \ 1100 src_line_top = &extended_src_line0[0]; \ 1101 src_line_bottom = &extended_src_line1[0]; \ 1102 } \ 1103 \ 1104 /* Top & Bottom wrap around buffer */ \ 1105 buf1[0] = src_line_top[src_width - 1]; \ 1106 buf1[1] = src_line_top[0]; \ 1107 buf2[0] = src_line_bottom[src_width - 1]; \ 1108 buf2[1] = src_line_bottom[0]; \ 1109 \ 1110 width_remain = width; \ 1111 \ 1112 while (width_remain > 0) \ 1113 { \ 1114 /* We use src_width_fixed because it can make vx in original source range */ \ 1115 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ 1116 \ 1117 /* Wrap around part */ \ 1118 if (pixman_fixed_to_int (vx) == 
src_width - 1) \ 1119 { \ 1120 /* for positive unit_x \ 1121 * num_pixels = max(n) + 1, where vx + n*unit_x < src_width_fixed \ 1122 * \ 1123 * vx is in range [0, src_width_fixed - pixman_fixed_e] \ 1124 * So we are safe from overflow. \ 1125 */ \ 1126 num_pixels = ((src_width_fixed - vx - pixman_fixed_e) / unit_x) + 1; \ 1127 \ 1128 if (num_pixels > width_remain) \ 1129 num_pixels = width_remain; \ 1130 \ 1131 scanline_func (dst, mask, buf1, buf2, num_pixels, \ 1132 weight1, weight2, pixman_fixed_frac(vx), \ 1133 unit_x, src_width_fixed, FALSE); \ 1134 \ 1135 width_remain -= num_pixels; \ 1136 vx += num_pixels * unit_x; \ 1137 dst += num_pixels; \ 1138 \ 1139 if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 1140 mask += num_pixels; \ 1141 \ 1142 repeat (PIXMAN_REPEAT_NORMAL, &vx, src_width_fixed); \ 1143 } \ 1144 \ 1145 /* Normal scanline composite */ \ 1146 if (pixman_fixed_to_int (vx) != src_width - 1 && width_remain > 0) \ 1147 { \ 1148 /* for positive unit_x \ 1149 * num_pixels = max(n) + 1, where vx + n*unit_x < (src_width_fixed - 1) \ 1150 * \ 1151 * vx is in range [0, src_width_fixed - pixman_fixed_e] \ 1152 * So we are safe from overflow here. 
\ 1153 */ \ 1154 num_pixels = ((src_width_fixed - pixman_fixed_1 - vx - pixman_fixed_e) \ 1155 / unit_x) + 1; \ 1156 \ 1157 if (num_pixels > width_remain) \ 1158 num_pixels = width_remain; \ 1159 \ 1160 scanline_func (dst, mask, src_line_top, src_line_bottom, num_pixels, \ 1161 weight1, weight2, vx, unit_x, src_width_fixed, FALSE); \ 1162 \ 1163 width_remain -= num_pixels; \ 1164 vx += num_pixels * unit_x; \ 1165 dst += num_pixels; \ 1166 \ 1167 if (flags & FLAG_HAVE_NON_SOLID_MASK) \ 1168 mask += num_pixels; \ 1169 } \ 1170 } \ 1171 } \ 1172 else \ 1173 { \ 1174 scanline_func (dst, mask, src_first_line + src_stride * y1, \ 1175 src_first_line + src_stride * y2, width, \ 1176 weight1, weight2, vx, unit_x, max_vx, FALSE); \ 1177 } \ 1178 } \ 1179 } 1180 1181 /* A workaround for old sun studio, see: https://bugs.freedesktop.org/show_bug.cgi?id=32764 */ /* FAST_BILINEAR_MAINLOOP_COMMON forwards every argument unchanged to FAST_BILINEAR_MAINLOOP_INT, except that scale_func_name is prefixed with an underscore by token pasting. */ 1182 #define FAST_BILINEAR_MAINLOOP_COMMON(scale_func_name, scanline_func, src_type_t, mask_type_t, \ 1183 dst_type_t, repeat_mode, flags) \ 1184 FAST_BILINEAR_MAINLOOP_INT(_ ## scale_func_name, scanline_func, src_type_t, mask_type_t,\ 1185 dst_type_t, repeat_mode, flags) 1186 /* SCALED_BILINEAR_FLAGS is the parenthesized bitwise OR of the five FAST_PATH flags listed in its body. The SIMPLE_BILINEAR entry macros place it, usually ORed with further flags, directly after the source format element. */ 1187 #define SCALED_BILINEAR_FLAGS \ 1188 (FAST_PATH_SCALE_TRANSFORM | \ 1189 FAST_PATH_NO_ALPHA_MAP | \ 1190 FAST_PATH_BILINEAR_FILTER | \ 1191 FAST_PATH_NO_ACCESSORS | \ 1192 FAST_PATH_NARROW_FORMAT) 1193 /* SIMPLE_BILINEAR_FAST_PATH_PAD (op, s, d, func) expands to one brace-enclosed initializer whose elements are, in order: PIXMAN_OP_ pasted with op; PIXMAN_ pasted with s, followed by the flag set SCALED_BILINEAR_FLAGS | FAST_PATH_PAD_REPEAT | FAST_PATH_X_UNIT_POSITIVE; PIXMAN_null followed by 0; PIXMAN_ pasted with d, followed by FAST_PATH_STD_DEST_FLAGS; and the identifier formed by pasting fast_composite_scaled_bilinear_, func, _pad, _ and op. Because op, s, d and func are operands of the paste operator, they are pasted as written, without prior macro expansion. NOTE(review): presumably this initializes one fast path table entry (operator, source, mask, destination, function) and the PIXMAN_null, 0 pair means no mask; the struct layout is not visible here, confirm against its declaration. */ 1194 #define SIMPLE_BILINEAR_FAST_PATH_PAD(op,s,d,func) \ 1195 { PIXMAN_OP_ ## op, \ 1196 PIXMAN_ ## s, \ 1197 (SCALED_BILINEAR_FLAGS | \ 1198 FAST_PATH_PAD_REPEAT | \ 1199 FAST_PATH_X_UNIT_POSITIVE), \ 1200 PIXMAN_null, 0, \ 1201 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1202 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ 1203 } 1204 /* SIMPLE_BILINEAR_FAST_PATH_NONE uses the same element order as SIMPLE_BILINEAR_FAST_PATH_PAD, but with FAST_PATH_NONE_REPEAT in the flag set and _none instead of _pad in the pasted function identifier. */ 1205 #define SIMPLE_BILINEAR_FAST_PATH_NONE(op,s,d,func) \ 1206 { PIXMAN_OP_ ## op, \ 1207 PIXMAN_ ## s, \ 1208 (SCALED_BILINEAR_FLAGS | \ 1209 FAST_PATH_NONE_REPEAT | \ 1210 FAST_PATH_X_UNIT_POSITIVE), \ 1211 PIXMAN_null, 0, \ 1212 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1213 
fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ 1214 } 1215 /* SIMPLE_BILINEAR_FAST_PATH_COVER (op, s, d, func) expands to one brace-enclosed initializer whose elements are, in order: PIXMAN_OP_ pasted with op; PIXMAN_ pasted with s; SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR; PIXMAN_null; 0; PIXMAN_ pasted with d; FAST_PATH_STD_DEST_FLAGS; and the identifier formed by pasting fast_composite_scaled_bilinear_, func, _cover, _ and op. Unlike the NORMAL, PAD and NONE variants it ORs in neither a repeat flag nor FAST_PATH_X_UNIT_POSITIVE. The flag expression is not parenthesized, which is harmless as long as the expansion is only used as a comma-separated initializer element. */ 1216 #define SIMPLE_BILINEAR_FAST_PATH_COVER(op,s,d,func) \ 1217 { PIXMAN_OP_ ## op, \ 1218 PIXMAN_ ## s, \ 1219 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ 1220 PIXMAN_null, 0, \ 1221 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1222 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ 1223 } 1224 /* SIMPLE_BILINEAR_FAST_PATH_NORMAL uses the same element order, with the flag set SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE and _normal in the pasted function identifier. The macro parameters are operands of the paste operator, so they are pasted as written, without prior macro expansion. */ 1225 #define SIMPLE_BILINEAR_FAST_PATH_NORMAL(op,s,d,func) \ 1226 { PIXMAN_OP_ ## op, \ 1227 PIXMAN_ ## s, \ 1228 (SCALED_BILINEAR_FLAGS | \ 1229 FAST_PATH_NORMAL_REPEAT | \ 1230 FAST_PATH_X_UNIT_POSITIVE), \ 1231 PIXMAN_null, 0, \ 1232 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1233 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ 1234 } 1235 /* The SIMPLE_BILINEAR_A8_MASK_FAST_PATH macros have the same element order as the maskless macros, except that the PIXMAN_null, 0 pair is replaced by PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA). The pasted function identifier has the same form as in the maskless macros. NOTE(review): presumably the fourth and fifth elements are the mask format and mask flags; confirm against the entry struct declaration. PAD variant: FAST_PATH_PAD_REPEAT and _pad. */ 1236 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD(op,s,d,func) \ 1237 { PIXMAN_OP_ ## op, \ 1238 PIXMAN_ ## s, \ 1239 (SCALED_BILINEAR_FLAGS | \ 1240 FAST_PATH_PAD_REPEAT | \ 1241 FAST_PATH_X_UNIT_POSITIVE), \ 1242 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ 1243 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1244 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ 1245 } 1246 /* A8-mask NONE variant: FAST_PATH_NONE_REPEAT in the flag set and _none in the pasted function identifier. */ 1247 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE(op,s,d,func) \ 1248 { PIXMAN_OP_ ## op, \ 1249 PIXMAN_ ## s, \ 1250 (SCALED_BILINEAR_FLAGS | \ 1251 FAST_PATH_NONE_REPEAT | \ 1252 FAST_PATH_X_UNIT_POSITIVE), \ 1253 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ 1254 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1255 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ 1256 } 1257 /* A8-mask COVER variant: the flag set is SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (unparenthesized, no repeat flag, no FAST_PATH_X_UNIT_POSITIVE) and the pasted function identifier uses _cover. */ 1258 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER(op,s,d,func) \ 1259 { PIXMAN_OP_ ## op, \ 1260 PIXMAN_ ## s, \ 1261 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ 1262 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ 1263 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1264 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ 1265 } 1266 /* A8-mask variant of the NORMAL-repeat entry. */ 1267 #define 
SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL(op,s,d,func) /* Expands to one brace-enclosed initializer: PIXMAN_OP_ pasted with op; PIXMAN_ pasted with s; SCALED_BILINEAR_FLAGS | FAST_PATH_NORMAL_REPEAT | FAST_PATH_X_UNIT_POSITIVE; PIXMAN_a8; MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA); PIXMAN_ pasted with d; FAST_PATH_STD_DEST_FLAGS; and the identifier pasted from fast_composite_scaled_bilinear_, func, _normal, _ and op. */ \ 1268 { PIXMAN_OP_ ## op, \ 1269 PIXMAN_ ## s, \ 1270 (SCALED_BILINEAR_FLAGS | \ 1271 FAST_PATH_NORMAL_REPEAT | \ 1272 FAST_PATH_X_UNIT_POSITIVE), \ 1273 PIXMAN_a8, MASK_FLAGS (a8, FAST_PATH_UNIFIED_ALPHA), \ 1274 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1275 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ 1276 } 1277 /* The SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH macros have the same element order as the A8-mask macro above, except that the fourth and fifth elements are PIXMAN_solid and MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA). The macro parameters are operands of the paste operator, so they are pasted as written, without prior macro expansion. NOTE(review): presumably the fourth and fifth elements are the mask format and mask flags; confirm against the entry struct declaration. PAD variant: FAST_PATH_PAD_REPEAT and _pad. */ 1278 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD(op,s,d,func) \ 1279 { PIXMAN_OP_ ## op, \ 1280 PIXMAN_ ## s, \ 1281 (SCALED_BILINEAR_FLAGS | \ 1282 FAST_PATH_PAD_REPEAT | \ 1283 FAST_PATH_X_UNIT_POSITIVE), \ 1284 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 1285 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1286 fast_composite_scaled_bilinear_ ## func ## _pad ## _ ## op, \ 1287 } 1288 /* Solid-mask NONE variant: FAST_PATH_NONE_REPEAT in the flag set and _none in the pasted function identifier. */ 1289 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE(op,s,d,func) \ 1290 { PIXMAN_OP_ ## op, \ 1291 PIXMAN_ ## s, \ 1292 (SCALED_BILINEAR_FLAGS | \ 1293 FAST_PATH_NONE_REPEAT | \ 1294 FAST_PATH_X_UNIT_POSITIVE), \ 1295 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 1296 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1297 fast_composite_scaled_bilinear_ ## func ## _none ## _ ## op, \ 1298 } 1299 /* Solid-mask COVER variant: the flag set is SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR (unparenthesized, no repeat flag, no FAST_PATH_X_UNIT_POSITIVE) and the pasted function identifier uses _cover. */ 1300 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER(op,s,d,func) \ 1301 { PIXMAN_OP_ ## op, \ 1302 PIXMAN_ ## s, \ 1303 SCALED_BILINEAR_FLAGS | FAST_PATH_SAMPLES_COVER_CLIP_BILINEAR, \ 1304 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 1305 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1306 fast_composite_scaled_bilinear_ ## func ## _cover ## _ ## op, \ 1307 } 1308 /* Solid-mask NORMAL variant: FAST_PATH_NORMAL_REPEAT in the flag set and _normal in the pasted function identifier. */ 1309 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL(op,s,d,func) \ 1310 { PIXMAN_OP_ ## op, \ 1311 PIXMAN_ ## s, \ 1312 (SCALED_BILINEAR_FLAGS | \ 1313 FAST_PATH_NORMAL_REPEAT | \ 1314 FAST_PATH_X_UNIT_POSITIVE), \ 1315 PIXMAN_solid, MASK_FLAGS (solid, FAST_PATH_UNIFIED_ALPHA), \ 1316 PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS, \ 1317 fast_composite_scaled_bilinear_ ## func ## _normal ## _ ## op, \ 1318 
} 1319 1320 /* Prefer the use of the 'cover' variant, because it is faster. SIMPLE_BILINEAR_FAST_PATH (op, s, d, func) expands to four comma-separated entries in the order COVER, NONE, PAD, NORMAL, with no trailing comma after the last one. Its arguments are forwarded rather than pasted here, so they are macro-expanded before the per-variant macros paste them. NOTE(review): listing COVER first presumably makes it the first candidate when the entries are searched in order; confirm against the lookup code. */ 1321 #define SIMPLE_BILINEAR_FAST_PATH(op,s,d,func) \ 1322 SIMPLE_BILINEAR_FAST_PATH_COVER (op,s,d,func), \ 1323 SIMPLE_BILINEAR_FAST_PATH_NONE (op,s,d,func), \ 1324 SIMPLE_BILINEAR_FAST_PATH_PAD (op,s,d,func), \ 1325 SIMPLE_BILINEAR_FAST_PATH_NORMAL (op,s,d,func) 1326 /* Same aggregate for the A8-mask entry macros: COVER, NONE, PAD, NORMAL, comma-separated, no trailing comma. */ 1327 #define SIMPLE_BILINEAR_A8_MASK_FAST_PATH(op,s,d,func) \ 1328 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_COVER (op,s,d,func), \ 1329 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NONE (op,s,d,func), \ 1330 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_PAD (op,s,d,func), \ 1331 SIMPLE_BILINEAR_A8_MASK_FAST_PATH_NORMAL (op,s,d,func) 1332 /* Same aggregate for the solid-mask entry macros: COVER, NONE, PAD, NORMAL, comma-separated, no trailing comma. */ 1333 #define SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH(op,s,d,func) \ 1334 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_COVER (op,s,d,func), \ 1335 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NONE (op,s,d,func), \ 1336 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_PAD (op,s,d,func), \ 1337 SIMPLE_BILINEAR_SOLID_MASK_FAST_PATH_NORMAL (op,s,d,func) 1338 1339 #endif /* NOTE(review): presumably closes the PIXMAN_FAST_PATH_H__ include guard opened at the top of the file; confirm. */ 1340