1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */ 2 /* 3 * Copyright 2000 SuSE, Inc. 4 * Copyright 2007 Red Hat, Inc. 5 * 6 * Permission to use, copy, modify, distribute, and sell this software and its 7 * documentation for any purpose is hereby granted without fee, provided that 8 * the above copyright notice appear in all copies and that both that 9 * copyright notice and this permission notice appear in supporting 10 * documentation, and that the name of SuSE not be used in advertising or 11 * publicity pertaining to distribution of the software without specific, 12 * written prior permission. SuSE makes no representations about the 13 * suitability of this software for any purpose. It is provided "as is" 14 * without express or implied warranty. 15 * 16 * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE 18 * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 19 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 20 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 21 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 22 * 23 * Author: Keith Packard, SuSE, Inc. 
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif
#include <string.h>
#include <stdlib.h>
#include "pixman-private.h"
#include "pixman-combine32.h"
#include "pixman-inlines.h"

/* Read a 24-bit (3-byte) pixel from a possibly unaligned address as a
 * packed 0x00RRGGBB value.  The address is split into a 16-bit and an
 * 8-bit access chosen so the 16-bit load is always 2-byte aligned.
 * NOTE(review): relies on uint16_t loads being legal at 2-byte-aligned
 * addresses inside the pixel buffer — true for pixman's allocations.
 */
static force_inline uint32_t
fetch_24 (uint8_t *a)
{
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
	return (*a << 16) | (*(uint16_t *)(a + 1));
#else
	return *a | (*(uint16_t *)(a + 1) << 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
	return (*(uint16_t *)a << 8) | *(a + 2);
#else
	return *(uint16_t *)a | (*(a + 2) << 16);
#endif
    }
}

/* Store the low 24 bits of v to a possibly unaligned 3-byte pixel;
 * mirror of fetch_24 (same alignment-splitting scheme).
 */
static force_inline void
store_24 (uint8_t *a,
          uint32_t v)
{
    if (((uintptr_t)a) & 1)
    {
#ifdef WORDS_BIGENDIAN
	*a = (uint8_t) (v >> 16);
	*(uint16_t *)(a + 1) = (uint16_t) (v);
#else
	*a = (uint8_t) (v);
	*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
#endif
    }
    else
    {
#ifdef WORDS_BIGENDIAN
	*(uint16_t *)a = (uint16_t)(v >> 8);
	*(a + 2) = (uint8_t)v;
#else
	*(uint16_t *)a = (uint16_t)v;
	*(a + 2) = (uint8_t)(v >> 16);
#endif
    }
}

/* Porter-Duff OVER for premultiplied a8r8g8b8:
 * result = src + dest * (1 - src.alpha), computed per channel.
 */
static force_inline uint32_t
over (uint32_t src,
      uint32_t dest)
{
    uint32_t a = ~src >> 24;	/* 255 - src alpha */

    UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);

    return dest;
}

/* Multiply all four channels of x by the 8-bit value y (the IN operator
 * against an a8 mask value).
 */
static force_inline uint32_t
in (uint32_t x,
    uint8_t  y)
{
    uint16_t a = y;

    UN8x4_MUL_UN8 (x, a);

    return x;
}

/*
 * Naming convention:
 *
 *    op_src_mask_dest
 */

/* OVER with an x8r8g8b8 source (alpha forced opaque), an a8 mask and an
 * a8r8g8b8/a8b8g8r8 destination.  Mask value 0 skips the pixel, 0xff
 * copies the (opaqued) source, anything else does IN then OVER.
 */
static void
fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
                                 pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *src, *src_line;
    uint32_t    *dst, *dst_line;
    uint8_t     *mask, *mask_line;
    int src_stride, mask_stride, dst_stride;
    uint8_t m;
    uint32_t s, d;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	src = src_line;
	src_line += src_stride;
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;

	w = width;
	while (w--)
	{
	    m = *mask++;
	    if (m)
	    {
		/* x888 source: the unused byte must read as full alpha */
		s = *src | 0xff000000;

		if (m == 0xff)
		{
		    *dst = s;
		}
		else
		{
		    d = in (s, m);
		    *dst = over (d, *dst);
		}
	    }
	    src++;
	    dst++;
	}
    }
}

/* IN with a solid source, a8 mask and a8 destination:
 * dst = dst * (mask * src.alpha).  The srca == 0xff case skips the
 * per-pixel multiply by source alpha.
 */
static void
fast_composite_in_n_8_8 (pixman_implementation_t *imp,
                         pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;
    uint16_t t;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    if (srca == 0xff)
    {
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    while (w--)
	    {
		m = *mask++;

		if (m == 0)
		    *dst = 0;
		else if (m != 0xff)
		    *dst = MUL_UN8 (m, *dst, t);
		/* m == 0xff: dst unchanged (multiplied by 1) */

		dst++;
	    }
	}
    }
    else
    {
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    while (w--)
	    {
		m = *mask++;
		m = MUL_UN8 (m, srca, t);	/* fold source alpha into mask */

		if (m == 0)
		    *dst = 0;
		else if (m != 0xff)
		    *dst = MUL_UN8 (m, *dst, t);

		dst++;
	    }
	}
    }
}

/* IN with an a8 source and a8 destination (no mask): dst *= src. */
static void
fast_composite_in_8_8 (pixman_implementation_t *imp,
                       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint8_t s;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;

	    if (s == 0)
		*dst = 0;
	    else if (s != 0xff)
		*dst = MUL_UN8 (s, *dst, t);

	    dst++;
	}
    }
}

/* OVER with a solid source, a8 mask and 8888 destination. */
static void
fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint32_t    *dst_line, *dst, d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;		/* transparent black source: OVER is a no-op */

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    m = *mask++;
	    if (m == 0xff)
	    {
		if (srca == 0xff)
		    *dst = src;		/* opaque source: straight copy */
		else
		    *dst = over (src, *dst);
	    }
	    else if (m)
	    {
		d = in (src, m);
		*dst = over (d, *dst);
	    }
	    dst++;
	}
    }
}

/* ADD with a solid source, per-component-alpha 8888 mask and 8888 dest:
 * dst = dst + src * mask (per channel, saturating).
 */
static void
fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
                                   pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, s;
    uint32_t    *dst_line, *dst, d;
    uint32_t    *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;

	    if (ma)
	    {
		d = *dst;
		s = src;

		UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);

		*dst = s;
	    }

	    dst++;
	}
    }
}

/* OVER with a solid source, per-component-alpha 8888 mask and 8888 dest:
 * dst = src*mask + dst*(1 - srca*mask) per channel.
 */
static void
fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca, s;
    uint32_t    *dst_line, *dst, d;
    uint32_t    *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;
	    if (ma == 0xffffffff)
	    {
		if (srca == 0xff)
		    *dst = src;
		else
		    *dst = over (src, *dst);
	    }
	    else if (ma)
	    {
		d = *dst;
		s = src;

		UN8x4_MUL_UN8x4 (s, ma);	/* s = src * mask            */
		UN8x4_MUL_UN8 (ma, srca);	/* ma = mask * src.alpha     */
		ma = ~ma;			/* ma = 1 - mask * src.alpha */
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);

		*dst = d;
	    }

	    dst++;
	}
    }
}

/* OVER with a solid source, a8 mask and 24-bit (0888) destination;
 * uses fetch_24/store_24 for the unaligned 3-byte pixels.
 */
static void
fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    m = *mask++;
	    if (m == 0xff)
	    {
		if (srca == 0xff)
		{
		    d = src;
		}
		else
		{
		    d = fetch_24 (dst);
		    d = over (src, d);
		}
		store_24 (dst, d);
	    }
	    else if (m)
	    {
		d = over (in (src, m), fetch_24 (dst));
		store_24 (dst, d);
	    }
	    dst += 3;	/* 3 bytes per 0888 pixel */
	}
    }
}

/* OVER with a solid source, a8 mask and r5g6b5 destination; blending is
 * done in 8888 space via convert_0565_to_0888 / convert_8888_to_0565.
 */
static void
fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint16_t    *dst_line, *dst;
    uint32_t d;
    uint8_t     *mask_line, *mask, m;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    m = *mask++;
	    if (m == 0xff)
	    {
		if (srca == 0xff)
		{
		    d = src;
		}
		else
		{
		    d = *dst;
		    d = over (src, convert_0565_to_0888 (d));
		}
		*dst = convert_8888_to_0565 (d);
	    }
	    else if (m)
	    {
		d = *dst;
		d = over (in (src, m), convert_0565_to_0888 (d));
		*dst = convert_8888_to_0565 (d);
	    }
	    dst++;
	}
    }
}

/* OVER with a solid source, component-alpha 8888 mask and r5g6b5 dest. */
static void
fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
                                    pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca, s;
    uint16_t src16;
    uint16_t    *dst_line, *dst;
    uint32_t d;
    uint32_t    *mask_line, *mask, ma;
    int dst_stride, mask_stride;
    int32_t w;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    srca = src >> 24;
    if (src == 0)
	return;

    src16 = convert_8888_to_0565 (src);	/* precomputed for the opaque fast case */

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    ma = *mask++;
	    if (ma == 0xffffffff)
	    {
		if (srca == 0xff)
		{
		    *dst = src16;
		}
		else
		{
		    d = *dst;
		    d = over (src, convert_0565_to_0888 (d));
		    *dst = convert_8888_to_0565 (d);
		}
	    }
	    else if (ma)
	    {
		d = *dst;
		d = convert_0565_to_0888 (d);

		s = src;

		/* same component-alpha OVER as the 8888 variant above */
		UN8x4_MUL_UN8x4 (s, ma);
		UN8x4_MUL_UN8 (ma, srca);
		ma = ~ma;
		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);

		*dst = convert_8888_to_0565 (d);
	    }
	    dst++;
	}
    }
}

/* Unmasked OVER, a8r8g8b8 source onto 8888 destination. */
static void
fast_composite_over_8888_8888 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *dst_line, *dst;
    uint32_t    *src_line, *src, s;
    int dst_stride, src_stride;
    uint8_t a;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    a = s >> 24;
	    if (a == 0xff)
		*dst = s;		/* opaque: copy */
	    else if (s)
		*dst = over (s, *dst);	/* translucent: blend; s == 0 is a no-op */
	    dst++;
	}
    }
}

/* SRC copy from x888 to 8888: copy pixels while forcing alpha opaque. */
static void
fast_composite_src_x888_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *dst_line, *dst;
    uint32_t    *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	    *dst++ = (*src++) | 0xff000000;
    }
}

#if 0
/* Unmasked OVER, 8888 source onto 24-bit 0888 destination (disabled). */
static void
fast_composite_over_8888_0888 (pixman_implementation_t *imp,
			       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint32_t d;
    uint32_t    *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    a = s >> 24;
	    if (a)
	    {
		if (a == 0xff)
		    d = s;
		else
		    d = over (s, fetch_24 (dst));

		store_24 (dst, d);
	    }
	    dst += 3;
	}
    }
}
#endif

/* Unmasked OVER, 8888 source onto r5g6b5 destination. */
static void
fast_composite_over_8888_0565 (pixman_implementation_t *imp,
                               pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint16_t    *dst_line, *dst;
    uint32_t d;
    uint32_t    *src_line, *src, s;
    uint8_t a;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    a = s >> 24;
	    if (s)	/* any non-zero premultiplied pixel contributes */
	    {
		if (a == 0xff)
		{
		    d = s;
		}
		else
		{
		    d = *dst;
		    d = over (s, convert_0565_to_0888 (d));
		}
		*dst = convert_8888_to_0565 (d);
	    }
	    dst++;
	}
    }
}

/* Saturating ADD of two a8 images. */
static void
fast_composite_add_8_8 (pixman_implementation_t *imp,
			pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint8_t s, d;
    uint16_t t;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    if (s)
	    {
		if (s != 0xff)
		{
		    d = *dst;
		    t = d + s;
		    /* branch-free saturation: if t overflowed 8 bits,
		     * (0 - (t >> 8)) is 0xff.. and ORs to 0xff */
		    s = t | (0 - (t >> 8));
		}
		*dst = s;
	    }
	    dst++;
	}
    }
}

/* Saturating ADD of two r5g6b5 images, widened to 8888 per pixel. */
static void
fast_composite_add_0565_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint16_t    *dst_line, *dst;
    uint32_t d;
    uint16_t    *src_line, *src;
    uint32_t s;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    if (s)
	    {
		d = *dst;
		s = convert_0565_to_8888 (s);
		if (d)
		{
		    d = convert_0565_to_8888 (d);
		    UN8x4_ADD_UN8x4 (s, d);
		}
		*dst = convert_8888_to_0565 (s);
	    }
	    dst++;
	}
    }
}

/* Saturating ADD of two 8888 images. */
static void
fast_composite_add_8888_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *dst_line, *dst;
    uint32_t    *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;
    uint32_t s, d;

    PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    s = *src++;
	    if (s)
	    {
		if (s != 0xffffffff)	/* fully saturated source: just store */
		{
		    d = *dst;
		    if (d)
			UN8x4_ADD_UN8x4 (s, d);
		}
		*dst = s;
	    }
	    dst++;
	}
    }
}

/* ADD with a solid source and a8 mask onto a8 dest:
 * dst = saturate (dst + src.alpha * mask).
 */
static void
fast_composite_add_n_8_8 (pixman_implementation_t *imp,
			  pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint8_t     *dst_line, *dst;
    uint8_t     *mask_line, *mask;
    int dst_stride, mask_stride;
    int32_t w;
    uint32_t src;
    uint8_t sa;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    sa = (src >> 24);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	mask = mask_line;
	mask_line += mask_stride;
	w = width;

	while (w--)
	{
	    uint16_t tmp;
	    uint16_t a;
	    uint32_t m, d;
	    uint32_t r;

	    a = *mask++;
	    d = *dst;

	    m = MUL_UN8 (sa, a, tmp);
	    r = ADD_UN8 (m, d, tmp);

	    *dst++ = r;
	}
    }
}

/* Bit addressing helpers for a1 images.  In memory, bit order within a
 * 32-bit word differs between endiannesses, hence the two definitions.
 */
#ifdef WORDS_BIGENDIAN
#define CREATE_BITMASK(n) (0x80000000 >> (n))
#define UPDATE_BITMASK(n) ((n) >> 1)
#else
#define CREATE_BITMASK(n) (1 << (n))
#define UPDATE_BITMASK(n) ((n) << 1)
#endif

#define TEST_BIT(p, n)					\
    (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
#define SET_BIT(p, n)							\
    do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);

/* ADD (i.e. OR) of two a1 bitmaps, one bit at a time.
 * Lines are fetched at x = 0 because bit offsets are applied manually.
 */
static void
fast_composite_add_1_1 (pixman_implementation_t *imp,
                        pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t    *dst_line, *dst;
    uint32_t    *src_line, *src;
    int dst_stride, src_stride;
    int32_t w;

    PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
                           src_stride, src_line, 1);
    PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
                           dst_stride, dst_line, 1);

    while (height--)
    {
	dst = dst_line;
	dst_line += dst_stride;
	src = src_line;
	src_line += src_stride;
	w = width;

	while (w--)
	{
	    /*
	     * TODO: improve performance by processing uint32_t data instead
	     * of individual bits
	     */
	    if (TEST_BIT (src, src_x + w))
		SET_BIT (dst, dest_x + w);
	}
    }
}

/* OVER with a solid source and a1 mask onto 8888 dest: set bits paint,
 * clear bits leave the destination untouched.
 */
static void
fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint32_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int mask_stride, dst_stride;
    uint32_t bitcache, bitmask;
    int32_t w;

    if (width <= 0)
	return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    mask_line += mask_x >> 5;	/* word containing the first mask bit */

    if (srca == 0xff)
    {
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)	/* walked off the cached word */
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = src;
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
    else
    {
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = over (src, *dst);
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
}

/* OVER with a solid source and a1 mask onto r5g6b5 dest. */
static void
fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
                              pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src, srca;
    uint16_t    *dst, *dst_line;
    uint32_t    *mask, *mask_line;
    int mask_stride, dst_stride;
    uint32_t bitcache, bitmask;
    int32_t w;
    uint32_t d;
    uint16_t src565;

    if (width <= 0)
	return;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    srca = src >> 24;
    if (src == 0)
	return;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
                           dst_stride, dst_line, 1);
    PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
                           mask_stride, mask_line, 1);
    mask_line += mask_x >> 5;

    if (srca == 0xff)
    {
	src565 = convert_8888_to_0565 (src);
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		    *dst = src565;
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
    else
    {
	while (height--)
	{
	    dst = dst_line;
	    dst_line += dst_stride;
	    mask = mask_line;
	    mask_line += mask_stride;
	    w = width;

	    bitcache = *mask++;
	    bitmask = CREATE_BITMASK (mask_x & 31);

	    while (w--)
	    {
		if (bitmask == 0)
		{
		    bitcache = *mask++;
		    bitmask = CREATE_BITMASK (0);
		}
		if (bitcache & bitmask)
		{
		    d = over (src, convert_0565_to_0888 (*dst));
		    *dst = convert_8888_to_0565 (d);
		}
		bitmask = UPDATE_BITMASK (bitmask);
		dst++;
	    }
	}
    }
}

/*
 * Simple bitblt
 */

/* SRC with a solid color: convert the color to the destination format's
 * width and delegate the rectangle fill to pixman_fill().
 */
static void
fast_composite_solid_fill (pixman_implementation_t *imp,
                           pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t src;

    src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);

    if (dest_image->bits.format == PIXMAN_a1)
    {
	src = src >> 31;
    }
    else if (dest_image->bits.format == PIXMAN_a8)
    {
	src = src >> 24;
    }
    else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
             dest_image->bits.format == PIXMAN_b5g6r5)
    {
	src = convert_8888_to_0565 (src);
    }

    pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
                 PIXMAN_FORMAT_BPP (dest_image->bits.format),
                 dest_x, dest_y,
                 width, height,
                 src);
}

/* SRC between images of identical layout: one memcpy per scanline.
 * Strides are converted from uint32_t units to bytes (rowstride * 4).
 */
static void
fast_composite_src_memcpy (pixman_implementation_t *imp,
			   pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
    uint32_t n_bytes = width * bpp;
    int dst_stride, src_stride;
    uint8_t    *dst;
    uint8_t    *src;

    src_stride = src_image->bits.rowstride * 4;
    dst_stride = dest_image->bits.rowstride * 4;

    src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
    dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;

    while (height--)
    {
	memcpy (dst, src, n_bytes);

	dst += dst_stride;
	src += src_stride;
    }
}

/* Instantiate nearest-neighbour scaling fast paths for the listed
 * (src format, dst format, operator, repeat mode) combinations.
 */
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)

#define REPEAT_MIN_WIDTH 32

/* Decompose a NORMAL-repeat composite into a series of unrepeated
 * composites, one horizontal tile strip per scanline.  Very narrow
 * sources (< REPEAT_MIN_WIDTH pixels) are first replicated into a
 * stack buffer so each delegated call covers a wider span.
 */
static void
fast_composite_tiled_repeat (pixman_implementation_t *imp,
			     pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    pixman_composite_func_t func;
    pixman_format_code_t mask_format;
    uint32_t src_flags, mask_flags;
    int32_t sx, sy;
    int32_t width_remain;
    int32_t num_pixels;
    int32_t src_width;
    int32_t i, j;
    pixman_image_t extended_src_image;
    uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
    pixman_bool_t need_src_extension;
    uint32_t *src_line;
    int32_t src_stride;
    int32_t src_bpp;
    pixman_composite_info_t info2 = *info;

    /* delegate lookup sees a source whose samples fully cover the clip */
    src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
		    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;

    if (mask_image)
    {
	mask_format = mask_image->common.extended_format_code;
	mask_flags = info->mask_flags;
    }
    else
    {
	mask_format = PIXMAN_null;
	mask_flags = FAST_PATH_IS_OPAQUE;
    }

    _pixman_implementation_lookup_composite (
	imp->toplevel, info->op,
	src_image->common.extended_format_code, src_flags,
	mask_format, mask_flags,
	dest_image->common.extended_format_code, info->dest_flags,
	&imp, &func);

    src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);

    if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
	!src_image->bits.indexed)
    {
	sx = src_x;
	sx = MOD (sx, src_image->bits.width);
	sx += width;
	src_width = 0;

	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
	    src_width += src_image->bits.width;

	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);

	/* Initialize/validate stack-allocated temporary image */
	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
				 src_width, 1, &extended_src[0], src_stride,
				 FALSE);
	_pixman_image_validate (&extended_src_image);

	info2.src_image = &extended_src_image;
	need_src_extension = TRUE;
    }
    else
    {
	src_width = src_image->bits.width;
	need_src_extension = FALSE;
    }

    sx = src_x;
    sy = src_y;

    while (--height >= 0)
    {
	sx = MOD (sx, src_width);
	sy = MOD (sy, src_image->bits.height);

	if (need_src_extension)
	{
	    /* replicate the current source row into extended_src */
	    if (src_bpp == 32)
	    {
		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			extended_src[i] = src_line[j];
		}
	    }
	    else if (src_bpp == 16)
	    {
		uint16_t *src_line_16;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
				       src_line_16, 1);
		src_line = (uint32_t*)src_line_16;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
		}
	    }
	    else if (src_bpp == 8)
	    {
		uint8_t *src_line_8;

		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
				       src_line_8, 1);
		src_line = (uint32_t*)src_line_8;

		for (i = 0; i < src_width; )
		{
		    for (j = 0; j < src_image->bits.width; j++, i++)
			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
		}
	    }

	    info2.src_y = 0;
	}
	else
	{
	    info2.src_y = sy;
	}

	width_remain = width;

	while (width_remain > 0)
	{
	    /* composite one tile strip (clamped to what remains) */
	    num_pixels = src_width - sx;

	    if (num_pixels > width_remain)
		num_pixels = width_remain;

	    info2.src_x = sx;
	    info2.width = num_pixels;
	    info2.height = 1;

	    func (imp, &info2);

	    width_remain -= num_pixels;
	    info2.mask_x += num_pixels;
	    info2.dest_x += num_pixels;
	    sx = 0;
	}

	sx = src_x;
	sy++;
	info2.mask_x = info->mask_x;
	info2.mask_y++;
	info2.dest_x = info->dest_x;
	info2.dest_y++;
    }

    if (need_src_extension)
	_pixman_image_fini (&extended_src_image);
}

/* Use more unrolling for src_0565_0565 because it is typically CPU bound */
static force_inline void
scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
				     const uint16_t * src,
				     int32_t          w,
				     pixman_fixed_t   vx,
				     pixman_fixed_t   unit_x,
				     pixman_fixed_t   max_vx,
				     pixman_bool_t    fully_transparent_src)
{
    uint16_t tmp1, tmp2, tmp3, tmp4;
    while ((w -= 4) >= 0)
    {
	tmp1 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	tmp2 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	tmp3 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	tmp4 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	*dst++ = tmp1;
	*dst++ = tmp2;
	*dst++ = tmp3;
	*dst++ = tmp4;
    }
    /* w went negative above; test the remaining 0-3 pixels via its bits */
    if (w & 2)
    {
	tmp1 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	tmp2 = *(src + pixman_fixed_to_int (vx));
	vx += unit_x;
	*dst++ = tmp1;
	*dst++ = tmp2;
    }
    if (w & 1)
	*dst = *(src + pixman_fixed_to_int (vx));
}

FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, COVER)
FAST_NEAREST_MAINLOOP (565_565_none_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, NONE)
FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
		       scaled_nearest_scanline_565_565_SRC,
		       uint16_t, uint16_t, PAD)

/* Fetch one pixel for nearest scaling, applying the repeat mode to x;
 * out-of-bounds (when repeat() rejects x) reads as transparent black,
 * and x888 formats read with alpha forced opaque.
 */
static force_inline uint32_t
fetch_nearest (pixman_repeat_t src_repeat,
	       pixman_format_code_t format,
	       uint32_t *src, int x, int src_width)
{
    if (repeat (src_repeat, &x, src_width))
    {
	if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
	    return *(src + x) | 0xff000000;
	else
	    return *(src + x);
    }
    else
    {
	return 0;
    }
}

/* OVER of one premultiplied pixel onto *dst (skips transparent src,
 * copies opaque src).
 */
static force_inline void
combine_over (uint32_t s, uint32_t *dst)
{
    if (s)
    {
	uint8_t ia = 0xff - (s >> 24);

	if (ia)
	    UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
	else
	    *dst = s;
    }
}

/* SRC of one pixel onto *dst. */
static force_inline void
combine_src (uint32_t s, uint32_t *dst)
{
    *dst = s;
}

/* General nearest-neighbour scaled composite for SRC/OVER with 8888
 * formats; handles any repeat mode via fetch_nearest, assumes a pure
 * scale transform (only matrix[0][0] / matrix[1][1] used).
 */
static void
fast_composite_scaled_nearest (pixman_implementation_t *imp,
			       pixman_composite_info_t *info)
{
    PIXMAN_COMPOSITE_ARGS (info);
    uint32_t       *dst_line;
    uint32_t       *src_line;
    int             dst_stride, src_stride;
    int		    src_width, src_height;
    pixman_repeat_t src_repeat;
    pixman_fixed_t unit_x, unit_y;
    pixman_format_code_t src_format;
    pixman_vector_t v;
    pixman_fixed_t vy;

    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
     * transformed from destination space to source space
     */
    PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);

    /* reference point is the center of the pixel */
    v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
    v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
    v.vector[2] = pixman_fixed_1;

    if (!pixman_transform_point_3d (src_image->common.transform, &v))
	return;

    unit_x = src_image->common.transform->matrix[0][0];
    unit_y = src_image->common.transform->matrix[1][1];

    /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
    v.vector[0] -= pixman_fixed_e;
    v.vector[1] -= pixman_fixed_e;

    src_height = src_image->bits.height;
    src_width = src_image->bits.width;
    src_repeat = src_image->common.repeat;
    src_format = src_image->bits.format;

    vy = v.vector[1];
    while (height--)
    {
	pixman_fixed_t vx = v.vector[0];
	int y = pixman_fixed_to_int (vy);
	uint32_t *dst = dst_line;

	dst_line += dst_stride;

	/* adjust the y location by a unit vector in the y direction
	 * this is equivalent to transforming y+1 of the destination point to source space */
	vy += unit_y;

	if (!repeat (src_repeat, &y, src_height))
	{
	    if (op == PIXMAN_OP_SRC)
		memset (dst, 0, sizeof (*dst) * width);
	}
	else
	{
	    int w = width;

	    uint32_t *src = src_line + y * src_stride;

	    /* two-at-a-time unrolled loop */
	    while (w >= 2)
	    {
		uint32_t s1, s2;
		int x1, x2;

		x1 = pixman_fixed_to_int (vx);
		vx += unit_x;

		x2 = pixman_fixed_to_int (vx);
		vx += unit_x;

		w -= 2;

		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);

		if (op == PIXMAN_OP_OVER)
		{
		    combine_over (s1, dst++);
		    combine_over (s2, dst++);
		}
		else
		{
		    combine_src (s1, dst++);
		    combine_src (s2, dst++);
		}
	    }

	    while (w--)
	    {
		uint32_t s;
		int x;

		x = pixman_fixed_to_int (vx);
		vx += unit_x;

		s = fetch_nearest (src_repeat, src_format, src, x, src_width);

		if (op == PIXMAN_OP_OVER)
		    combine_over (s, dst++);
		else
		    combine_src (s, dst++);
	    }
	}
    }
}

#define CACHE_LINE_SIZE 64

/* NOTE(review): the FAST_SIMPLE_ROTATE macro continues past the end of
 * this chunk; the visible fragment is preserved unchanged.
 */
#define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
                                                                              \
static void                                                                   \
blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
				 int             dst_stride,                  \
				 const pix_type *src,                         \
				 int             src_stride,                  \
				 int             w,                           \
				 int             h)                           \
{                                                                             \
    int x, y;                                                                 \
    for (y = 0; y < h; y++)                                                   \
    {                                                                         \
	const pix_type *s = src + (h - y - 1);                                \
	pix_type *d = dst + dst_stride * y;                                   \
	for (x = 0; x < w; x++)                                               \
	{                                                                     \
	    *d++ = 
*s;									      \
	    s += src_stride;						      \
	}								      \
    }									      \
}									      \
									      \
static void								      \
blt_rotated_270_trivial_##suffix (pix_type       *dst,			      \
				  int             dst_stride,		      \
				  const pix_type *src,			      \
				  int             src_stride,		      \
				  int             w,			      \
				  int             h)			      \
{									      \
    int x, y;								      \
    for (y = 0; y < h; y++)						      \
    {									      \
	/* walk up a source column to fill a destination row */		      \
	const pix_type *s = src + src_stride * (w - 1) + y;		      \
	pix_type *d = dst + dst_stride * y;				      \
	for (x = 0; x < w; x++)						      \
	{								      \
	    *d++ = *s;							      \
	    s -= src_stride;						      \
	}								      \
    }									      \
}									      \
									      \
static void								      \
blt_rotated_90_##suffix (pix_type       *dst,				      \
			 int             dst_stride,			      \
			 const pix_type *src,				      \
			 int             src_stride,			      \
			 int             W,				      \
			 int             H)				      \
{									      \
    int x;								      \
    int leading_pixels = 0, trailing_pixels = 0;			      \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);		      \
									      \
    /*									      \
     * split processing into handling destination as TILE_SIZExH cache line   \
     * aligned vertical stripes (optimistically assuming that destination     \
     * stride is a multiple of cache line, if not - it will be just a bit     \
     * slower)								      \
     */									      \
									      \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))				      \
    {									      \
	leading_pixels = TILE_SIZE - (((uintptr_t)dst &			      \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));	      \
	if (leading_pixels > W)						      \
	    leading_pixels = W;						      \
									      \
	/* unaligned leading part NxH (where N < TILE_SIZE) */		      \
	blt_rotated_90_trivial_##suffix (				      \
	    dst,							      \
	    dst_stride,							      \
	    src,							      \
	    src_stride,							      \
	    leading_pixels,						      \
	    H);								      \
									      \
	dst += leading_pixels;						      \
	src += leading_pixels * src_stride;				      \
	W -= leading_pixels;						      \
    }									      \
									      \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))			      \
    {									      \
	trailing_pixels = (((uintptr_t)(dst + W) &			      \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));	      \
	if (trailing_pixels > W)					      \
	    trailing_pixels = W;					      \
	W -= trailing_pixels;						      \
    }									      \
									      \
    for (x = 0; x < W; x += TILE_SIZE)					      \
    {									      \
	/* aligned middle part TILE_SIZExH */				      \
	blt_rotated_90_trivial_##suffix (				      \
	    dst + x,							      \
	    dst_stride,							      \
	    src + src_stride * x,					      \
	    src_stride,							      \
	    TILE_SIZE,							      \
	    H);								      \
    }									      \
									      \
    if (trailing_pixels)						      \
    {									      \
	/* unaligned trailing part NxH (where N < TILE_SIZE) */		      \
	blt_rotated_90_trivial_##suffix (				      \
	    dst + W,							      \
	    dst_stride,							      \
	    src + W * src_stride,					      \
	    src_stride,							      \
	    trailing_pixels,						      \
	    H);								      \
    }									      \
}									      \
									      \
static void								      \
blt_rotated_270_##suffix (pix_type       *dst,				      \
			  int             dst_stride,			      \
			  const pix_type *src,				      \
			  int             src_stride,			      \
			  int             W,				      \
			  int             H)				      \
{									      \
    int x;								      \
    int leading_pixels = 0, trailing_pixels = 0;			      \
    const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);		      \
									      \
    /*									      \
     * split processing into handling destination as TILE_SIZExH cache line   \
     * aligned vertical stripes (optimistically assuming that destination     \
     * stride is a multiple of cache line, if not - it will be just a bit     \
     * slower)								      \
     */									      \
									      \
    if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))				      \
    {									      \
	leading_pixels = TILE_SIZE - (((uintptr_t)dst &			      \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));	      \
	if (leading_pixels > W)						      \
	    leading_pixels = W;						      \
									      \
	/* unaligned leading part NxH (where N < TILE_SIZE) */		      \
	blt_rotated_270_trivial_##suffix (				      \
	    dst,							      \
	    dst_stride,							      \
	    src + src_stride * (W - leading_pixels),			      \
	    src_stride,							      \
	    leading_pixels,						      \
	    H);								      \
									      \
	dst += leading_pixels;						      \
	W -= leading_pixels;						      \
    }									      \
									      \
    if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))			      \
    {									      \
	trailing_pixels = (((uintptr_t)(dst + W) &			      \
			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));	      \
	if (trailing_pixels > W)					      \
	    trailing_pixels = W;					      \
	W -= trailing_pixels;						      \
	src += trailing_pixels * src_stride;				      \
    }									      \
									      \
    for (x = 0; x < W; x += TILE_SIZE)					      \
    {									      \
	/* aligned middle part TILE_SIZExH */				      \
	blt_rotated_270_trivial_##suffix (				      \
	    dst + x,							      \
	    dst_stride,							      \
	    src + src_stride * (W - x - TILE_SIZE),			      \
	    src_stride,							      \
	    TILE_SIZE,							      \
	    H);								      \
    }									      \
									      \
    if (trailing_pixels)						      \
    {									      \
	/* unaligned trailing part NxH (where N < TILE_SIZE) */		      \
	blt_rotated_270_trivial_##suffix (				      \
	    dst + W,							      \
	    dst_stride,							      \
	    src - trailing_pixels * src_stride,				      \
	    src_stride,							      \
	    trailing_pixels,						      \
	    H);								      \
    }									      \
}									      \
									      \
static void								      \
fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,	      \
				   pixman_composite_info_t *info)	      \
{									      \
    PIXMAN_COMPOSITE_ARGS (info);					      \
    pix_type *dst_line;							      \
    pix_type *src_line;							      \
    int dst_stride, src_stride;						      \
    int src_x_t, src_y_t;						      \
									      \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,	      \
			   dst_stride, dst_line, 1);			      \
    /* map the destination origin through the (rotation) transform */	      \
    src_x_t = -src_y + pixman_fixed_to_int (				      \
		src_image->common.transform->matrix[0][2] +		      \
		pixman_fixed_1 / 2 - pixman_fixed_e) - height;		      \
    src_y_t = src_x + pixman_fixed_to_int (				      \
		src_image->common.transform->matrix[1][2] +		      \
		pixman_fixed_1 / 2 - pixman_fixed_e);			      \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,	      \
			   src_stride, src_line, 1);			      \
    blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
			     width, height);				      \
}									      \
									      \
static void								      \
fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,	      \
				    pixman_composite_info_t *info)	      \
{									      \
    PIXMAN_COMPOSITE_ARGS (info);					      \
    pix_type *dst_line;							      \
    pix_type *src_line;							      \
    int dst_stride, src_stride;						      \
    int src_x_t, src_y_t;						      \
									      \
    PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,	      \
			   dst_stride, dst_line, 1);			      \
    /* map the destination origin through the (rotation) transform */	      \
    src_x_t = src_y + pixman_fixed_to_int (				      \
		src_image->common.transform->matrix[0][2] +		      \
		pixman_fixed_1 / 2 - pixman_fixed_e);			      \
    src_y_t = -src_x + pixman_fixed_to_int (				      \
		src_image->common.transform->matrix[1][2] +		      \
		pixman_fixed_1 / 2 - pixman_fixed_e) - width;		      \
    PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,	      \
			   src_stride, src_line, 1);			      \
    blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
			      width, height);				      \
}

FAST_SIMPLE_ROTATE (8, uint8_t)
FAST_SIMPLE_ROTATE (565, uint16_t)
FAST_SIMPLE_ROTATE (8888, uint32_t)

/* Fast-path dispatch table: consulted in order, first matching entry wins,
 * so more specific entries must precede more general ones. */
static const pixman_fast_path_t c_fast_paths[] =
{
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5, fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5, fast_composite_over_n_1_0565),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5,
			     fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
    PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
    PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
    PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
    PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
    PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
    PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
    PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
    /* same-layout format pairs degrade to a straight memcpy */
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
    PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
    PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),

    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
    SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),

    SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),

    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
    SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
    SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),

    SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),

/* Generic fallback entry routing any remaining nearest-scaled case to
 * fast_composite_scaled_nearest. */
#define NEAREST_FAST_PATH(op,s,d)		\
    {   PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_scaled_nearest,		\
    }

    NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),

    NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
    NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
    NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),

#define SIMPLE_ROTATE_FLAGS(angle)		\
    (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM |	\
     FAST_PATH_NEAREST_FILTER |			\
     FAST_PATH_SAMPLES_COVER_CLIP_NEAREST |	\
     FAST_PATH_STANDARD_FLAGS)

/* One entry pair (90 and 270 degrees) per format, bound to the blitters
 * generated by FAST_SIMPLE_ROTATE above. */
#define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)	\
    {   PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),	\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_rotate_90_##suffix,	\
    },						\
    {   PIXMAN_OP_ ## op,			\
	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),\
	PIXMAN_null, 0,				\
	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
	fast_composite_rotate_270_##suffix,	\
    }

    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
    SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
    SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),

    /* Simple repeat fast path entry. */
    { PIXMAN_OP_any,
      PIXMAN_any,
      (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
       FAST_PATH_NORMAL_REPEAT),
      PIXMAN_any, 0,
      PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
      fast_composite_tiled_repeat
    },

    { PIXMAN_OP_NONE },
};

/* Mask selecting n consecutive a1 pixels starting at bit offset offs
 * within a 32-bit word; the bit order depends on endianness. */
#ifdef WORDS_BIGENDIAN
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
#else
#define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
#endif

/* Set or clear width 1bpp pixels starting at bit offset offs of *dst.
 * Handles the partial leading word, full 32-pixel words, and the partial
 * trailing word separately. */
static force_inline void
pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
{
    if (offs)
    {
	int leading_pixels = 32 - offs;
	if (leading_pixels >= width)
	{
	    /* the fill fits entirely inside the first word */
	    if (v)
		*dst |= A1_FILL_MASK (width, offs);
	    else
		*dst &= ~A1_FILL_MASK (width, offs);
	    return;
	}
	else
	{
	    if (v)
		*dst++ |= A1_FILL_MASK (leading_pixels, offs);
	    else
		*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
	    width -= leading_pixels;
	}
    }
    while (width >= 32)
    {
	if (v)
	    *dst++ = 0xFFFFFFFF;
	else
	    *dst++ = 0;
	width -= 32;
    }
    if (width > 0)
    {
	if (v)
	    *dst |= A1_FILL_MASK (width, 0);
	else
	    *dst &= ~A1_FILL_MASK (width, 0);
    }
}

/* Fill a width x height rectangle of a 1bpp image with bit 0 of filler. */
static void
pixman_fill1 (uint32_t *bits,
	      int       stride,
	      int       x,
	      int       y,
	      int       width,
	      int       height,
	      uint32_t  filler)
{
    uint32_t *dst = bits + y * stride + (x >> 5);
    int offs = x & 31;

    if (filler & 1)
    {
	while (height--)
	{
	    pixman_fill1_line (dst, offs, width, 1);
	    dst += stride;
	}
    }
    else
    {
	while (height--)
	{
	    pixman_fill1_line (dst, offs, width, 0);
	    dst += stride;
	}
    }
}

/* Fill a width x height rectangle of an 8bpp image with the low byte of
 * filler.  stride is in uint32_t units, hence the byte conversion. */
static void
pixman_fill8 (uint32_t *bits,
	      int       stride,
	      int       x,
	      int       y,
	      int       width,
	      int       height,
	      uint32_t  filler)
{
    int byte_stride = stride * (int) sizeof
(uint32_t); 2063 uint8_t *dst = (uint8_t *) bits; 2064 uint8_t v = filler & 0xff; 2065 int i; 2066 2067 dst = dst + y * byte_stride + x; 2068 2069 while (height--) 2070 { 2071 for (i = 0; i < width; ++i) 2072 dst[i] = v; 2073 2074 dst += byte_stride; 2075 } 2076 } 2077 2078 static void 2079 pixman_fill16 (uint32_t *bits, 2080 int stride, 2081 int x, 2082 int y, 2083 int width, 2084 int height, 2085 uint32_t filler) 2086 { 2087 int short_stride = 2088 (stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t); 2089 uint16_t *dst = (uint16_t *)bits; 2090 uint16_t v = filler & 0xffff; 2091 int i; 2092 2093 dst = dst + y * short_stride + x; 2094 2095 while (height--) 2096 { 2097 for (i = 0; i < width; ++i) 2098 dst[i] = v; 2099 2100 dst += short_stride; 2101 } 2102 } 2103 2104 static void 2105 pixman_fill32 (uint32_t *bits, 2106 int stride, 2107 int x, 2108 int y, 2109 int width, 2110 int height, 2111 uint32_t filler) 2112 { 2113 int i; 2114 2115 bits = bits + y * stride + x; 2116 2117 while (height--) 2118 { 2119 for (i = 0; i < width; ++i) 2120 bits[i] = filler; 2121 2122 bits += stride; 2123 } 2124 } 2125 2126 static pixman_bool_t 2127 fast_path_fill (pixman_implementation_t *imp, 2128 uint32_t * bits, 2129 int stride, 2130 int bpp, 2131 int x, 2132 int y, 2133 int width, 2134 int height, 2135 uint32_t filler) 2136 { 2137 switch (bpp) 2138 { 2139 case 1: 2140 pixman_fill1 (bits, stride, x, y, width, height, filler); 2141 break; 2142 2143 case 8: 2144 pixman_fill8 (bits, stride, x, y, width, height, filler); 2145 break; 2146 2147 case 16: 2148 pixman_fill16 (bits, stride, x, y, width, height, filler); 2149 break; 2150 2151 case 32: 2152 pixman_fill32 (bits, stride, x, y, width, height, filler); 2153 break; 2154 2155 default: 2156 return FALSE; 2157 } 2158 2159 return TRUE; 2160 } 2161 2162 /*****************************************************************************/ 2163 2164 static uint32_t * 2165 fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask) 2166 
{ 2167 int32_t w = iter->width; 2168 uint32_t *dst = iter->buffer; 2169 const uint16_t *src = (const uint16_t *)iter->bits; 2170 2171 iter->bits += iter->stride; 2172 2173 /* Align the source buffer at 4 bytes boundary */ 2174 if (w > 0 && ((uintptr_t)src & 3)) 2175 { 2176 *dst++ = convert_0565_to_8888 (*src++); 2177 w--; 2178 } 2179 /* Process two pixels per iteration */ 2180 while ((w -= 2) >= 0) 2181 { 2182 uint32_t sr, sb, sg, t0, t1; 2183 uint32_t s = *(const uint32_t *)src; 2184 src += 2; 2185 sr = (s >> 8) & 0x00F800F8; 2186 sb = (s << 3) & 0x00F800F8; 2187 sg = (s >> 3) & 0x00FC00FC; 2188 sr |= sr >> 5; 2189 sb |= sb >> 5; 2190 sg |= sg >> 6; 2191 t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) | 2192 (sb & 0xFF) | 0xFF000000; 2193 t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) | 2194 (sb >> 16) | 0xFF000000; 2195 #ifdef WORDS_BIGENDIAN 2196 *dst++ = t1; 2197 *dst++ = t0; 2198 #else 2199 *dst++ = t0; 2200 *dst++ = t1; 2201 #endif 2202 } 2203 if (w & 1) 2204 { 2205 *dst = convert_0565_to_8888 (*src); 2206 } 2207 2208 return iter->buffer; 2209 } 2210 2211 static uint32_t * 2212 fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask) 2213 { 2214 iter->bits += iter->stride; 2215 return iter->buffer; 2216 } 2217 2218 /* Helper function for a workaround, which tries to ensure that 0x1F001F 2219 * constant is always allocated in a register on RISC architectures. 
 */
static force_inline uint32_t
convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
{
    uint32_t a, b;
    a = (s >> 3) & x1F001F;   /* red and blue, 5 bits each, in one word */
    b = s & 0xFC00;           /* 6 bits of green */
    a |= a >> 5;
    a |= b >> 5;
    return a;
}

/* Source/dest iterator write-back: convert the a8r8g8b8 scanline in
 * iter->buffer back to r5g6b5.  get_scanline already advanced iter->bits,
 * so the row being written is at (iter->bits - iter->stride). */
static void
fast_write_back_r5g6b5 (pixman_iter_t *iter)
{
    int32_t w = iter->width;
    uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
    const uint32_t *src = iter->buffer;
    /* Workaround to ensure that x1F001F variable is allocated in a register */
    static volatile uint32_t volatile_x1F001F = 0x1F001F;
    uint32_t x1F001F = volatile_x1F001F;

    /* 4 pixels per iteration; after the loop w is negative (-1..-4), so
     * (w & 2) and (w & 1) select the 0-3 leftover pixels. */
    while ((w -= 4) >= 0)
    {
	uint32_t s1 = *src++;
	uint32_t s2 = *src++;
	uint32_t s3 = *src++;
	uint32_t s4 = *src++;
	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
    }
    if (w & 2)
    {
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
    }
    if (w & 1)
    {
	*dst = convert_8888_to_0565_workaround (*src, x1F001F);
    }
}

/* Per-format iterator callbacks used by the iterator init functions. */
typedef struct
{
    pixman_format_code_t format;
    pixman_iter_get_scanline_t get_scanline;
    pixman_iter_write_back_t write_back;
} fetcher_info_t;

static const fetcher_info_t fetchers[] =
{
    { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
    { PIXMAN_null }
};

/* Install a fast narrow (8-bit-per-channel) source iterator when the
 * image is an untransformed bits image fully covering the clip and its
 * format has an entry in the fetchers table.  Returns FALSE to fall back
 * to the general iterator. */
static pixman_bool_t
fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
    pixman_image_t *image = iter->image;

#define FLAGS						\
    (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |	\
     FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)

    if ((iter->iter_flags & ITER_NARROW) &&
	(iter->image_flags & FLAGS) == FLAGS)
    {
	const fetcher_info_t *f;

	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
	{
	    if (image->common.extended_format_code == f->format)
	    {
		uint8_t *b = (uint8_t *)image->bits.bits;
		/* rowstride is in uint32_t units; convert to bytes */
		int s = image->bits.rowstride * 4;

		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
		iter->stride = s;

		iter->get_scanline = f->get_scanline;
		return TRUE;
	    }
	}
    }

    return FALSE;
}

/* Same as fast_src_iter_init, but for destination iterators: also wires
 * up write_back, and skips the fetch entirely (fast_dest_fetch_noop)
 * when the caller ignores both RGB and alpha of the existing pixels. */
static pixman_bool_t
fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
{
    pixman_image_t *image = iter->image;

    if ((iter->iter_flags & ITER_NARROW) &&
	(iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
    {
	const fetcher_info_t *f;

	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
	{
	    if (image->common.extended_format_code == f->format)
	    {
		uint8_t *b = (uint8_t *)image->bits.bits;
		/* rowstride is in uint32_t units; convert to bytes */
		int s = image->bits.rowstride * 4;

		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
		iter->stride = s;

		if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
		    (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
		{
		    iter->get_scanline = fast_dest_fetch_noop;
		}
		else
		{
		    iter->get_scanline = f->get_scanline;
		}
		iter->write_back = f->write_back;
		return TRUE;
	    }
	}
    }
    return FALSE;
}


/* Create the C fast-path implementation layered over fallback, hooking in
 * the dispatch table, the fill routines and the iterator initializers. */
pixman_implementation_t *
_pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
{
    pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);

    imp->fill = fast_path_fill;
    imp->src_iter_init = fast_src_iter_init;
    imp->dest_iter_init = fast_dest_iter_init;

    return imp;
}