Home | History | Annotate | Download | only in pixman
      1 /* -*- Mode: c; c-basic-offset: 4; tab-width: 8; indent-tabs-mode: t; -*- */
      2 /*
      3  * Copyright  2000 SuSE, Inc.
      4  * Copyright  2007 Red Hat, Inc.
      5  *
      6  * Permission to use, copy, modify, distribute, and sell this software and its
      7  * documentation for any purpose is hereby granted without fee, provided that
      8  * the above copyright notice appear in all copies and that both that
      9  * copyright notice and this permission notice appear in supporting
     10  * documentation, and that the name of SuSE not be used in advertising or
     11  * publicity pertaining to distribution of the software without specific,
     12  * written prior permission.  SuSE makes no representations about the
     13  * suitability of this software for any purpose.  It is provided "as is"
     14  * without express or implied warranty.
     15  *
     16  * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE
     18  * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     19  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
     20  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
     21  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
     22  *
     23  * Author:  Keith Packard, SuSE, Inc.
     24  */
     25 
     26 #ifdef HAVE_CONFIG_H
     27 #include <config.h>
     28 #endif
     29 #include <string.h>
     30 #include <stdlib.h>
     31 #include "pixman-private.h"
     32 #include "pixman-combine32.h"
     33 #include "pixman-inlines.h"
     34 
     35 static force_inline uint32_t
     36 fetch_24 (uint8_t *a)
     37 {
     38     if (((uintptr_t)a) & 1)
     39     {
     40 #ifdef WORDS_BIGENDIAN
     41 	return (*a << 16) | (*(uint16_t *)(a + 1));
     42 #else
     43 	return *a | (*(uint16_t *)(a + 1) << 8);
     44 #endif
     45     }
     46     else
     47     {
     48 #ifdef WORDS_BIGENDIAN
     49 	return (*(uint16_t *)a << 8) | *(a + 2);
     50 #else
     51 	return *(uint16_t *)a | (*(a + 2) << 16);
     52 #endif
     53     }
     54 }
     55 
     56 static force_inline void
     57 store_24 (uint8_t *a,
     58           uint32_t v)
     59 {
     60     if (((uintptr_t)a) & 1)
     61     {
     62 #ifdef WORDS_BIGENDIAN
     63 	*a = (uint8_t) (v >> 16);
     64 	*(uint16_t *)(a + 1) = (uint16_t) (v);
     65 #else
     66 	*a = (uint8_t) (v);
     67 	*(uint16_t *)(a + 1) = (uint16_t) (v >> 8);
     68 #endif
     69     }
     70     else
     71     {
     72 #ifdef WORDS_BIGENDIAN
     73 	*(uint16_t *)a = (uint16_t)(v >> 8);
     74 	*(a + 2) = (uint8_t)v;
     75 #else
     76 	*(uint16_t *)a = (uint16_t)v;
     77 	*(a + 2) = (uint8_t)(v >> 16);
     78 #endif
     79     }
     80 }
     81 
     82 static force_inline uint32_t
     83 over (uint32_t src,
     84       uint32_t dest)
     85 {
     86     uint32_t a = ~src >> 24;
     87 
     88     UN8x4_MUL_UN8_ADD_UN8x4 (dest, a, src);
     89 
     90     return dest;
     91 }
     92 
     93 static force_inline uint32_t
     94 in (uint32_t x,
     95     uint8_t  y)
     96 {
     97     uint16_t a = y;
     98 
     99     UN8x4_MUL_UN8 (x, a);
    100 
    101     return x;
    102 }
    103 
    104 /*
    105  * Naming convention:
    106  *
    107  *  op_src_mask_dest
    108  */
    109 static void
    110 fast_composite_over_x888_8_8888 (pixman_implementation_t *imp,
    111                                  pixman_composite_info_t *info)
    112 {
    113     PIXMAN_COMPOSITE_ARGS (info);
    114     uint32_t    *src, *src_line;
    115     uint32_t    *dst, *dst_line;
    116     uint8_t     *mask, *mask_line;
    117     int src_stride, mask_stride, dst_stride;
    118     uint8_t m;
    119     uint32_t s, d;
    120     int32_t w;
    121 
    122     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    123     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    124     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    125 
    126     while (height--)
    127     {
    128 	src = src_line;
    129 	src_line += src_stride;
    130 	dst = dst_line;
    131 	dst_line += dst_stride;
    132 	mask = mask_line;
    133 	mask_line += mask_stride;
    134 
    135 	w = width;
    136 	while (w--)
    137 	{
    138 	    m = *mask++;
    139 	    if (m)
    140 	    {
    141 		s = *src | 0xff000000;
    142 
    143 		if (m == 0xff)
    144 		{
    145 		    *dst = s;
    146 		}
    147 		else
    148 		{
    149 		    d = in (s, m);
    150 		    *dst = over (d, *dst);
    151 		}
    152 	    }
    153 	    src++;
    154 	    dst++;
    155 	}
    156     }
    157 }
    158 
    159 static void
    160 fast_composite_in_n_8_8 (pixman_implementation_t *imp,
    161                          pixman_composite_info_t *info)
    162 {
    163     PIXMAN_COMPOSITE_ARGS (info);
    164     uint32_t src, srca;
    165     uint8_t     *dst_line, *dst;
    166     uint8_t     *mask_line, *mask, m;
    167     int dst_stride, mask_stride;
    168     int32_t w;
    169     uint16_t t;
    170 
    171     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    172 
    173     srca = src >> 24;
    174 
    175     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    176     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    177 
    178     if (srca == 0xff)
    179     {
    180 	while (height--)
    181 	{
    182 	    dst = dst_line;
    183 	    dst_line += dst_stride;
    184 	    mask = mask_line;
    185 	    mask_line += mask_stride;
    186 	    w = width;
    187 
    188 	    while (w--)
    189 	    {
    190 		m = *mask++;
    191 
    192 		if (m == 0)
    193 		    *dst = 0;
    194 		else if (m != 0xff)
    195 		    *dst = MUL_UN8 (m, *dst, t);
    196 
    197 		dst++;
    198 	    }
    199 	}
    200     }
    201     else
    202     {
    203 	while (height--)
    204 	{
    205 	    dst = dst_line;
    206 	    dst_line += dst_stride;
    207 	    mask = mask_line;
    208 	    mask_line += mask_stride;
    209 	    w = width;
    210 
    211 	    while (w--)
    212 	    {
    213 		m = *mask++;
    214 		m = MUL_UN8 (m, srca, t);
    215 
    216 		if (m == 0)
    217 		    *dst = 0;
    218 		else if (m != 0xff)
    219 		    *dst = MUL_UN8 (m, *dst, t);
    220 
    221 		dst++;
    222 	    }
    223 	}
    224     }
    225 }
    226 
    227 static void
    228 fast_composite_in_8_8 (pixman_implementation_t *imp,
    229                        pixman_composite_info_t *info)
    230 {
    231     PIXMAN_COMPOSITE_ARGS (info);
    232     uint8_t     *dst_line, *dst;
    233     uint8_t     *src_line, *src;
    234     int dst_stride, src_stride;
    235     int32_t w;
    236     uint8_t s;
    237     uint16_t t;
    238 
    239     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    240     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    241 
    242     while (height--)
    243     {
    244 	dst = dst_line;
    245 	dst_line += dst_stride;
    246 	src = src_line;
    247 	src_line += src_stride;
    248 	w = width;
    249 
    250 	while (w--)
    251 	{
    252 	    s = *src++;
    253 
    254 	    if (s == 0)
    255 		*dst = 0;
    256 	    else if (s != 0xff)
    257 		*dst = MUL_UN8 (s, *dst, t);
    258 
    259 	    dst++;
    260 	}
    261     }
    262 }
    263 
    264 static void
    265 fast_composite_over_n_8_8888 (pixman_implementation_t *imp,
    266                               pixman_composite_info_t *info)
    267 {
    268     PIXMAN_COMPOSITE_ARGS (info);
    269     uint32_t src, srca;
    270     uint32_t    *dst_line, *dst, d;
    271     uint8_t     *mask_line, *mask, m;
    272     int dst_stride, mask_stride;
    273     int32_t w;
    274 
    275     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    276 
    277     srca = src >> 24;
    278     if (src == 0)
    279 	return;
    280 
    281     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    282     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    283 
    284     while (height--)
    285     {
    286 	dst = dst_line;
    287 	dst_line += dst_stride;
    288 	mask = mask_line;
    289 	mask_line += mask_stride;
    290 	w = width;
    291 
    292 	while (w--)
    293 	{
    294 	    m = *mask++;
    295 	    if (m == 0xff)
    296 	    {
    297 		if (srca == 0xff)
    298 		    *dst = src;
    299 		else
    300 		    *dst = over (src, *dst);
    301 	    }
    302 	    else if (m)
    303 	    {
    304 		d = in (src, m);
    305 		*dst = over (d, *dst);
    306 	    }
    307 	    dst++;
    308 	}
    309     }
    310 }
    311 
    312 static void
    313 fast_composite_add_n_8888_8888_ca (pixman_implementation_t *imp,
    314 				   pixman_composite_info_t *info)
    315 {
    316     PIXMAN_COMPOSITE_ARGS (info);
    317     uint32_t src, s;
    318     uint32_t    *dst_line, *dst, d;
    319     uint32_t    *mask_line, *mask, ma;
    320     int dst_stride, mask_stride;
    321     int32_t w;
    322 
    323     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    324 
    325     if (src == 0)
    326 	return;
    327 
    328     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    329     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
    330 
    331     while (height--)
    332     {
    333 	dst = dst_line;
    334 	dst_line += dst_stride;
    335 	mask = mask_line;
    336 	mask_line += mask_stride;
    337 	w = width;
    338 
    339 	while (w--)
    340 	{
    341 	    ma = *mask++;
    342 
    343 	    if (ma)
    344 	    {
    345 		d = *dst;
    346 		s = src;
    347 
    348 		UN8x4_MUL_UN8x4_ADD_UN8x4 (s, ma, d);
    349 
    350 		*dst = s;
    351 	    }
    352 
    353 	    dst++;
    354 	}
    355     }
    356 }
    357 
    358 static void
    359 fast_composite_over_n_8888_8888_ca (pixman_implementation_t *imp,
    360                                     pixman_composite_info_t *info)
    361 {
    362     PIXMAN_COMPOSITE_ARGS (info);
    363     uint32_t src, srca, s;
    364     uint32_t    *dst_line, *dst, d;
    365     uint32_t    *mask_line, *mask, ma;
    366     int dst_stride, mask_stride;
    367     int32_t w;
    368 
    369     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    370 
    371     srca = src >> 24;
    372     if (src == 0)
    373 	return;
    374 
    375     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    376     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
    377 
    378     while (height--)
    379     {
    380 	dst = dst_line;
    381 	dst_line += dst_stride;
    382 	mask = mask_line;
    383 	mask_line += mask_stride;
    384 	w = width;
    385 
    386 	while (w--)
    387 	{
    388 	    ma = *mask++;
    389 	    if (ma == 0xffffffff)
    390 	    {
    391 		if (srca == 0xff)
    392 		    *dst = src;
    393 		else
    394 		    *dst = over (src, *dst);
    395 	    }
    396 	    else if (ma)
    397 	    {
    398 		d = *dst;
    399 		s = src;
    400 
    401 		UN8x4_MUL_UN8x4 (s, ma);
    402 		UN8x4_MUL_UN8 (ma, srca);
    403 		ma = ~ma;
    404 		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
    405 
    406 		*dst = d;
    407 	    }
    408 
    409 	    dst++;
    410 	}
    411     }
    412 }
    413 
    414 static void
    415 fast_composite_over_n_8_0888 (pixman_implementation_t *imp,
    416                               pixman_composite_info_t *info)
    417 {
    418     PIXMAN_COMPOSITE_ARGS (info);
    419     uint32_t src, srca;
    420     uint8_t     *dst_line, *dst;
    421     uint32_t d;
    422     uint8_t     *mask_line, *mask, m;
    423     int dst_stride, mask_stride;
    424     int32_t w;
    425 
    426     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    427 
    428     srca = src >> 24;
    429     if (src == 0)
    430 	return;
    431 
    432     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    433     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    434 
    435     while (height--)
    436     {
    437 	dst = dst_line;
    438 	dst_line += dst_stride;
    439 	mask = mask_line;
    440 	mask_line += mask_stride;
    441 	w = width;
    442 
    443 	while (w--)
    444 	{
    445 	    m = *mask++;
    446 	    if (m == 0xff)
    447 	    {
    448 		if (srca == 0xff)
    449 		{
    450 		    d = src;
    451 		}
    452 		else
    453 		{
    454 		    d = fetch_24 (dst);
    455 		    d = over (src, d);
    456 		}
    457 		store_24 (dst, d);
    458 	    }
    459 	    else if (m)
    460 	    {
    461 		d = over (in (src, m), fetch_24 (dst));
    462 		store_24 (dst, d);
    463 	    }
    464 	    dst += 3;
    465 	}
    466     }
    467 }
    468 
    469 static void
    470 fast_composite_over_n_8_0565 (pixman_implementation_t *imp,
    471                               pixman_composite_info_t *info)
    472 {
    473     PIXMAN_COMPOSITE_ARGS (info);
    474     uint32_t src, srca;
    475     uint16_t    *dst_line, *dst;
    476     uint32_t d;
    477     uint8_t     *mask_line, *mask, m;
    478     int dst_stride, mask_stride;
    479     int32_t w;
    480 
    481     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    482 
    483     srca = src >> 24;
    484     if (src == 0)
    485 	return;
    486 
    487     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    488     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    489 
    490     while (height--)
    491     {
    492 	dst = dst_line;
    493 	dst_line += dst_stride;
    494 	mask = mask_line;
    495 	mask_line += mask_stride;
    496 	w = width;
    497 
    498 	while (w--)
    499 	{
    500 	    m = *mask++;
    501 	    if (m == 0xff)
    502 	    {
    503 		if (srca == 0xff)
    504 		{
    505 		    d = src;
    506 		}
    507 		else
    508 		{
    509 		    d = *dst;
    510 		    d = over (src, convert_0565_to_0888 (d));
    511 		}
    512 		*dst = convert_8888_to_0565 (d);
    513 	    }
    514 	    else if (m)
    515 	    {
    516 		d = *dst;
    517 		d = over (in (src, m), convert_0565_to_0888 (d));
    518 		*dst = convert_8888_to_0565 (d);
    519 	    }
    520 	    dst++;
    521 	}
    522     }
    523 }
    524 
    525 static void
    526 fast_composite_over_n_8888_0565_ca (pixman_implementation_t *imp,
    527                                     pixman_composite_info_t *info)
    528 {
    529     PIXMAN_COMPOSITE_ARGS (info);
    530     uint32_t  src, srca, s;
    531     uint16_t  src16;
    532     uint16_t *dst_line, *dst;
    533     uint32_t  d;
    534     uint32_t *mask_line, *mask, ma;
    535     int dst_stride, mask_stride;
    536     int32_t w;
    537 
    538     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    539 
    540     srca = src >> 24;
    541     if (src == 0)
    542 	return;
    543 
    544     src16 = convert_8888_to_0565 (src);
    545 
    546     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    547     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint32_t, mask_stride, mask_line, 1);
    548 
    549     while (height--)
    550     {
    551 	dst = dst_line;
    552 	dst_line += dst_stride;
    553 	mask = mask_line;
    554 	mask_line += mask_stride;
    555 	w = width;
    556 
    557 	while (w--)
    558 	{
    559 	    ma = *mask++;
    560 	    if (ma == 0xffffffff)
    561 	    {
    562 		if (srca == 0xff)
    563 		{
    564 		    *dst = src16;
    565 		}
    566 		else
    567 		{
    568 		    d = *dst;
    569 		    d = over (src, convert_0565_to_0888 (d));
    570 		    *dst = convert_8888_to_0565 (d);
    571 		}
    572 	    }
    573 	    else if (ma)
    574 	    {
    575 		d = *dst;
    576 		d = convert_0565_to_0888 (d);
    577 
    578 		s = src;
    579 
    580 		UN8x4_MUL_UN8x4 (s, ma);
    581 		UN8x4_MUL_UN8 (ma, srca);
    582 		ma = ~ma;
    583 		UN8x4_MUL_UN8x4_ADD_UN8x4 (d, ma, s);
    584 
    585 		*dst = convert_8888_to_0565 (d);
    586 	    }
    587 	    dst++;
    588 	}
    589     }
    590 }
    591 
    592 static void
    593 fast_composite_over_8888_8888 (pixman_implementation_t *imp,
    594                                pixman_composite_info_t *info)
    595 {
    596     PIXMAN_COMPOSITE_ARGS (info);
    597     uint32_t    *dst_line, *dst;
    598     uint32_t    *src_line, *src, s;
    599     int dst_stride, src_stride;
    600     uint8_t a;
    601     int32_t w;
    602 
    603     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    604     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    605 
    606     while (height--)
    607     {
    608 	dst = dst_line;
    609 	dst_line += dst_stride;
    610 	src = src_line;
    611 	src_line += src_stride;
    612 	w = width;
    613 
    614 	while (w--)
    615 	{
    616 	    s = *src++;
    617 	    a = s >> 24;
    618 	    if (a == 0xff)
    619 		*dst = s;
    620 	    else if (s)
    621 		*dst = over (s, *dst);
    622 	    dst++;
    623 	}
    624     }
    625 }
    626 
    627 static void
    628 fast_composite_src_x888_8888 (pixman_implementation_t *imp,
    629 			      pixman_composite_info_t *info)
    630 {
    631     PIXMAN_COMPOSITE_ARGS (info);
    632     uint32_t    *dst_line, *dst;
    633     uint32_t    *src_line, *src;
    634     int dst_stride, src_stride;
    635     int32_t w;
    636 
    637     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    638     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    639 
    640     while (height--)
    641     {
    642 	dst = dst_line;
    643 	dst_line += dst_stride;
    644 	src = src_line;
    645 	src_line += src_stride;
    646 	w = width;
    647 
    648 	while (w--)
    649 	    *dst++ = (*src++) | 0xff000000;
    650     }
    651 }
    652 
    653 #if 0
    654 static void
    655 fast_composite_over_8888_0888 (pixman_implementation_t *imp,
    656 			       pixman_composite_info_t *info)
    657 {
    658     PIXMAN_COMPOSITE_ARGS (info);
    659     uint8_t     *dst_line, *dst;
    660     uint32_t d;
    661     uint32_t    *src_line, *src, s;
    662     uint8_t a;
    663     int dst_stride, src_stride;
    664     int32_t w;
    665 
    666     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 3);
    667     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    668 
    669     while (height--)
    670     {
    671 	dst = dst_line;
    672 	dst_line += dst_stride;
    673 	src = src_line;
    674 	src_line += src_stride;
    675 	w = width;
    676 
    677 	while (w--)
    678 	{
    679 	    s = *src++;
    680 	    a = s >> 24;
    681 	    if (a)
    682 	    {
    683 		if (a == 0xff)
    684 		    d = s;
    685 		else
    686 		    d = over (s, fetch_24 (dst));
    687 
    688 		store_24 (dst, d);
    689 	    }
    690 	    dst += 3;
    691 	}
    692     }
    693 }
    694 #endif
    695 
    696 static void
    697 fast_composite_over_8888_0565 (pixman_implementation_t *imp,
    698                                pixman_composite_info_t *info)
    699 {
    700     PIXMAN_COMPOSITE_ARGS (info);
    701     uint16_t    *dst_line, *dst;
    702     uint32_t d;
    703     uint32_t    *src_line, *src, s;
    704     uint8_t a;
    705     int dst_stride, src_stride;
    706     int32_t w;
    707 
    708     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    709     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    710 
    711     while (height--)
    712     {
    713 	dst = dst_line;
    714 	dst_line += dst_stride;
    715 	src = src_line;
    716 	src_line += src_stride;
    717 	w = width;
    718 
    719 	while (w--)
    720 	{
    721 	    s = *src++;
    722 	    a = s >> 24;
    723 	    if (s)
    724 	    {
    725 		if (a == 0xff)
    726 		{
    727 		    d = s;
    728 		}
    729 		else
    730 		{
    731 		    d = *dst;
    732 		    d = over (s, convert_0565_to_0888 (d));
    733 		}
    734 		*dst = convert_8888_to_0565 (d);
    735 	    }
    736 	    dst++;
    737 	}
    738     }
    739 }
    740 
    741 static void
    742 fast_composite_add_8_8 (pixman_implementation_t *imp,
    743 			pixman_composite_info_t *info)
    744 {
    745     PIXMAN_COMPOSITE_ARGS (info);
    746     uint8_t     *dst_line, *dst;
    747     uint8_t     *src_line, *src;
    748     int dst_stride, src_stride;
    749     int32_t w;
    750     uint8_t s, d;
    751     uint16_t t;
    752 
    753     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint8_t, src_stride, src_line, 1);
    754     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    755 
    756     while (height--)
    757     {
    758 	dst = dst_line;
    759 	dst_line += dst_stride;
    760 	src = src_line;
    761 	src_line += src_stride;
    762 	w = width;
    763 
    764 	while (w--)
    765 	{
    766 	    s = *src++;
    767 	    if (s)
    768 	    {
    769 		if (s != 0xff)
    770 		{
    771 		    d = *dst;
    772 		    t = d + s;
    773 		    s = t | (0 - (t >> 8));
    774 		}
    775 		*dst = s;
    776 	    }
    777 	    dst++;
    778 	}
    779     }
    780 }
    781 
    782 static void
    783 fast_composite_add_0565_0565 (pixman_implementation_t *imp,
    784                               pixman_composite_info_t *info)
    785 {
    786     PIXMAN_COMPOSITE_ARGS (info);
    787     uint16_t    *dst_line, *dst;
    788     uint32_t	d;
    789     uint16_t    *src_line, *src;
    790     uint32_t	s;
    791     int dst_stride, src_stride;
    792     int32_t w;
    793 
    794     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint16_t, src_stride, src_line, 1);
    795     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t, dst_stride, dst_line, 1);
    796 
    797     while (height--)
    798     {
    799 	dst = dst_line;
    800 	dst_line += dst_stride;
    801 	src = src_line;
    802 	src_line += src_stride;
    803 	w = width;
    804 
    805 	while (w--)
    806 	{
    807 	    s = *src++;
    808 	    if (s)
    809 	    {
    810 		d = *dst;
    811 		s = convert_0565_to_8888 (s);
    812 		if (d)
    813 		{
    814 		    d = convert_0565_to_8888 (d);
    815 		    UN8x4_ADD_UN8x4 (s, d);
    816 		}
    817 		*dst = convert_8888_to_0565 (s);
    818 	    }
    819 	    dst++;
    820 	}
    821     }
    822 }
    823 
    824 static void
    825 fast_composite_add_8888_8888 (pixman_implementation_t *imp,
    826                               pixman_composite_info_t *info)
    827 {
    828     PIXMAN_COMPOSITE_ARGS (info);
    829     uint32_t    *dst_line, *dst;
    830     uint32_t    *src_line, *src;
    831     int dst_stride, src_stride;
    832     int32_t w;
    833     uint32_t s, d;
    834 
    835     PIXMAN_IMAGE_GET_LINE (src_image, src_x, src_y, uint32_t, src_stride, src_line, 1);
    836     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
    837 
    838     while (height--)
    839     {
    840 	dst = dst_line;
    841 	dst_line += dst_stride;
    842 	src = src_line;
    843 	src_line += src_stride;
    844 	w = width;
    845 
    846 	while (w--)
    847 	{
    848 	    s = *src++;
    849 	    if (s)
    850 	    {
    851 		if (s != 0xffffffff)
    852 		{
    853 		    d = *dst;
    854 		    if (d)
    855 			UN8x4_ADD_UN8x4 (s, d);
    856 		}
    857 		*dst = s;
    858 	    }
    859 	    dst++;
    860 	}
    861     }
    862 }
    863 
    864 static void
    865 fast_composite_add_n_8_8 (pixman_implementation_t *imp,
    866 			  pixman_composite_info_t *info)
    867 {
    868     PIXMAN_COMPOSITE_ARGS (info);
    869     uint8_t     *dst_line, *dst;
    870     uint8_t     *mask_line, *mask;
    871     int dst_stride, mask_stride;
    872     int32_t w;
    873     uint32_t src;
    874     uint8_t sa;
    875 
    876     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint8_t, dst_stride, dst_line, 1);
    877     PIXMAN_IMAGE_GET_LINE (mask_image, mask_x, mask_y, uint8_t, mask_stride, mask_line, 1);
    878     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    879     sa = (src >> 24);
    880 
    881     while (height--)
    882     {
    883 	dst = dst_line;
    884 	dst_line += dst_stride;
    885 	mask = mask_line;
    886 	mask_line += mask_stride;
    887 	w = width;
    888 
    889 	while (w--)
    890 	{
    891 	    uint16_t tmp;
    892 	    uint16_t a;
    893 	    uint32_t m, d;
    894 	    uint32_t r;
    895 
    896 	    a = *mask++;
    897 	    d = *dst;
    898 
    899 	    m = MUL_UN8 (sa, a, tmp);
    900 	    r = ADD_UN8 (m, d, tmp);
    901 
    902 	    *dst++ = r;
    903 	}
    904     }
    905 }
    906 
    907 #ifdef WORDS_BIGENDIAN
    908 #define CREATE_BITMASK(n) (0x80000000 >> (n))
    909 #define UPDATE_BITMASK(n) ((n) >> 1)
    910 #else
    911 #define CREATE_BITMASK(n) (1 << (n))
    912 #define UPDATE_BITMASK(n) ((n) << 1)
    913 #endif
    914 
    915 #define TEST_BIT(p, n)					\
    916     (*((p) + ((n) >> 5)) & CREATE_BITMASK ((n) & 31))
    917 #define SET_BIT(p, n)							\
    918     do { *((p) + ((n) >> 5)) |= CREATE_BITMASK ((n) & 31); } while (0);
    919 
    920 static void
    921 fast_composite_add_1_1 (pixman_implementation_t *imp,
    922 			pixman_composite_info_t *info)
    923 {
    924     PIXMAN_COMPOSITE_ARGS (info);
    925     uint32_t     *dst_line, *dst;
    926     uint32_t     *src_line, *src;
    927     int           dst_stride, src_stride;
    928     int32_t       w;
    929 
    930     PIXMAN_IMAGE_GET_LINE (src_image, 0, src_y, uint32_t,
    931                            src_stride, src_line, 1);
    932     PIXMAN_IMAGE_GET_LINE (dest_image, 0, dest_y, uint32_t,
    933                            dst_stride, dst_line, 1);
    934 
    935     while (height--)
    936     {
    937 	dst = dst_line;
    938 	dst_line += dst_stride;
    939 	src = src_line;
    940 	src_line += src_stride;
    941 	w = width;
    942 
    943 	while (w--)
    944 	{
    945 	    /*
    946 	     * TODO: improve performance by processing uint32_t data instead
    947 	     *       of individual bits
    948 	     */
    949 	    if (TEST_BIT (src, src_x + w))
    950 		SET_BIT (dst, dest_x + w);
    951 	}
    952     }
    953 }
    954 
    955 static void
    956 fast_composite_over_n_1_8888 (pixman_implementation_t *imp,
    957                               pixman_composite_info_t *info)
    958 {
    959     PIXMAN_COMPOSITE_ARGS (info);
    960     uint32_t     src, srca;
    961     uint32_t    *dst, *dst_line;
    962     uint32_t    *mask, *mask_line;
    963     int          mask_stride, dst_stride;
    964     uint32_t     bitcache, bitmask;
    965     int32_t      w;
    966 
    967     if (width <= 0)
    968 	return;
    969 
    970     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
    971     srca = src >> 24;
    972     if (src == 0)
    973 	return;
    974 
    975     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t,
    976                            dst_stride, dst_line, 1);
    977     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
    978                            mask_stride, mask_line, 1);
    979     mask_line += mask_x >> 5;
    980 
    981     if (srca == 0xff)
    982     {
    983 	while (height--)
    984 	{
    985 	    dst = dst_line;
    986 	    dst_line += dst_stride;
    987 	    mask = mask_line;
    988 	    mask_line += mask_stride;
    989 	    w = width;
    990 
    991 	    bitcache = *mask++;
    992 	    bitmask = CREATE_BITMASK (mask_x & 31);
    993 
    994 	    while (w--)
    995 	    {
    996 		if (bitmask == 0)
    997 		{
    998 		    bitcache = *mask++;
    999 		    bitmask = CREATE_BITMASK (0);
   1000 		}
   1001 		if (bitcache & bitmask)
   1002 		    *dst = src;
   1003 		bitmask = UPDATE_BITMASK (bitmask);
   1004 		dst++;
   1005 	    }
   1006 	}
   1007     }
   1008     else
   1009     {
   1010 	while (height--)
   1011 	{
   1012 	    dst = dst_line;
   1013 	    dst_line += dst_stride;
   1014 	    mask = mask_line;
   1015 	    mask_line += mask_stride;
   1016 	    w = width;
   1017 
   1018 	    bitcache = *mask++;
   1019 	    bitmask = CREATE_BITMASK (mask_x & 31);
   1020 
   1021 	    while (w--)
   1022 	    {
   1023 		if (bitmask == 0)
   1024 		{
   1025 		    bitcache = *mask++;
   1026 		    bitmask = CREATE_BITMASK (0);
   1027 		}
   1028 		if (bitcache & bitmask)
   1029 		    *dst = over (src, *dst);
   1030 		bitmask = UPDATE_BITMASK (bitmask);
   1031 		dst++;
   1032 	    }
   1033 	}
   1034     }
   1035 }
   1036 
   1037 static void
   1038 fast_composite_over_n_1_0565 (pixman_implementation_t *imp,
   1039                               pixman_composite_info_t *info)
   1040 {
   1041     PIXMAN_COMPOSITE_ARGS (info);
   1042     uint32_t     src, srca;
   1043     uint16_t    *dst, *dst_line;
   1044     uint32_t    *mask, *mask_line;
   1045     int          mask_stride, dst_stride;
   1046     uint32_t     bitcache, bitmask;
   1047     int32_t      w;
   1048     uint32_t     d;
   1049     uint16_t     src565;
   1050 
   1051     if (width <= 0)
   1052 	return;
   1053 
   1054     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1055     srca = src >> 24;
   1056     if (src == 0)
   1057 	return;
   1058 
   1059     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint16_t,
   1060                            dst_stride, dst_line, 1);
   1061     PIXMAN_IMAGE_GET_LINE (mask_image, 0, mask_y, uint32_t,
   1062                            mask_stride, mask_line, 1);
   1063     mask_line += mask_x >> 5;
   1064 
   1065     if (srca == 0xff)
   1066     {
   1067 	src565 = convert_8888_to_0565 (src);
   1068 	while (height--)
   1069 	{
   1070 	    dst = dst_line;
   1071 	    dst_line += dst_stride;
   1072 	    mask = mask_line;
   1073 	    mask_line += mask_stride;
   1074 	    w = width;
   1075 
   1076 	    bitcache = *mask++;
   1077 	    bitmask = CREATE_BITMASK (mask_x & 31);
   1078 
   1079 	    while (w--)
   1080 	    {
   1081 		if (bitmask == 0)
   1082 		{
   1083 		    bitcache = *mask++;
   1084 		    bitmask = CREATE_BITMASK (0);
   1085 		}
   1086 		if (bitcache & bitmask)
   1087 		    *dst = src565;
   1088 		bitmask = UPDATE_BITMASK (bitmask);
   1089 		dst++;
   1090 	    }
   1091 	}
   1092     }
   1093     else
   1094     {
   1095 	while (height--)
   1096 	{
   1097 	    dst = dst_line;
   1098 	    dst_line += dst_stride;
   1099 	    mask = mask_line;
   1100 	    mask_line += mask_stride;
   1101 	    w = width;
   1102 
   1103 	    bitcache = *mask++;
   1104 	    bitmask = CREATE_BITMASK (mask_x & 31);
   1105 
   1106 	    while (w--)
   1107 	    {
   1108 		if (bitmask == 0)
   1109 		{
   1110 		    bitcache = *mask++;
   1111 		    bitmask = CREATE_BITMASK (0);
   1112 		}
   1113 		if (bitcache & bitmask)
   1114 		{
   1115 		    d = over (src, convert_0565_to_0888 (*dst));
   1116 		    *dst = convert_8888_to_0565 (d);
   1117 		}
   1118 		bitmask = UPDATE_BITMASK (bitmask);
   1119 		dst++;
   1120 	    }
   1121 	}
   1122     }
   1123 }
   1124 
   1125 /*
   1126  * Simple bitblt
   1127  */
   1128 
   1129 static void
   1130 fast_composite_solid_fill (pixman_implementation_t *imp,
   1131                            pixman_composite_info_t *info)
   1132 {
   1133     PIXMAN_COMPOSITE_ARGS (info);
   1134     uint32_t src;
   1135 
   1136     src = _pixman_image_get_solid (imp, src_image, dest_image->bits.format);
   1137 
   1138     if (dest_image->bits.format == PIXMAN_a1)
   1139     {
   1140 	src = src >> 31;
   1141     }
   1142     else if (dest_image->bits.format == PIXMAN_a8)
   1143     {
   1144 	src = src >> 24;
   1145     }
   1146     else if (dest_image->bits.format == PIXMAN_r5g6b5 ||
   1147              dest_image->bits.format == PIXMAN_b5g6r5)
   1148     {
   1149 	src = convert_8888_to_0565 (src);
   1150     }
   1151 
   1152     pixman_fill (dest_image->bits.bits, dest_image->bits.rowstride,
   1153                  PIXMAN_FORMAT_BPP (dest_image->bits.format),
   1154                  dest_x, dest_y,
   1155                  width, height,
   1156                  src);
   1157 }
   1158 
   1159 static void
   1160 fast_composite_src_memcpy (pixman_implementation_t *imp,
   1161 			   pixman_composite_info_t *info)
   1162 {
   1163     PIXMAN_COMPOSITE_ARGS (info);
   1164     int bpp = PIXMAN_FORMAT_BPP (dest_image->bits.format) / 8;
   1165     uint32_t n_bytes = width * bpp;
   1166     int dst_stride, src_stride;
   1167     uint8_t    *dst;
   1168     uint8_t    *src;
   1169 
   1170     src_stride = src_image->bits.rowstride * 4;
   1171     dst_stride = dest_image->bits.rowstride * 4;
   1172 
   1173     src = (uint8_t *)src_image->bits.bits + src_y * src_stride + src_x * bpp;
   1174     dst = (uint8_t *)dest_image->bits.bits + dest_y * dst_stride + dest_x * bpp;
   1175 
   1176     while (height--)
   1177     {
   1178 	memcpy (dst, src, n_bytes);
   1179 
   1180 	dst += dst_stride;
   1181 	src += src_stride;
   1182     }
   1183 }
   1184 
   1185 FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, SRC, COVER)
   1186 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, SRC, NONE)
   1187 FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, SRC, PAD)
   1188 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, SRC, NORMAL)
   1189 FAST_NEAREST (x888_8888_cover, x888, 8888, uint32_t, uint32_t, SRC, COVER)
   1190 FAST_NEAREST (x888_8888_pad, x888, 8888, uint32_t, uint32_t, SRC, PAD)
   1191 FAST_NEAREST (x888_8888_normal, x888, 8888, uint32_t, uint32_t, SRC, NORMAL)
   1192 FAST_NEAREST (8888_8888_cover, 8888, 8888, uint32_t, uint32_t, OVER, COVER)
   1193 FAST_NEAREST (8888_8888_none, 8888, 8888, uint32_t, uint32_t, OVER, NONE)
   1194 FAST_NEAREST (8888_8888_pad, 8888, 8888, uint32_t, uint32_t, OVER, PAD)
   1195 FAST_NEAREST (8888_8888_normal, 8888, 8888, uint32_t, uint32_t, OVER, NORMAL)
   1196 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, SRC, COVER)
   1197 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, SRC, NONE)
   1198 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, SRC, PAD)
   1199 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, SRC, NORMAL)
   1200 FAST_NEAREST (565_565_normal, 0565, 0565, uint16_t, uint16_t, SRC, NORMAL)
   1201 FAST_NEAREST (8888_565_cover, 8888, 0565, uint32_t, uint16_t, OVER, COVER)
   1202 FAST_NEAREST (8888_565_none, 8888, 0565, uint32_t, uint16_t, OVER, NONE)
   1203 FAST_NEAREST (8888_565_pad, 8888, 0565, uint32_t, uint16_t, OVER, PAD)
   1204 FAST_NEAREST (8888_565_normal, 8888, 0565, uint32_t, uint16_t, OVER, NORMAL)
   1205 
   1206 #define REPEAT_MIN_WIDTH    32
   1207 
   1208 static void
   1209 fast_composite_tiled_repeat (pixman_implementation_t *imp,
   1210 			     pixman_composite_info_t *info)
   1211 {
   1212     PIXMAN_COMPOSITE_ARGS (info);
   1213     pixman_composite_func_t func;
   1214     pixman_format_code_t mask_format;
   1215     uint32_t src_flags, mask_flags;
   1216     int32_t sx, sy;
   1217     int32_t width_remain;
   1218     int32_t num_pixels;
   1219     int32_t src_width;
   1220     int32_t i, j;
   1221     pixman_image_t extended_src_image;
   1222     uint32_t extended_src[REPEAT_MIN_WIDTH * 2];
   1223     pixman_bool_t need_src_extension;
   1224     uint32_t *src_line;
   1225     int32_t src_stride;
   1226     int32_t src_bpp;
   1227     pixman_composite_info_t info2 = *info;
   1228 
   1229     src_flags = (info->src_flags & ~FAST_PATH_NORMAL_REPEAT) |
   1230 		    FAST_PATH_SAMPLES_COVER_CLIP_NEAREST;
   1231 
   1232     if (mask_image)
   1233     {
   1234 	mask_format = mask_image->common.extended_format_code;
   1235 	mask_flags = info->mask_flags;
   1236     }
   1237     else
   1238     {
   1239 	mask_format = PIXMAN_null;
   1240 	mask_flags = FAST_PATH_IS_OPAQUE;
   1241     }
   1242 
   1243     _pixman_implementation_lookup_composite (
   1244 	imp->toplevel, info->op,
   1245 	src_image->common.extended_format_code, src_flags,
   1246 	mask_format, mask_flags,
   1247 	dest_image->common.extended_format_code, info->dest_flags,
   1248 	&imp, &func);
   1249 
   1250     src_bpp = PIXMAN_FORMAT_BPP (src_image->bits.format);
   1251 
   1252     if (src_image->bits.width < REPEAT_MIN_WIDTH		&&
   1253 	(src_bpp == 32 || src_bpp == 16 || src_bpp == 8)	&&
   1254 	!src_image->bits.indexed)
   1255     {
   1256 	sx = src_x;
   1257 	sx = MOD (sx, src_image->bits.width);
   1258 	sx += width;
   1259 	src_width = 0;
   1260 
   1261 	while (src_width < REPEAT_MIN_WIDTH && src_width <= sx)
   1262 	    src_width += src_image->bits.width;
   1263 
   1264 	src_stride = (src_width * (src_bpp >> 3) + 3) / (int) sizeof (uint32_t);
   1265 
   1266 	/* Initialize/validate stack-allocated temporary image */
   1267 	_pixman_bits_image_init (&extended_src_image, src_image->bits.format,
   1268 				 src_width, 1, &extended_src[0], src_stride,
   1269 				 FALSE);
   1270 	_pixman_image_validate (&extended_src_image);
   1271 
   1272 	info2.src_image = &extended_src_image;
   1273 	need_src_extension = TRUE;
   1274     }
   1275     else
   1276     {
   1277 	src_width = src_image->bits.width;
   1278 	need_src_extension = FALSE;
   1279     }
   1280 
   1281     sx = src_x;
   1282     sy = src_y;
   1283 
   1284     while (--height >= 0)
   1285     {
   1286 	sx = MOD (sx, src_width);
   1287 	sy = MOD (sy, src_image->bits.height);
   1288 
   1289 	if (need_src_extension)
   1290 	{
   1291 	    if (src_bpp == 32)
   1292 	    {
   1293 		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint32_t, src_stride, src_line, 1);
   1294 
   1295 		for (i = 0; i < src_width; )
   1296 		{
   1297 		    for (j = 0; j < src_image->bits.width; j++, i++)
   1298 			extended_src[i] = src_line[j];
   1299 		}
   1300 	    }
   1301 	    else if (src_bpp == 16)
   1302 	    {
   1303 		uint16_t *src_line_16;
   1304 
   1305 		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint16_t, src_stride,
   1306 				       src_line_16, 1);
   1307 		src_line = (uint32_t*)src_line_16;
   1308 
   1309 		for (i = 0; i < src_width; )
   1310 		{
   1311 		    for (j = 0; j < src_image->bits.width; j++, i++)
   1312 			((uint16_t*)extended_src)[i] = ((uint16_t*)src_line)[j];
   1313 		}
   1314 	    }
   1315 	    else if (src_bpp == 8)
   1316 	    {
   1317 		uint8_t *src_line_8;
   1318 
   1319 		PIXMAN_IMAGE_GET_LINE (src_image, 0, sy, uint8_t, src_stride,
   1320 				       src_line_8, 1);
   1321 		src_line = (uint32_t*)src_line_8;
   1322 
   1323 		for (i = 0; i < src_width; )
   1324 		{
   1325 		    for (j = 0; j < src_image->bits.width; j++, i++)
   1326 			((uint8_t*)extended_src)[i] = ((uint8_t*)src_line)[j];
   1327 		}
   1328 	    }
   1329 
   1330 	    info2.src_y = 0;
   1331 	}
   1332 	else
   1333 	{
   1334 	    info2.src_y = sy;
   1335 	}
   1336 
   1337 	width_remain = width;
   1338 
   1339 	while (width_remain > 0)
   1340 	{
   1341 	    num_pixels = src_width - sx;
   1342 
   1343 	    if (num_pixels > width_remain)
   1344 		num_pixels = width_remain;
   1345 
   1346 	    info2.src_x = sx;
   1347 	    info2.width = num_pixels;
   1348 	    info2.height = 1;
   1349 
   1350 	    func (imp, &info2);
   1351 
   1352 	    width_remain -= num_pixels;
   1353 	    info2.mask_x += num_pixels;
   1354 	    info2.dest_x += num_pixels;
   1355 	    sx = 0;
   1356 	}
   1357 
   1358 	sx = src_x;
   1359 	sy++;
   1360 	info2.mask_x = info->mask_x;
   1361 	info2.mask_y++;
   1362 	info2.dest_x = info->dest_x;
   1363 	info2.dest_y++;
   1364     }
   1365 
   1366     if (need_src_extension)
   1367 	_pixman_image_fini (&extended_src_image);
   1368 }
   1369 
   1370 /* Use more unrolling for src_0565_0565 because it is typically CPU bound */
   1371 static force_inline void
   1372 scaled_nearest_scanline_565_565_SRC (uint16_t *       dst,
   1373 				     const uint16_t * src,
   1374 				     int32_t          w,
   1375 				     pixman_fixed_t   vx,
   1376 				     pixman_fixed_t   unit_x,
   1377 				     pixman_fixed_t   max_vx,
   1378 				     pixman_bool_t    fully_transparent_src)
   1379 {
   1380     uint16_t tmp1, tmp2, tmp3, tmp4;
   1381     while ((w -= 4) >= 0)
   1382     {
   1383 	tmp1 = *(src + pixman_fixed_to_int (vx));
   1384 	vx += unit_x;
   1385 	tmp2 = *(src + pixman_fixed_to_int (vx));
   1386 	vx += unit_x;
   1387 	tmp3 = *(src + pixman_fixed_to_int (vx));
   1388 	vx += unit_x;
   1389 	tmp4 = *(src + pixman_fixed_to_int (vx));
   1390 	vx += unit_x;
   1391 	*dst++ = tmp1;
   1392 	*dst++ = tmp2;
   1393 	*dst++ = tmp3;
   1394 	*dst++ = tmp4;
   1395     }
   1396     if (w & 2)
   1397     {
   1398 	tmp1 = *(src + pixman_fixed_to_int (vx));
   1399 	vx += unit_x;
   1400 	tmp2 = *(src + pixman_fixed_to_int (vx));
   1401 	vx += unit_x;
   1402 	*dst++ = tmp1;
   1403 	*dst++ = tmp2;
   1404     }
   1405     if (w & 1)
   1406 	*dst = *(src + pixman_fixed_to_int (vx));
   1407 }
   1408 
   1409 FAST_NEAREST_MAINLOOP (565_565_cover_SRC,
   1410 		       scaled_nearest_scanline_565_565_SRC,
   1411 		       uint16_t, uint16_t, COVER)
   1412 FAST_NEAREST_MAINLOOP (565_565_none_SRC,
   1413 		       scaled_nearest_scanline_565_565_SRC,
   1414 		       uint16_t, uint16_t, NONE)
   1415 FAST_NEAREST_MAINLOOP (565_565_pad_SRC,
   1416 		       scaled_nearest_scanline_565_565_SRC,
   1417 		       uint16_t, uint16_t, PAD)
   1418 
   1419 static force_inline uint32_t
   1420 fetch_nearest (pixman_repeat_t src_repeat,
   1421 	       pixman_format_code_t format,
   1422 	       uint32_t *src, int x, int src_width)
   1423 {
   1424     if (repeat (src_repeat, &x, src_width))
   1425     {
   1426 	if (format == PIXMAN_x8r8g8b8 || format == PIXMAN_x8b8g8r8)
   1427 	    return *(src + x) | 0xff000000;
   1428 	else
   1429 	    return *(src + x);
   1430     }
   1431     else
   1432     {
   1433 	return 0;
   1434     }
   1435 }
   1436 
   1437 static force_inline void
   1438 combine_over (uint32_t s, uint32_t *dst)
   1439 {
   1440     if (s)
   1441     {
   1442 	uint8_t ia = 0xff - (s >> 24);
   1443 
   1444 	if (ia)
   1445 	    UN8x4_MUL_UN8_ADD_UN8x4 (*dst, ia, s);
   1446 	else
   1447 	    *dst = s;
   1448     }
   1449 }
   1450 
   1451 static force_inline void
   1452 combine_src (uint32_t s, uint32_t *dst)
   1453 {
   1454     *dst = s;
   1455 }
   1456 
   1457 static void
   1458 fast_composite_scaled_nearest (pixman_implementation_t *imp,
   1459 			       pixman_composite_info_t *info)
   1460 {
   1461     PIXMAN_COMPOSITE_ARGS (info);
   1462     uint32_t       *dst_line;
   1463     uint32_t       *src_line;
   1464     int             dst_stride, src_stride;
   1465     int		    src_width, src_height;
   1466     pixman_repeat_t src_repeat;
   1467     pixman_fixed_t unit_x, unit_y;
   1468     pixman_format_code_t src_format;
   1469     pixman_vector_t v;
   1470     pixman_fixed_t vy;
   1471 
   1472     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, uint32_t, dst_stride, dst_line, 1);
   1473     /* pass in 0 instead of src_x and src_y because src_x and src_y need to be
   1474      * transformed from destination space to source space
   1475      */
   1476     PIXMAN_IMAGE_GET_LINE (src_image, 0, 0, uint32_t, src_stride, src_line, 1);
   1477 
   1478     /* reference point is the center of the pixel */
   1479     v.vector[0] = pixman_int_to_fixed (src_x) + pixman_fixed_1 / 2;
   1480     v.vector[1] = pixman_int_to_fixed (src_y) + pixman_fixed_1 / 2;
   1481     v.vector[2] = pixman_fixed_1;
   1482 
   1483     if (!pixman_transform_point_3d (src_image->common.transform, &v))
   1484 	return;
   1485 
   1486     unit_x = src_image->common.transform->matrix[0][0];
   1487     unit_y = src_image->common.transform->matrix[1][1];
   1488 
   1489     /* Round down to closest integer, ensuring that 0.5 rounds to 0, not 1 */
   1490     v.vector[0] -= pixman_fixed_e;
   1491     v.vector[1] -= pixman_fixed_e;
   1492 
   1493     src_height = src_image->bits.height;
   1494     src_width = src_image->bits.width;
   1495     src_repeat = src_image->common.repeat;
   1496     src_format = src_image->bits.format;
   1497 
   1498     vy = v.vector[1];
   1499     while (height--)
   1500     {
   1501         pixman_fixed_t vx = v.vector[0];
   1502 	int y = pixman_fixed_to_int (vy);
   1503 	uint32_t *dst = dst_line;
   1504 
   1505 	dst_line += dst_stride;
   1506 
   1507         /* adjust the y location by a unit vector in the y direction
   1508          * this is equivalent to transforming y+1 of the destination point to source space */
   1509         vy += unit_y;
   1510 
   1511 	if (!repeat (src_repeat, &y, src_height))
   1512 	{
   1513 	    if (op == PIXMAN_OP_SRC)
   1514 		memset (dst, 0, sizeof (*dst) * width);
   1515 	}
   1516 	else
   1517 	{
   1518 	    int w = width;
   1519 
   1520 	    uint32_t *src = src_line + y * src_stride;
   1521 
   1522 	    while (w >= 2)
   1523 	    {
   1524 		uint32_t s1, s2;
   1525 		int x1, x2;
   1526 
   1527 		x1 = pixman_fixed_to_int (vx);
   1528 		vx += unit_x;
   1529 
   1530 		x2 = pixman_fixed_to_int (vx);
   1531 		vx += unit_x;
   1532 
   1533 		w -= 2;
   1534 
   1535 		s1 = fetch_nearest (src_repeat, src_format, src, x1, src_width);
   1536 		s2 = fetch_nearest (src_repeat, src_format, src, x2, src_width);
   1537 
   1538 		if (op == PIXMAN_OP_OVER)
   1539 		{
   1540 		    combine_over (s1, dst++);
   1541 		    combine_over (s2, dst++);
   1542 		}
   1543 		else
   1544 		{
   1545 		    combine_src (s1, dst++);
   1546 		    combine_src (s2, dst++);
   1547 		}
   1548 	    }
   1549 
   1550 	    while (w--)
   1551 	    {
   1552 		uint32_t s;
   1553 		int x;
   1554 
   1555 		x = pixman_fixed_to_int (vx);
   1556 		vx += unit_x;
   1557 
   1558 		s = fetch_nearest (src_repeat, src_format, src, x, src_width);
   1559 
   1560 		if (op == PIXMAN_OP_OVER)
   1561 		    combine_over (s, dst++);
   1562 		else
   1563 		    combine_src (s, dst++);
   1564 	    }
   1565 	}
   1566     }
   1567 }
   1568 
   1569 #define CACHE_LINE_SIZE 64
   1570 
   1571 #define FAST_SIMPLE_ROTATE(suffix, pix_type)                                  \
   1572                                                                               \
   1573 static void                                                                   \
   1574 blt_rotated_90_trivial_##suffix (pix_type       *dst,                         \
   1575 				 int             dst_stride,                  \
   1576 				 const pix_type *src,                         \
   1577 				 int             src_stride,                  \
   1578 				 int             w,                           \
   1579 				 int             h)                           \
   1580 {                                                                             \
   1581     int x, y;                                                                 \
   1582     for (y = 0; y < h; y++)                                                   \
   1583     {                                                                         \
   1584 	const pix_type *s = src + (h - y - 1);                                \
   1585 	pix_type *d = dst + dst_stride * y;                                   \
   1586 	for (x = 0; x < w; x++)                                               \
   1587 	{                                                                     \
   1588 	    *d++ = *s;                                                        \
   1589 	    s += src_stride;                                                  \
   1590 	}                                                                     \
   1591     }                                                                         \
   1592 }                                                                             \
   1593                                                                               \
   1594 static void                                                                   \
   1595 blt_rotated_270_trivial_##suffix (pix_type       *dst,                        \
   1596 				  int             dst_stride,                 \
   1597 				  const pix_type *src,                        \
   1598 				  int             src_stride,                 \
   1599 				  int             w,                          \
   1600 				  int             h)                          \
   1601 {                                                                             \
   1602     int x, y;                                                                 \
   1603     for (y = 0; y < h; y++)                                                   \
   1604     {                                                                         \
   1605 	const pix_type *s = src + src_stride * (w - 1) + y;                   \
   1606 	pix_type *d = dst + dst_stride * y;                                   \
   1607 	for (x = 0; x < w; x++)                                               \
   1608 	{                                                                     \
   1609 	    *d++ = *s;                                                        \
   1610 	    s -= src_stride;                                                  \
   1611 	}                                                                     \
   1612     }                                                                         \
   1613 }                                                                             \
   1614                                                                               \
   1615 static void                                                                   \
   1616 blt_rotated_90_##suffix (pix_type       *dst,                                 \
   1617 			 int             dst_stride,                          \
   1618 			 const pix_type *src,                                 \
   1619 			 int             src_stride,                          \
   1620 			 int             W,                                   \
   1621 			 int             H)                                   \
   1622 {                                                                             \
   1623     int x;                                                                    \
   1624     int leading_pixels = 0, trailing_pixels = 0;                              \
   1625     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
   1626                                                                               \
   1627     /*                                                                        \
   1628      * split processing into handling destination as TILE_SIZExH cache line   \
   1629      * aligned vertical stripes (optimistically assuming that destination     \
   1630      * stride is a multiple of cache line, if not - it will be just a bit     \
   1631      * slower)                                                                \
   1632      */                                                                       \
   1633                                                                               \
   1634     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
   1635     {                                                                         \
   1636 	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
   1637 			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
   1638 	if (leading_pixels > W)                                               \
   1639 	    leading_pixels = W;                                               \
   1640                                                                               \
   1641 	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
   1642 	blt_rotated_90_trivial_##suffix (                                     \
   1643 	    dst,                                                              \
   1644 	    dst_stride,                                                       \
   1645 	    src,                                                              \
   1646 	    src_stride,                                                       \
   1647 	    leading_pixels,                                                   \
   1648 	    H);                                                               \
   1649 	                                                                      \
   1650 	dst += leading_pixels;                                                \
   1651 	src += leading_pixels * src_stride;                                   \
   1652 	W -= leading_pixels;                                                  \
   1653     }                                                                         \
   1654                                                                               \
   1655     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
   1656     {                                                                         \
   1657 	trailing_pixels = (((uintptr_t)(dst + W) &                            \
   1658 			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
   1659 	if (trailing_pixels > W)                                              \
   1660 	    trailing_pixels = W;                                              \
   1661 	W -= trailing_pixels;                                                 \
   1662     }                                                                         \
   1663                                                                               \
   1664     for (x = 0; x < W; x += TILE_SIZE)                                        \
   1665     {                                                                         \
   1666 	/* aligned middle part TILE_SIZExH */                                 \
   1667 	blt_rotated_90_trivial_##suffix (                                     \
   1668 	    dst + x,                                                          \
   1669 	    dst_stride,                                                       \
   1670 	    src + src_stride * x,                                             \
   1671 	    src_stride,                                                       \
   1672 	    TILE_SIZE,                                                        \
   1673 	    H);                                                               \
   1674     }                                                                         \
   1675                                                                               \
   1676     if (trailing_pixels)                                                      \
   1677     {                                                                         \
   1678 	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
   1679 	blt_rotated_90_trivial_##suffix (                                     \
   1680 	    dst + W,                                                          \
   1681 	    dst_stride,                                                       \
   1682 	    src + W * src_stride,                                             \
   1683 	    src_stride,                                                       \
   1684 	    trailing_pixels,                                                  \
   1685 	    H);                                                               \
   1686     }                                                                         \
   1687 }                                                                             \
   1688                                                                               \
   1689 static void                                                                   \
   1690 blt_rotated_270_##suffix (pix_type       *dst,                                \
   1691 			  int             dst_stride,                         \
   1692 			  const pix_type *src,                                \
   1693 			  int             src_stride,                         \
   1694 			  int             W,                                  \
   1695 			  int             H)                                  \
   1696 {                                                                             \
   1697     int x;                                                                    \
   1698     int leading_pixels = 0, trailing_pixels = 0;                              \
   1699     const int TILE_SIZE = CACHE_LINE_SIZE / sizeof(pix_type);                 \
   1700                                                                               \
   1701     /*                                                                        \
   1702      * split processing into handling destination as TILE_SIZExH cache line   \
   1703      * aligned vertical stripes (optimistically assuming that destination     \
   1704      * stride is a multiple of cache line, if not - it will be just a bit     \
   1705      * slower)                                                                \
   1706      */                                                                       \
   1707                                                                               \
   1708     if ((uintptr_t)dst & (CACHE_LINE_SIZE - 1))                               \
   1709     {                                                                         \
   1710 	leading_pixels = TILE_SIZE - (((uintptr_t)dst &                       \
   1711 			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
   1712 	if (leading_pixels > W)                                               \
   1713 	    leading_pixels = W;                                               \
   1714                                                                               \
   1715 	/* unaligned leading part NxH (where N < TILE_SIZE) */                \
   1716 	blt_rotated_270_trivial_##suffix (                                    \
   1717 	    dst,                                                              \
   1718 	    dst_stride,                                                       \
   1719 	    src + src_stride * (W - leading_pixels),                          \
   1720 	    src_stride,                                                       \
   1721 	    leading_pixels,                                                   \
   1722 	    H);                                                               \
   1723 	                                                                      \
   1724 	dst += leading_pixels;                                                \
   1725 	W -= leading_pixels;                                                  \
   1726     }                                                                         \
   1727                                                                               \
   1728     if ((uintptr_t)(dst + W) & (CACHE_LINE_SIZE - 1))                         \
   1729     {                                                                         \
   1730 	trailing_pixels = (((uintptr_t)(dst + W) &                            \
   1731 			    (CACHE_LINE_SIZE - 1)) / sizeof(pix_type));       \
   1732 	if (trailing_pixels > W)                                              \
   1733 	    trailing_pixels = W;                                              \
   1734 	W -= trailing_pixels;                                                 \
   1735 	src += trailing_pixels * src_stride;                                  \
   1736     }                                                                         \
   1737                                                                               \
   1738     for (x = 0; x < W; x += TILE_SIZE)                                        \
   1739     {                                                                         \
   1740 	/* aligned middle part TILE_SIZExH */                                 \
   1741 	blt_rotated_270_trivial_##suffix (                                    \
   1742 	    dst + x,                                                          \
   1743 	    dst_stride,                                                       \
   1744 	    src + src_stride * (W - x - TILE_SIZE),                           \
   1745 	    src_stride,                                                       \
   1746 	    TILE_SIZE,                                                        \
   1747 	    H);                                                               \
   1748     }                                                                         \
   1749                                                                               \
   1750     if (trailing_pixels)                                                      \
   1751     {                                                                         \
   1752 	/* unaligned trailing part NxH (where N < TILE_SIZE) */               \
   1753 	blt_rotated_270_trivial_##suffix (                                    \
   1754 	    dst + W,                                                          \
   1755 	    dst_stride,                                                       \
   1756 	    src - trailing_pixels * src_stride,                               \
   1757 	    src_stride,                                                       \
   1758 	    trailing_pixels,                                                  \
   1759 	    H);                                                               \
   1760     }                                                                         \
   1761 }                                                                             \
   1762                                                                               \
   1763 static void                                                                   \
   1764 fast_composite_rotate_90_##suffix (pixman_implementation_t *imp,              \
   1765 				   pixman_composite_info_t *info)	      \
   1766 {									      \
   1767     PIXMAN_COMPOSITE_ARGS (info);					      \
   1768     pix_type       *dst_line;						      \
   1769     pix_type       *src_line;                                                 \
   1770     int             dst_stride, src_stride;                                   \
   1771     int             src_x_t, src_y_t;                                         \
   1772                                                                               \
   1773     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
   1774 			   dst_stride, dst_line, 1);                          \
   1775     src_x_t = -src_y + pixman_fixed_to_int (                                  \
   1776 				src_image->common.transform->matrix[0][2] +   \
   1777 				pixman_fixed_1 / 2 - pixman_fixed_e) - height;\
   1778     src_y_t = src_x + pixman_fixed_to_int (                                   \
   1779 				src_image->common.transform->matrix[1][2] +   \
   1780 				pixman_fixed_1 / 2 - pixman_fixed_e);         \
   1781     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
   1782 			   src_stride, src_line, 1);                          \
   1783     blt_rotated_90_##suffix (dst_line, dst_stride, src_line, src_stride,      \
   1784 			     width, height);                                  \
   1785 }                                                                             \
   1786                                                                               \
   1787 static void                                                                   \
   1788 fast_composite_rotate_270_##suffix (pixman_implementation_t *imp,             \
   1789 				    pixman_composite_info_t *info)            \
   1790 {                                                                             \
   1791     PIXMAN_COMPOSITE_ARGS (info);					      \
   1792     pix_type       *dst_line;						      \
   1793     pix_type       *src_line;                                                 \
   1794     int             dst_stride, src_stride;                                   \
   1795     int             src_x_t, src_y_t;                                         \
   1796                                                                               \
   1797     PIXMAN_IMAGE_GET_LINE (dest_image, dest_x, dest_y, pix_type,              \
   1798 			   dst_stride, dst_line, 1);                          \
   1799     src_x_t = src_y + pixman_fixed_to_int (                                   \
   1800 				src_image->common.transform->matrix[0][2] +   \
   1801 				pixman_fixed_1 / 2 - pixman_fixed_e);         \
   1802     src_y_t = -src_x + pixman_fixed_to_int (                                  \
   1803 				src_image->common.transform->matrix[1][2] +   \
   1804 				pixman_fixed_1 / 2 - pixman_fixed_e) - width; \
   1805     PIXMAN_IMAGE_GET_LINE (src_image, src_x_t, src_y_t, pix_type,             \
   1806 			   src_stride, src_line, 1);                          \
   1807     blt_rotated_270_##suffix (dst_line, dst_stride, src_line, src_stride,     \
   1808 			      width, height);                                 \
   1809 }
   1810 
/*
 * Expand FAST_SIMPLE_ROTATE once per pixel size.  Each expansion defines,
 * among other helpers, fast_composite_rotate_90_<suffix> and
 * fast_composite_rotate_270_<suffix>, which operate on pixels of the
 * given C type (second argument).
 */
   1811 FAST_SIMPLE_ROTATE (8, uint8_t)
   1812 FAST_SIMPLE_ROTATE (565, uint16_t)
   1813 FAST_SIMPLE_ROTATE (8888, uint32_t)
   1814 
/*
 * Table of fast paths provided by this implementation; it is handed to
 * _pixman_implementation_create () when the implementation is created.
 *
 * As the hand-written entries and helper macros below spell out, an entry
 * lists: operator, source format and source flags, mask format and mask
 * flags, destination format and destination flags, and the function that
 * performs the composite.
 *
 * NOTE(review): the lookup code is not part of this section; entry order
 * is presumably significant (first match wins) -- confirm before
 * reordering anything here.
 */
   1815 static const pixman_fast_path_t c_fast_paths[] =
   1816 {
   1817     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r5g6b5, fast_composite_over_n_8_0565),
   1818     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b5g6r5, fast_composite_over_n_8_0565),
   1819     PIXMAN_STD_FAST_PATH (OVER, solid, a8, r8g8b8, fast_composite_over_n_8_0888),
   1820     PIXMAN_STD_FAST_PATH (OVER, solid, a8, b8g8r8, fast_composite_over_n_8_0888),
   1821     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8r8g8b8, fast_composite_over_n_8_8888),
   1822     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8r8g8b8, fast_composite_over_n_8_8888),
   1823     PIXMAN_STD_FAST_PATH (OVER, solid, a8, a8b8g8r8, fast_composite_over_n_8_8888),
   1824     PIXMAN_STD_FAST_PATH (OVER, solid, a8, x8b8g8r8, fast_composite_over_n_8_8888),
   1825     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8r8g8b8, fast_composite_over_n_1_8888),
   1826     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8r8g8b8, fast_composite_over_n_1_8888),
   1827     PIXMAN_STD_FAST_PATH (OVER, solid, a1, a8b8g8r8, fast_composite_over_n_1_8888),
   1828     PIXMAN_STD_FAST_PATH (OVER, solid, a1, x8b8g8r8, fast_composite_over_n_1_8888),
   1829     PIXMAN_STD_FAST_PATH (OVER, solid, a1, r5g6b5,   fast_composite_over_n_1_0565),
   1830     PIXMAN_STD_FAST_PATH (OVER, solid, a1, b5g6r5,   fast_composite_over_n_1_0565),
   1831     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, a8r8g8b8, fast_composite_over_n_8888_8888_ca),
   1832     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, x8r8g8b8, fast_composite_over_n_8888_8888_ca),
   1833     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8r8g8b8, r5g6b5, fast_composite_over_n_8888_0565_ca),
   1834     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, a8b8g8r8, fast_composite_over_n_8888_8888_ca),
   1835     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, x8b8g8r8, fast_composite_over_n_8888_8888_ca),
   1836     PIXMAN_STD_FAST_PATH_CA (OVER, solid, a8b8g8r8, b5g6r5, fast_composite_over_n_8888_0565_ca),
   1837     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, x8r8g8b8, fast_composite_over_x888_8_8888),
   1838     PIXMAN_STD_FAST_PATH (OVER, x8r8g8b8, a8, a8r8g8b8, fast_composite_over_x888_8_8888),
   1839     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, x8b8g8r8, fast_composite_over_x888_8_8888),
   1840     PIXMAN_STD_FAST_PATH (OVER, x8b8g8r8, a8, a8b8g8r8, fast_composite_over_x888_8_8888),
   1841     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, a8r8g8b8, fast_composite_over_8888_8888),
   1842     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, x8r8g8b8, fast_composite_over_8888_8888),
   1843     PIXMAN_STD_FAST_PATH (OVER, a8r8g8b8, null, r5g6b5, fast_composite_over_8888_0565),
   1844     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, a8b8g8r8, fast_composite_over_8888_8888),
   1845     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, x8b8g8r8, fast_composite_over_8888_8888),
   1846     PIXMAN_STD_FAST_PATH (OVER, a8b8g8r8, null, b5g6r5, fast_composite_over_8888_0565),
   1847     PIXMAN_STD_FAST_PATH (ADD, r5g6b5, null, r5g6b5, fast_composite_add_0565_0565),
   1848     PIXMAN_STD_FAST_PATH (ADD, b5g6r5, null, b5g6r5, fast_composite_add_0565_0565),
   1849     PIXMAN_STD_FAST_PATH (ADD, a8r8g8b8, null, a8r8g8b8, fast_composite_add_8888_8888),
   1850     PIXMAN_STD_FAST_PATH (ADD, a8b8g8r8, null, a8b8g8r8, fast_composite_add_8888_8888),
   1851     PIXMAN_STD_FAST_PATH (ADD, a8, null, a8, fast_composite_add_8_8),
   1852     PIXMAN_STD_FAST_PATH (ADD, a1, null, a1, fast_composite_add_1_1),
   1853     PIXMAN_STD_FAST_PATH_CA (ADD, solid, a8r8g8b8, a8r8g8b8, fast_composite_add_n_8888_8888_ca),
   1854     PIXMAN_STD_FAST_PATH (ADD, solid, a8, a8, fast_composite_add_n_8_8),
   1855     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8r8g8b8, fast_composite_solid_fill),
   1856     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8r8g8b8, fast_composite_solid_fill),
   1857     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8b8g8r8, fast_composite_solid_fill),
   1858     PIXMAN_STD_FAST_PATH (SRC, solid, null, x8b8g8r8, fast_composite_solid_fill),
   1859     PIXMAN_STD_FAST_PATH (SRC, solid, null, a1, fast_composite_solid_fill),
   1860     PIXMAN_STD_FAST_PATH (SRC, solid, null, a8, fast_composite_solid_fill),
   1861     PIXMAN_STD_FAST_PATH (SRC, solid, null, r5g6b5, fast_composite_solid_fill),
   1862     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, a8r8g8b8, fast_composite_src_x888_8888),
   1863     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, a8b8g8r8, fast_composite_src_x888_8888),
   1864     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
   1865     PIXMAN_STD_FAST_PATH (SRC, a8r8g8b8, null, a8r8g8b8, fast_composite_src_memcpy),
   1866     PIXMAN_STD_FAST_PATH (SRC, x8r8g8b8, null, x8r8g8b8, fast_composite_src_memcpy),
   1867     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
   1868     PIXMAN_STD_FAST_PATH (SRC, a8b8g8r8, null, a8b8g8r8, fast_composite_src_memcpy),
   1869     PIXMAN_STD_FAST_PATH (SRC, x8b8g8r8, null, x8b8g8r8, fast_composite_src_memcpy),
   1870     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8x8, fast_composite_src_memcpy),
   1871     PIXMAN_STD_FAST_PATH (SRC, b8g8r8a8, null, b8g8r8a8, fast_composite_src_memcpy),
   1872     PIXMAN_STD_FAST_PATH (SRC, b8g8r8x8, null, b8g8r8x8, fast_composite_src_memcpy),
   1873     PIXMAN_STD_FAST_PATH (SRC, r5g6b5, null, r5g6b5, fast_composite_src_memcpy),
   1874     PIXMAN_STD_FAST_PATH (SRC, b5g6r5, null, b5g6r5, fast_composite_src_memcpy),
   1875     PIXMAN_STD_FAST_PATH (SRC, r8g8b8, null, r8g8b8, fast_composite_src_memcpy),
   1876     PIXMAN_STD_FAST_PATH (SRC, b8g8r8, null, b8g8r8, fast_composite_src_memcpy),
   1877     PIXMAN_STD_FAST_PATH (SRC, x1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
   1878     PIXMAN_STD_FAST_PATH (SRC, a1r5g5b5, null, x1r5g5b5, fast_composite_src_memcpy),
   1879     PIXMAN_STD_FAST_PATH (SRC, a8, null, a8, fast_composite_src_memcpy),
   1880     PIXMAN_STD_FAST_PATH (IN, a8, null, a8, fast_composite_in_8_8),
   1881     PIXMAN_STD_FAST_PATH (IN, solid, a8, a8, fast_composite_in_n_8_8),
   1882 
    /* Entries generated by the SIMPLE_NEAREST_FAST_PATH* macros (defined
     * outside this section); the last argument names the specialised
     * function suffix.
     */
   1883     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888_8888),
   1884     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888_8888),
   1885     SIMPLE_NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8, 8888_8888),
   1886     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8, 8888_8888),
   1887 
   1888     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888_8888),
   1889     SIMPLE_NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8, 8888_8888),
   1890 
   1891     SIMPLE_NEAREST_FAST_PATH (SRC, x8r8g8b8, r5g6b5, 8888_565),
   1892     SIMPLE_NEAREST_FAST_PATH (SRC, a8r8g8b8, r5g6b5, 8888_565),
   1893 
   1894     SIMPLE_NEAREST_FAST_PATH (SRC, r5g6b5, r5g6b5, 565_565),
   1895 
   1896     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
   1897     SIMPLE_NEAREST_FAST_PATH_COVER (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
   1898     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
   1899     SIMPLE_NEAREST_FAST_PATH_PAD (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
   1900     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8r8g8b8, a8r8g8b8, x888_8888),
   1901     SIMPLE_NEAREST_FAST_PATH_NORMAL (SRC, x8b8g8r8, a8b8g8r8, x888_8888),
   1902 
   1903     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8, 8888_8888),
   1904     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8, 8888_8888),
   1905     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8, 8888_8888),
   1906     SIMPLE_NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8, 8888_8888),
   1907 
   1908     SIMPLE_NEAREST_FAST_PATH (OVER, a8r8g8b8, r5g6b5, 8888_565),
   1909 
    /* One table entry: operator `op`, source format `s` that must satisfy
     * SCALED_NEAREST_FLAGS, no mask (PIXMAN_null with no flags), destination
     * format `d`; every such entry dispatches to the single function
     * fast_composite_scaled_nearest.
     */
   1910 #define NEAREST_FAST_PATH(op,s,d)		\
   1911     {   PIXMAN_OP_ ## op,			\
   1912 	PIXMAN_ ## s, SCALED_NEAREST_FLAGS,	\
   1913 	PIXMAN_null, 0,				\
   1914 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,	\
   1915 	fast_composite_scaled_nearest,		\
   1916     }
   1917 
   1918     NEAREST_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8),
   1919     NEAREST_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8),
   1920     NEAREST_FAST_PATH (SRC, x8b8g8r8, x8b8g8r8),
   1921     NEAREST_FAST_PATH (SRC, a8b8g8r8, x8b8g8r8),
   1922 
   1923     NEAREST_FAST_PATH (SRC, x8r8g8b8, a8r8g8b8),
   1924     NEAREST_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8),
   1925     NEAREST_FAST_PATH (SRC, x8b8g8r8, a8b8g8r8),
   1926     NEAREST_FAST_PATH (SRC, a8b8g8r8, a8b8g8r8),
   1927 
   1928     NEAREST_FAST_PATH (OVER, x8r8g8b8, x8r8g8b8),
   1929     NEAREST_FAST_PATH (OVER, a8r8g8b8, x8r8g8b8),
   1930     NEAREST_FAST_PATH (OVER, x8b8g8r8, x8b8g8r8),
   1931     NEAREST_FAST_PATH (OVER, a8b8g8r8, x8b8g8r8),
   1932 
   1933     NEAREST_FAST_PATH (OVER, x8r8g8b8, a8r8g8b8),
   1934     NEAREST_FAST_PATH (OVER, a8r8g8b8, a8r8g8b8),
   1935     NEAREST_FAST_PATH (OVER, x8b8g8r8, a8b8g8r8),
   1936     NEAREST_FAST_PATH (OVER, a8b8g8r8, a8b8g8r8),
   1937 
    /* Source flags required by the rotation entries: the rotate-by-`angle`
     * transform flag, the nearest filter flag, the
     * samples-cover-clip (nearest) flag and the standard flags.
     */
   1938 #define SIMPLE_ROTATE_FLAGS(angle)					  \
   1939     (FAST_PATH_ROTATE_ ## angle ## _TRANSFORM	|			  \
   1940      FAST_PATH_NEAREST_FILTER			|			  \
   1941      FAST_PATH_SAMPLES_COVER_CLIP_NEAREST	|			  \
   1942      FAST_PATH_STANDARD_FLAGS)
   1943 
    /* Expands to TWO table entries (90 and 270 degrees), both without a
     * mask, dispatching to fast_composite_rotate_90_<suffix> and
     * fast_composite_rotate_270_<suffix> respectively.
     */
   1944 #define SIMPLE_ROTATE_FAST_PATH(op,s,d,suffix)				  \
   1945     {   PIXMAN_OP_ ## op,						  \
   1946 	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (90),				  \
   1947 	PIXMAN_null, 0,							  \
   1948 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
   1949 	fast_composite_rotate_90_##suffix,				  \
   1950     },									  \
   1951     {   PIXMAN_OP_ ## op,						  \
   1952 	PIXMAN_ ## s, SIMPLE_ROTATE_FLAGS (270),			  \
   1953 	PIXMAN_null, 0,							  \
   1954 	PIXMAN_ ## d, FAST_PATH_STD_DEST_FLAGS,				  \
   1955 	fast_composite_rotate_270_##suffix,				  \
   1956     }
   1957 
   1958     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, a8r8g8b8, 8888),
   1959     SIMPLE_ROTATE_FAST_PATH (SRC, a8r8g8b8, x8r8g8b8, 8888),
   1960     SIMPLE_ROTATE_FAST_PATH (SRC, x8r8g8b8, x8r8g8b8, 8888),
   1961     SIMPLE_ROTATE_FAST_PATH (SRC, r5g6b5, r5g6b5, 565),
   1962     SIMPLE_ROTATE_FAST_PATH (SRC, a8, a8, 8),
   1963 
   1964     /* Simple repeat fast path entry. */
   1965     {	PIXMAN_OP_any,
   1966 	PIXMAN_any,
   1967 	(FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM | FAST_PATH_BITS_IMAGE |
   1968 	 FAST_PATH_NORMAL_REPEAT),
   1969 	PIXMAN_any, 0,
   1970 	PIXMAN_any, FAST_PATH_STD_DEST_FLAGS,
   1971 	fast_composite_tiled_repeat
   1972     },
   1973 
    /* Final entry; presumably the end-of-table sentinel -- confirm against
     * the code that walks this table.
     */
   1974     {   PIXMAN_OP_NONE	},
   1975 };
   1976 
   1977 #ifdef WORDS_BIGENDIAN
   1978 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (32 - (offs) - (n)))
   1979 #else
   1980 #define A1_FILL_MASK(n, offs) (((1U << (n)) - 1) << (offs))
   1981 #endif
   1982 
   1983 static force_inline void
   1984 pixman_fill1_line (uint32_t *dst, int offs, int width, int v)
   1985 {
   1986     if (offs)
   1987     {
   1988 	int leading_pixels = 32 - offs;
   1989 	if (leading_pixels >= width)
   1990 	{
   1991 	    if (v)
   1992 		*dst |= A1_FILL_MASK (width, offs);
   1993 	    else
   1994 		*dst &= ~A1_FILL_MASK (width, offs);
   1995 	    return;
   1996 	}
   1997 	else
   1998 	{
   1999 	    if (v)
   2000 		*dst++ |= A1_FILL_MASK (leading_pixels, offs);
   2001 	    else
   2002 		*dst++ &= ~A1_FILL_MASK (leading_pixels, offs);
   2003 	    width -= leading_pixels;
   2004 	}
   2005     }
   2006     while (width >= 32)
   2007     {
   2008 	if (v)
   2009 	    *dst++ = 0xFFFFFFFF;
   2010 	else
   2011 	    *dst++ = 0;
   2012 	width -= 32;
   2013     }
   2014     if (width > 0)
   2015     {
   2016 	if (v)
   2017 	    *dst |= A1_FILL_MASK (width, 0);
   2018 	else
   2019 	    *dst &= ~A1_FILL_MASK (width, 0);
   2020     }
   2021 }
   2022 
   2023 static void
   2024 pixman_fill1 (uint32_t *bits,
   2025               int       stride,
   2026               int       x,
   2027               int       y,
   2028               int       width,
   2029               int       height,
   2030               uint32_t  filler)
   2031 {
   2032     uint32_t *dst = bits + y * stride + (x >> 5);
   2033     int offs = x & 31;
   2034 
   2035     if (filler & 1)
   2036     {
   2037 	while (height--)
   2038 	{
   2039 	    pixman_fill1_line (dst, offs, width, 1);
   2040 	    dst += stride;
   2041 	}
   2042     }
   2043     else
   2044     {
   2045 	while (height--)
   2046 	{
   2047 	    pixman_fill1_line (dst, offs, width, 0);
   2048 	    dst += stride;
   2049 	}
   2050     }
   2051 }
   2052 
   2053 static void
   2054 pixman_fill8 (uint32_t *bits,
   2055               int       stride,
   2056               int       x,
   2057               int       y,
   2058               int       width,
   2059               int       height,
   2060               uint32_t  filler)
   2061 {
   2062     int byte_stride = stride * (int) sizeof (uint32_t);
   2063     uint8_t *dst = (uint8_t *) bits;
   2064     uint8_t v = filler & 0xff;
   2065     int i;
   2066 
   2067     dst = dst + y * byte_stride + x;
   2068 
   2069     while (height--)
   2070     {
   2071 	for (i = 0; i < width; ++i)
   2072 	    dst[i] = v;
   2073 
   2074 	dst += byte_stride;
   2075     }
   2076 }
   2077 
   2078 static void
   2079 pixman_fill16 (uint32_t *bits,
   2080                int       stride,
   2081                int       x,
   2082                int       y,
   2083                int       width,
   2084                int       height,
   2085                uint32_t  filler)
   2086 {
   2087     int short_stride =
   2088 	(stride * (int)sizeof (uint32_t)) / (int)sizeof (uint16_t);
   2089     uint16_t *dst = (uint16_t *)bits;
   2090     uint16_t v = filler & 0xffff;
   2091     int i;
   2092 
   2093     dst = dst + y * short_stride + x;
   2094 
   2095     while (height--)
   2096     {
   2097 	for (i = 0; i < width; ++i)
   2098 	    dst[i] = v;
   2099 
   2100 	dst += short_stride;
   2101     }
   2102 }
   2103 
   2104 static void
   2105 pixman_fill32 (uint32_t *bits,
   2106                int       stride,
   2107                int       x,
   2108                int       y,
   2109                int       width,
   2110                int       height,
   2111                uint32_t  filler)
   2112 {
   2113     int i;
   2114 
   2115     bits = bits + y * stride + x;
   2116 
   2117     while (height--)
   2118     {
   2119 	for (i = 0; i < width; ++i)
   2120 	    bits[i] = filler;
   2121 
   2122 	bits += stride;
   2123     }
   2124 }
   2125 
   2126 static pixman_bool_t
   2127 fast_path_fill (pixman_implementation_t *imp,
   2128                 uint32_t *               bits,
   2129                 int                      stride,
   2130                 int                      bpp,
   2131                 int                      x,
   2132                 int                      y,
   2133                 int                      width,
   2134                 int                      height,
   2135                 uint32_t		 filler)
   2136 {
   2137     switch (bpp)
   2138     {
   2139     case 1:
   2140 	pixman_fill1 (bits, stride, x, y, width, height, filler);
   2141 	break;
   2142 
   2143     case 8:
   2144 	pixman_fill8 (bits, stride, x, y, width, height, filler);
   2145 	break;
   2146 
   2147     case 16:
   2148 	pixman_fill16 (bits, stride, x, y, width, height, filler);
   2149 	break;
   2150 
   2151     case 32:
   2152 	pixman_fill32 (bits, stride, x, y, width, height, filler);
   2153 	break;
   2154 
   2155     default:
   2156 	return FALSE;
   2157     }
   2158 
   2159     return TRUE;
   2160 }
   2161 
   2162 /*****************************************************************************/
   2163 
   2164 static uint32_t *
   2165 fast_fetch_r5g6b5 (pixman_iter_t *iter, const uint32_t *mask)
   2166 {
   2167     int32_t w = iter->width;
   2168     uint32_t *dst = iter->buffer;
   2169     const uint16_t *src = (const uint16_t *)iter->bits;
   2170 
   2171     iter->bits += iter->stride;
   2172 
   2173     /* Align the source buffer at 4 bytes boundary */
   2174     if (w > 0 && ((uintptr_t)src & 3))
   2175     {
   2176 	*dst++ = convert_0565_to_8888 (*src++);
   2177 	w--;
   2178     }
   2179     /* Process two pixels per iteration */
   2180     while ((w -= 2) >= 0)
   2181     {
   2182 	uint32_t sr, sb, sg, t0, t1;
   2183 	uint32_t s = *(const uint32_t *)src;
   2184 	src += 2;
   2185 	sr = (s >> 8) & 0x00F800F8;
   2186 	sb = (s << 3) & 0x00F800F8;
   2187 	sg = (s >> 3) & 0x00FC00FC;
   2188 	sr |= sr >> 5;
   2189 	sb |= sb >> 5;
   2190 	sg |= sg >> 6;
   2191 	t0 = ((sr << 16) & 0x00FF0000) | ((sg << 8) & 0x0000FF00) |
   2192 	     (sb & 0xFF) | 0xFF000000;
   2193 	t1 = (sr & 0x00FF0000) | ((sg >> 8) & 0x0000FF00) |
   2194 	     (sb >> 16) | 0xFF000000;
   2195 #ifdef WORDS_BIGENDIAN
   2196 	*dst++ = t1;
   2197 	*dst++ = t0;
   2198 #else
   2199 	*dst++ = t0;
   2200 	*dst++ = t1;
   2201 #endif
   2202     }
   2203     if (w & 1)
   2204     {
   2205 	*dst = convert_0565_to_8888 (*src);
   2206     }
   2207 
   2208     return iter->buffer;
   2209 }
   2210 
   2211 static uint32_t *
   2212 fast_dest_fetch_noop (pixman_iter_t *iter, const uint32_t *mask)
   2213 {
   2214     iter->bits += iter->stride;
   2215     return iter->buffer;
   2216 }
   2217 
   2218 /* Helper function for a workaround, which tries to ensure that 0x1F001F
   2219  * constant is always allocated in a register on RISC architectures.
   2220  */
   2221 static force_inline uint32_t
   2222 convert_8888_to_0565_workaround (uint32_t s, uint32_t x1F001F)
   2223 {
   2224     uint32_t a, b;
   2225     a = (s >> 3) & x1F001F;
   2226     b = s & 0xFC00;
   2227     a |= a >> 5;
   2228     a |= b >> 5;
   2229     return a;
   2230 }
   2231 
   2232 static void
   2233 fast_write_back_r5g6b5 (pixman_iter_t *iter)
   2234 {
   2235     int32_t w = iter->width;
   2236     uint16_t *dst = (uint16_t *)(iter->bits - iter->stride);
   2237     const uint32_t *src = iter->buffer;
   2238     /* Workaround to ensure that x1F001F variable is allocated in a register */
   2239     static volatile uint32_t volatile_x1F001F = 0x1F001F;
   2240     uint32_t x1F001F = volatile_x1F001F;
   2241 
   2242     while ((w -= 4) >= 0)
   2243     {
   2244 	uint32_t s1 = *src++;
   2245 	uint32_t s2 = *src++;
   2246 	uint32_t s3 = *src++;
   2247 	uint32_t s4 = *src++;
   2248 	*dst++ = convert_8888_to_0565_workaround (s1, x1F001F);
   2249 	*dst++ = convert_8888_to_0565_workaround (s2, x1F001F);
   2250 	*dst++ = convert_8888_to_0565_workaround (s3, x1F001F);
   2251 	*dst++ = convert_8888_to_0565_workaround (s4, x1F001F);
   2252     }
   2253     if (w & 2)
   2254     {
   2255 	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
   2256 	*dst++ = convert_8888_to_0565_workaround (*src++, x1F001F);
   2257     }
   2258     if (w & 1)
   2259     {
   2260 	*dst = convert_8888_to_0565_workaround (*src, x1F001F);
   2261     }
   2262 }
   2263 
   2264 typedef struct
   2265 {
   2266     pixman_format_code_t	format;
   2267     pixman_iter_get_scanline_t	get_scanline;
   2268     pixman_iter_write_back_t	write_back;
   2269 } fetcher_info_t;
   2270 
   2271 static const fetcher_info_t fetchers[] =
   2272 {
   2273     { PIXMAN_r5g6b5, fast_fetch_r5g6b5, fast_write_back_r5g6b5 },
   2274     { PIXMAN_null }
   2275 };
   2276 
   2277 static pixman_bool_t
   2278 fast_src_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
   2279 {
   2280     pixman_image_t *image = iter->image;
   2281 
   2282 #define FLAGS								\
   2283     (FAST_PATH_STANDARD_FLAGS | FAST_PATH_ID_TRANSFORM |		\
   2284      FAST_PATH_BITS_IMAGE | FAST_PATH_SAMPLES_COVER_CLIP_NEAREST)
   2285 
   2286     if ((iter->iter_flags & ITER_NARROW)			&&
   2287 	(iter->image_flags & FLAGS) == FLAGS)
   2288     {
   2289 	const fetcher_info_t *f;
   2290 
   2291 	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
   2292 	{
   2293 	    if (image->common.extended_format_code == f->format)
   2294 	    {
   2295 		uint8_t *b = (uint8_t *)image->bits.bits;
   2296 		int s = image->bits.rowstride * 4;
   2297 
   2298 		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
   2299 		iter->stride = s;
   2300 
   2301 		iter->get_scanline = f->get_scanline;
   2302 		return TRUE;
   2303 	    }
   2304 	}
   2305     }
   2306 
   2307     return FALSE;
   2308 }
   2309 
   2310 static pixman_bool_t
   2311 fast_dest_iter_init (pixman_implementation_t *imp, pixman_iter_t *iter)
   2312 {
   2313     pixman_image_t *image = iter->image;
   2314 
   2315     if ((iter->iter_flags & ITER_NARROW)		&&
   2316 	(iter->image_flags & FAST_PATH_STD_DEST_FLAGS) == FAST_PATH_STD_DEST_FLAGS)
   2317     {
   2318 	const fetcher_info_t *f;
   2319 
   2320 	for (f = &fetchers[0]; f->format != PIXMAN_null; f++)
   2321 	{
   2322 	    if (image->common.extended_format_code == f->format)
   2323 	    {
   2324 		uint8_t *b = (uint8_t *)image->bits.bits;
   2325 		int s = image->bits.rowstride * 4;
   2326 
   2327 		iter->bits = b + s * iter->y + iter->x * PIXMAN_FORMAT_BPP (f->format) / 8;
   2328 		iter->stride = s;
   2329 
   2330 		if ((iter->iter_flags & (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA)) ==
   2331 		    (ITER_IGNORE_RGB | ITER_IGNORE_ALPHA))
   2332 		{
   2333 		    iter->get_scanline = fast_dest_fetch_noop;
   2334 		}
   2335 		else
   2336 		{
   2337 		    iter->get_scanline = f->get_scanline;
   2338 		}
   2339 		iter->write_back = f->write_back;
   2340 		return TRUE;
   2341 	    }
   2342 	}
   2343     }
   2344     return FALSE;
   2345 }
   2346 
   2347 
   2348 pixman_implementation_t *
   2349 _pixman_implementation_create_fast_path (pixman_implementation_t *fallback)
   2350 {
   2351     pixman_implementation_t *imp = _pixman_implementation_create (fallback, c_fast_paths);
   2352 
   2353     imp->fill = fast_path_fill;
   2354     imp->src_iter_init = fast_src_iter_init;
   2355     imp->dest_iter_init = fast_dest_iter_init;
   2356 
   2357     return imp;
   2358 }
   2359