Home | History | Annotate | Download | only in video
      1 /*
      2     SDL - Simple DirectMedia Layer
      3     Copyright (C) 1997-2006 Sam Lantinga
      4 
      5     This library is free software; you can redistribute it and/or
      6     modify it under the terms of the GNU Lesser General Public
      7     License as published by the Free Software Foundation; either
      8     version 2.1 of the License, or (at your option) any later version.
      9 
     10     This library is distributed in the hope that it will be useful,
     11     but WITHOUT ANY WARRANTY; without even the implied warranty of
     12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13     Lesser General Public License for more details.
     14 
     15     You should have received a copy of the GNU Lesser General Public
     16     License along with this library; if not, write to the Free Software
     17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
     18 
     19     Sam Lantinga
     20     slouken (at) libsdl.org
     21 */
     22 #include "SDL_config.h"
     23 
     24 #include "SDL_video.h"
     25 #include "SDL_endian.h"
     26 #include "SDL_cpuinfo.h"
     27 #include "SDL_blit.h"
     28 
     29 /* Functions to blit from N-bit surfaces to other surfaces */
     30 
     31 #if SDL_ALTIVEC_BLITTERS
     32 #if __MWERKS__
     33 #pragma altivec_model on
     34 #endif
     35 #ifdef HAVE_ALTIVEC_H
     36 #include <altivec.h>
     37 #endif
     38 #define assert(X)
     39 #ifdef __MACOSX__
     40 #include <sys/sysctl.h>
     41 static size_t GetL3CacheSize( void )
     42 {
     43     const char key[] = "hw.l3cachesize";
     44     u_int64_t result = 0;
     45     size_t typeSize = sizeof( result );
     46 
     47 
     48     int err = sysctlbyname( key, &result, &typeSize, NULL, 0 );
     49     if( 0 != err ) return 0;
     50 
     51     return result;
     52 }
     53 #else
     54 static size_t GetL3CacheSize( void )
     55 {
     56     /* XXX: Just guess G4 */
     57     return 2097152;
     58 }
     59 #endif /* __MACOSX__ */
     60 
     61 #if (defined(__MACOSX__) && (__GNUC__ < 4))
     62     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
     63         (vector unsigned char) ( a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p )
     64     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
     65         (vector unsigned short) ( a,b,c,d,e,f,g,h )
     66 #else
     67     #define VECUINT8_LITERAL(a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p) \
     68         (vector unsigned char) { a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p }
     69     #define VECUINT16_LITERAL(a,b,c,d,e,f,g,h) \
     70         (vector unsigned short) { a,b,c,d,e,f,g,h }
     71 #endif
     72 
     73 #define UNALIGNED_PTR(x) (((size_t) x) & 0x0000000F)
     74 #define VSWIZZLE32(a,b,c,d) (vector unsigned char) \
     75                                ( 0x00+a, 0x00+b, 0x00+c, 0x00+d, \
     76                                  0x04+a, 0x04+b, 0x04+c, 0x04+d, \
     77                                  0x08+a, 0x08+b, 0x08+c, 0x08+d, \
     78                                  0x0C+a, 0x0C+b, 0x0C+c, 0x0C+d )
     79 
     80 #define MAKE8888(dstfmt, r, g, b, a)  \
     81     ( ((r<<dstfmt->Rshift)&dstfmt->Rmask) | \
     82       ((g<<dstfmt->Gshift)&dstfmt->Gmask) | \
     83       ((b<<dstfmt->Bshift)&dstfmt->Bmask) | \
     84       ((a<<dstfmt->Ashift)&dstfmt->Amask) )
     85 
     86 /*
     87  * Data Stream Touch...Altivec cache prefetching.
     88  *
     89  *  Don't use this on a G5...however, the speed boost is very significant
     90  *   on a G4.
     91  */
     92 #define DST_CHAN_SRC 1
     93 #define DST_CHAN_DEST 2
     94 
     95 /* macro to set DST control word value... */
     96 #define DST_CTRL(size, count, stride) \
     97     (((size) << 24) | ((count) << 16) | (stride))
     98 
     99 #define VEC_ALIGNER(src) ((UNALIGNED_PTR(src)) \
    100     ? vec_lvsl(0, src) \
    101     : vec_add(vec_lvsl(8, src), vec_splat_u8(8)))
    102 
    103 /* Calculate the permute vector used for 32->32 swizzling */
    104 static vector unsigned char calc_swizzle32(const SDL_PixelFormat *srcfmt,
    105                                   const SDL_PixelFormat *dstfmt)
    106 {
    107     /*
    108     * We have to assume that the bits that aren't used by other
    109      *  colors is alpha, and it's one complete byte, since some formats
    110      *  leave alpha with a zero mask, but we should still swizzle the bits.
    111      */
    112     /* ARGB */
    113     const static struct SDL_PixelFormat default_pixel_format = {
    114         NULL, 0, 0,
    115         0, 0, 0, 0,
    116         16, 8, 0, 24,
    117         0x00FF0000, 0x0000FF00, 0x000000FF, 0xFF000000,
    118         0, 0};
    119     if (!srcfmt) {
    120         srcfmt = &default_pixel_format;
    121     }
    122     if (!dstfmt) {
    123         dstfmt = &default_pixel_format;
    124     }
    125     const vector unsigned char plus = VECUINT8_LITERAL(
    126                                       0x00, 0x00, 0x00, 0x00,
    127                                       0x04, 0x04, 0x04, 0x04,
    128                                       0x08, 0x08, 0x08, 0x08,
    129                                       0x0C, 0x0C, 0x0C, 0x0C );
    130     vector unsigned char vswiz;
    131     vector unsigned int srcvec;
    132 #define RESHIFT(X) (3 - ((X) >> 3))
    133     Uint32 rmask = RESHIFT(srcfmt->Rshift) << (dstfmt->Rshift);
    134     Uint32 gmask = RESHIFT(srcfmt->Gshift) << (dstfmt->Gshift);
    135     Uint32 bmask = RESHIFT(srcfmt->Bshift) << (dstfmt->Bshift);
    136     Uint32 amask;
    137     /* Use zero for alpha if either surface doesn't have alpha */
    138     if (dstfmt->Amask) {
    139         amask = ((srcfmt->Amask) ? RESHIFT(srcfmt->Ashift) : 0x10) << (dstfmt->Ashift);
    140     } else {
    141         amask = 0x10101010 & ((dstfmt->Rmask | dstfmt->Gmask | dstfmt->Bmask) ^ 0xFFFFFFFF);
    142     }
    143 #undef RESHIFT
    144     ((unsigned int *)(char*)&srcvec)[0] = (rmask | gmask | bmask | amask);
    145     vswiz = vec_add(plus, (vector unsigned char)vec_splat(srcvec, 0));
    146     return(vswiz);
    147 }
    148 
    149 static void Blit_RGB888_RGB565(SDL_BlitInfo *info);
    150 static void Blit_RGB888_RGB565Altivec(SDL_BlitInfo *info) {
    151     int height = info->d_height;
    152     Uint8 *src = (Uint8 *) info->s_pixels;
    153     int srcskip = info->s_skip;
    154     Uint8 *dst = (Uint8 *) info->d_pixels;
    155     int dstskip = info->d_skip;
    156     SDL_PixelFormat *srcfmt = info->src;
    157     vector unsigned char valpha = vec_splat_u8(0);
    158     vector unsigned char vpermute = calc_swizzle32(srcfmt, NULL);
    159     vector unsigned char vgmerge = VECUINT8_LITERAL(
    160         0x00, 0x02, 0x00, 0x06,
    161         0x00, 0x0a, 0x00, 0x0e,
    162         0x00, 0x12, 0x00, 0x16,
    163         0x00, 0x1a, 0x00, 0x1e);
    164     vector unsigned short v1 = vec_splat_u16(1);
    165     vector unsigned short v3 = vec_splat_u16(3);
    166     vector unsigned short v3f = VECUINT16_LITERAL(
    167         0x003f, 0x003f, 0x003f, 0x003f,
    168         0x003f, 0x003f, 0x003f, 0x003f);
    169     vector unsigned short vfc = VECUINT16_LITERAL(
    170         0x00fc, 0x00fc, 0x00fc, 0x00fc,
    171         0x00fc, 0x00fc, 0x00fc, 0x00fc);
    172     vector unsigned short vf800 = (vector unsigned short)vec_splat_u8(-7);
    173     vf800 = vec_sl(vf800, vec_splat_u16(8));
    174 
    175     while (height--) {
    176         vector unsigned char valigner;
    177         vector unsigned char voverflow;
    178         vector unsigned char vsrc;
    179 
    180         int width = info->d_width;
    181         int extrawidth;
    182 
    183         /* do scalar until we can align... */
    184 #define ONE_PIXEL_BLEND(condition, widthvar) \
    185         while (condition) { \
    186             Uint32 Pixel; \
    187             unsigned sR, sG, sB, sA; \
    188             DISEMBLE_RGBA((Uint8 *)src, 4, srcfmt, Pixel, \
    189                           sR, sG, sB, sA); \
    190             *(Uint16 *)(dst) = (((sR << 8) & 0x0000F800) | \
    191                                 ((sG << 3) & 0x000007E0) | \
    192                                 ((sB >> 3) & 0x0000001F)); \
    193             dst += 2; \
    194             src += 4; \
    195             widthvar--; \
    196         }
    197 
    198         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
    199 
    200         /* After all that work, here's the vector part! */
    201         extrawidth = (width % 8);  /* trailing unaligned stores */
    202         width -= extrawidth;
    203         vsrc = vec_ld(0, src);
    204         valigner = VEC_ALIGNER(src);
    205 
    206         while (width) {
    207             vector unsigned short vpixel, vrpixel, vgpixel, vbpixel;
    208             vector unsigned int vsrc1, vsrc2;
    209             vector unsigned char vdst;
    210 
    211             voverflow = vec_ld(15, src);
    212             vsrc = vec_perm(vsrc, voverflow, valigner);
    213             vsrc1 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
    214             src += 16;
    215             vsrc = voverflow;
    216             voverflow = vec_ld(15, src);
    217             vsrc = vec_perm(vsrc, voverflow, valigner);
    218             vsrc2 = (vector unsigned int)vec_perm(vsrc, valpha, vpermute);
    219             /* 1555 */
    220             vpixel = (vector unsigned short)vec_packpx(vsrc1, vsrc2);
    221             vgpixel = (vector unsigned short)vec_perm(vsrc1, vsrc2, vgmerge);
    222             vgpixel = vec_and(vgpixel, vfc);
    223             vgpixel = vec_sl(vgpixel, v3);
    224             vrpixel = vec_sl(vpixel, v1);
    225             vrpixel = vec_and(vrpixel, vf800);
    226             vbpixel = vec_and(vpixel, v3f);
    227             vdst = vec_or((vector unsigned char)vrpixel, (vector unsigned char)vgpixel);
    228             /* 565 */
    229             vdst = vec_or(vdst, (vector unsigned char)vbpixel);
    230             vec_st(vdst, 0, dst);
    231 
    232             width -= 8;
    233             src += 16;
    234             dst += 16;
    235             vsrc = voverflow;
    236         }
    237 
    238         assert(width == 0);
    239 
    240         /* do scalar until we can align... */
    241         ONE_PIXEL_BLEND((extrawidth), extrawidth);
    242 #undef ONE_PIXEL_BLEND
    243 
    244         src += srcskip;  /* move to next row, accounting for pitch. */
    245         dst += dstskip;
    246     }
    247 
    248 
    249 }
    250 
    251 static void Blit_RGB565_32Altivec(SDL_BlitInfo *info) {
    252     int height = info->d_height;
    253     Uint8 *src = (Uint8 *) info->s_pixels;
    254     int srcskip = info->s_skip;
    255     Uint8 *dst = (Uint8 *) info->d_pixels;
    256     int dstskip = info->d_skip;
    257     SDL_PixelFormat *srcfmt = info->src;
    258     SDL_PixelFormat *dstfmt = info->dst;
    259     unsigned alpha;
    260     vector unsigned char valpha;
    261     vector unsigned char vpermute;
    262     vector unsigned short vf800;
    263     vector unsigned int v8 = vec_splat_u32(8);
    264     vector unsigned int v16 = vec_add(v8, v8);
    265     vector unsigned short v2 = vec_splat_u16(2);
    266     vector unsigned short v3 = vec_splat_u16(3);
    267     /*
    268         0x10 - 0x1f is the alpha
    269         0x00 - 0x0e evens are the red
    270         0x01 - 0x0f odds are zero
    271     */
    272     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
    273         0x10, 0x00, 0x01, 0x01,
    274         0x10, 0x02, 0x01, 0x01,
    275         0x10, 0x04, 0x01, 0x01,
    276         0x10, 0x06, 0x01, 0x01
    277     );
    278     vector unsigned char vredalpha2 = (vector unsigned char) (
    279         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
    280     );
    281     /*
    282         0x00 - 0x0f is ARxx ARxx ARxx ARxx
    283         0x11 - 0x0f odds are blue
    284     */
    285     vector unsigned char vblue1 = VECUINT8_LITERAL(
    286         0x00, 0x01, 0x02, 0x11,
    287         0x04, 0x05, 0x06, 0x13,
    288         0x08, 0x09, 0x0a, 0x15,
    289         0x0c, 0x0d, 0x0e, 0x17
    290     );
    291     vector unsigned char vblue2 = (vector unsigned char)(
    292         vec_add((vector unsigned int)vblue1, v8)
    293     );
    294     /*
    295         0x00 - 0x0f is ARxB ARxB ARxB ARxB
    296         0x10 - 0x0e evens are green
    297     */
    298     vector unsigned char vgreen1 = VECUINT8_LITERAL(
    299         0x00, 0x01, 0x10, 0x03,
    300         0x04, 0x05, 0x12, 0x07,
    301         0x08, 0x09, 0x14, 0x0b,
    302         0x0c, 0x0d, 0x16, 0x0f
    303     );
    304     vector unsigned char vgreen2 = (vector unsigned char)(
    305         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
    306     );
    307 
    308 
    309     assert(srcfmt->BytesPerPixel == 2);
    310     assert(dstfmt->BytesPerPixel == 4);
    311 
    312     vf800 = (vector unsigned short)vec_splat_u8(-7);
    313     vf800 = vec_sl(vf800, vec_splat_u16(8));
    314 
    315     if (dstfmt->Amask && srcfmt->alpha) {
    316         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
    317         valpha = vec_splat(valpha, 0);
    318     } else {
    319         alpha = 0;
    320         valpha = vec_splat_u8(0);
    321     }
    322 
    323     vpermute = calc_swizzle32(NULL, dstfmt);
    324     while (height--) {
    325         vector unsigned char valigner;
    326         vector unsigned char voverflow;
    327         vector unsigned char vsrc;
    328 
    329         int width = info->d_width;
    330         int extrawidth;
    331 
    332         /* do scalar until we can align... */
    333 #define ONE_PIXEL_BLEND(condition, widthvar) \
    334         while (condition) { \
    335             unsigned sR, sG, sB; \
    336             unsigned short Pixel = *((unsigned short *)src); \
    337             sR = (Pixel >> 8) & 0xf8; \
    338             sG = (Pixel >> 3) & 0xfc; \
    339             sB = (Pixel << 3) & 0xf8; \
    340             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
    341             src += 2; \
    342             dst += 4; \
    343             widthvar--; \
    344         }
    345         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
    346 
    347         /* After all that work, here's the vector part! */
    348         extrawidth = (width % 8);  /* trailing unaligned stores */
    349         width -= extrawidth;
    350         vsrc = vec_ld(0, src);
    351         valigner = VEC_ALIGNER(src);
    352 
    353         while (width) {
    354             vector unsigned short vR, vG, vB;
    355             vector unsigned char vdst1, vdst2;
    356 
    357             voverflow = vec_ld(15, src);
    358             vsrc = vec_perm(vsrc, voverflow, valigner);
    359 
    360             vR = vec_and((vector unsigned short)vsrc, vf800);
    361             vB = vec_sl((vector unsigned short)vsrc, v3);
    362             vG = vec_sl(vB, v2);
    363 
    364             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
    365             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
    366             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
    367             vdst1 = vec_perm(vdst1, valpha, vpermute);
    368             vec_st(vdst1, 0, dst);
    369 
    370             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
    371             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
    372             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
    373             vdst2 = vec_perm(vdst2, valpha, vpermute);
    374             vec_st(vdst2, 16, dst);
    375 
    376             width -= 8;
    377             dst += 32;
    378             src += 16;
    379             vsrc = voverflow;
    380         }
    381 
    382         assert(width == 0);
    383 
    384 
    385         /* do scalar until we can align... */
    386         ONE_PIXEL_BLEND((extrawidth), extrawidth);
    387 #undef ONE_PIXEL_BLEND
    388 
    389         src += srcskip;  /* move to next row, accounting for pitch. */
    390         dst += dstskip;
    391     }
    392 
    393 }
    394 
    395 
    396 static void Blit_RGB555_32Altivec(SDL_BlitInfo *info) {
    397     int height = info->d_height;
    398     Uint8 *src = (Uint8 *) info->s_pixels;
    399     int srcskip = info->s_skip;
    400     Uint8 *dst = (Uint8 *) info->d_pixels;
    401     int dstskip = info->d_skip;
    402     SDL_PixelFormat *srcfmt = info->src;
    403     SDL_PixelFormat *dstfmt = info->dst;
    404     unsigned alpha;
    405     vector unsigned char valpha;
    406     vector unsigned char vpermute;
    407     vector unsigned short vf800;
    408     vector unsigned int v8 = vec_splat_u32(8);
    409     vector unsigned int v16 = vec_add(v8, v8);
    410     vector unsigned short v1 = vec_splat_u16(1);
    411     vector unsigned short v3 = vec_splat_u16(3);
    412     /*
    413         0x10 - 0x1f is the alpha
    414         0x00 - 0x0e evens are the red
    415         0x01 - 0x0f odds are zero
    416     */
    417     vector unsigned char vredalpha1 = VECUINT8_LITERAL(
    418         0x10, 0x00, 0x01, 0x01,
    419         0x10, 0x02, 0x01, 0x01,
    420         0x10, 0x04, 0x01, 0x01,
    421         0x10, 0x06, 0x01, 0x01
    422     );
    423     vector unsigned char vredalpha2 = (vector unsigned char)(
    424         vec_add((vector unsigned int)vredalpha1, vec_sl(v8, v16))
    425     );
    426     /*
    427         0x00 - 0x0f is ARxx ARxx ARxx ARxx
    428         0x11 - 0x0f odds are blue
    429     */
    430     vector unsigned char vblue1 = VECUINT8_LITERAL(
    431         0x00, 0x01, 0x02, 0x11,
    432         0x04, 0x05, 0x06, 0x13,
    433         0x08, 0x09, 0x0a, 0x15,
    434         0x0c, 0x0d, 0x0e, 0x17
    435     );
    436     vector unsigned char vblue2 = (vector unsigned char)(
    437         vec_add((vector unsigned int)vblue1, v8)
    438     );
    439     /*
    440         0x00 - 0x0f is ARxB ARxB ARxB ARxB
    441         0x10 - 0x0e evens are green
    442     */
    443     vector unsigned char vgreen1 = VECUINT8_LITERAL(
    444         0x00, 0x01, 0x10, 0x03,
    445         0x04, 0x05, 0x12, 0x07,
    446         0x08, 0x09, 0x14, 0x0b,
    447         0x0c, 0x0d, 0x16, 0x0f
    448     );
    449     vector unsigned char vgreen2 = (vector unsigned char)(
    450         vec_add((vector unsigned int)vgreen1, vec_sl(v8, v8))
    451     );
    452 
    453 
    454     assert(srcfmt->BytesPerPixel == 2);
    455     assert(dstfmt->BytesPerPixel == 4);
    456 
    457     vf800 = (vector unsigned short)vec_splat_u8(-7);
    458     vf800 = vec_sl(vf800, vec_splat_u16(8));
    459 
    460     if (dstfmt->Amask && srcfmt->alpha) {
    461         ((unsigned char *)&valpha)[0] = alpha = srcfmt->alpha;
    462         valpha = vec_splat(valpha, 0);
    463     } else {
    464         alpha = 0;
    465         valpha = vec_splat_u8(0);
    466     }
    467 
    468     vpermute = calc_swizzle32(NULL, dstfmt);
    469     while (height--) {
    470         vector unsigned char valigner;
    471         vector unsigned char voverflow;
    472         vector unsigned char vsrc;
    473 
    474         int width = info->d_width;
    475         int extrawidth;
    476 
    477         /* do scalar until we can align... */
    478 #define ONE_PIXEL_BLEND(condition, widthvar) \
    479         while (condition) { \
    480             unsigned sR, sG, sB; \
    481             unsigned short Pixel = *((unsigned short *)src); \
    482             sR = (Pixel >> 7) & 0xf8; \
    483             sG = (Pixel >> 2) & 0xf8; \
    484             sB = (Pixel << 3) & 0xf8; \
    485             ASSEMBLE_RGBA(dst, 4, dstfmt, sR, sG, sB, alpha); \
    486             src += 2; \
    487             dst += 4; \
    488             widthvar--; \
    489         }
    490         ONE_PIXEL_BLEND(((UNALIGNED_PTR(dst)) && (width)), width);
    491 
    492         /* After all that work, here's the vector part! */
    493         extrawidth = (width % 8);  /* trailing unaligned stores */
    494         width -= extrawidth;
    495         vsrc = vec_ld(0, src);
    496         valigner = VEC_ALIGNER(src);
    497 
    498         while (width) {
    499             vector unsigned short vR, vG, vB;
    500             vector unsigned char vdst1, vdst2;
    501 
    502             voverflow = vec_ld(15, src);
    503             vsrc = vec_perm(vsrc, voverflow, valigner);
    504 
    505             vR = vec_and(vec_sl((vector unsigned short)vsrc,v1), vf800);
    506             vB = vec_sl((vector unsigned short)vsrc, v3);
    507             vG = vec_sl(vB, v3);
    508 
    509             vdst1 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha1);
    510             vdst1 = vec_perm(vdst1, (vector unsigned char)vB, vblue1);
    511             vdst1 = vec_perm(vdst1, (vector unsigned char)vG, vgreen1);
    512             vdst1 = vec_perm(vdst1, valpha, vpermute);
    513             vec_st(vdst1, 0, dst);
    514 
    515             vdst2 = (vector unsigned char)vec_perm((vector unsigned char)vR, valpha, vredalpha2);
    516             vdst2 = vec_perm(vdst2, (vector unsigned char)vB, vblue2);
    517             vdst2 = vec_perm(vdst2, (vector unsigned char)vG, vgreen2);
    518             vdst2 = vec_perm(vdst2, valpha, vpermute);
    519             vec_st(vdst2, 16, dst);
    520 
    521             width -= 8;
    522             dst += 32;
    523             src += 16;
    524             vsrc = voverflow;
    525         }
    526 
    527         assert(width == 0);
    528 
    529 
    530         /* do scalar until we can align... */
    531         ONE_PIXEL_BLEND((extrawidth), extrawidth);
    532 #undef ONE_PIXEL_BLEND
    533 
    534         src += srcskip;  /* move to next row, accounting for pitch. */
    535         dst += dstskip;
    536     }
    537 
    538 }
    539 
    540 static void BlitNtoNKey(SDL_BlitInfo *info);
    541 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info);
    542 static void Blit32to32KeyAltivec(SDL_BlitInfo *info)
    543 {
    544     int height = info->d_height;
    545     Uint32 *srcp = (Uint32 *) info->s_pixels;
    546     int srcskip = info->s_skip;
    547     Uint32 *dstp = (Uint32 *) info->d_pixels;
    548     int dstskip = info->d_skip;
    549     SDL_PixelFormat *srcfmt = info->src;
    550     int srcbpp = srcfmt->BytesPerPixel;
    551     SDL_PixelFormat *dstfmt = info->dst;
    552     int dstbpp = dstfmt->BytesPerPixel;
    553     int copy_alpha = (srcfmt->Amask && dstfmt->Amask);
    554 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
    555     Uint32 rgbmask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
    556 	Uint32 ckey = info->src->colorkey;
    557     vector unsigned int valpha;
    558     vector unsigned char vpermute;
    559     vector unsigned char vzero;
    560     vector unsigned int vckey;
    561     vector unsigned int vrgbmask;
    562     vpermute = calc_swizzle32(srcfmt, dstfmt);
    563     if (info->d_width < 16) {
    564         if(copy_alpha) {
    565             BlitNtoNKeyCopyAlpha(info);
    566         } else {
    567             BlitNtoNKey(info);
    568         }
    569         return;
    570     }
    571     vzero = vec_splat_u8(0);
    572     if (alpha) {
    573         ((unsigned char *)&valpha)[0] = (unsigned char)alpha;
    574         valpha = (vector unsigned int)vec_splat((vector unsigned char)valpha, 0);
    575     } else {
    576         valpha = (vector unsigned int)vzero;
    577     }
    578     ckey &= rgbmask;
    579     ((unsigned int *)(char*)&vckey)[0] = ckey;
    580     vckey = vec_splat(vckey, 0);
    581     ((unsigned int *)(char*)&vrgbmask)[0] = rgbmask;
    582     vrgbmask = vec_splat(vrgbmask, 0);
    583 
    584     while (height--) {
    585 #define ONE_PIXEL_BLEND(condition, widthvar) \
    586         if (copy_alpha) { \
    587             while (condition) { \
    588                 Uint32 Pixel; \
    589                 unsigned sR, sG, sB, sA; \
    590                 DISEMBLE_RGBA((Uint8 *)srcp, srcbpp, srcfmt, Pixel, \
    591                           sR, sG, sB, sA); \
    592                 if ( (Pixel & rgbmask) != ckey ) { \
    593                       ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
    594                             sR, sG, sB, sA); \
    595                 } \
    596                 dstp = (Uint32 *) (((Uint8 *) dstp) + dstbpp); \
    597                 srcp = (Uint32 *) (((Uint8 *) srcp) + srcbpp); \
    598                 widthvar--; \
    599             } \
    600         } else { \
    601             while (condition) { \
    602                 Uint32 Pixel; \
    603                 unsigned sR, sG, sB; \
    604                 RETRIEVE_RGB_PIXEL((Uint8 *)srcp, srcbpp, Pixel); \
    605                 if ( Pixel != ckey ) { \
    606                     RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB); \
    607                     ASSEMBLE_RGBA((Uint8 *)dstp, dstbpp, dstfmt, \
    608                               sR, sG, sB, alpha); \
    609                 } \
    610                 dstp = (Uint32 *) (((Uint8 *)dstp) + dstbpp); \
    611                 srcp = (Uint32 *) (((Uint8 *)srcp) + srcbpp); \
    612                 widthvar--; \
    613             } \
    614         }
    615         int width = info->d_width;
    616         ONE_PIXEL_BLEND((UNALIGNED_PTR(dstp)) && (width), width);
    617         assert(width > 0);
    618         if (width > 0) {
    619             int extrawidth = (width % 4);
    620             vector unsigned char valigner = VEC_ALIGNER(srcp);
    621             vector unsigned int vs = vec_ld(0, srcp);
    622             width -= extrawidth;
    623             assert(width >= 4);
    624             while (width) {
    625                 vector unsigned char vsel;
    626                 vector unsigned int vd;
    627                 vector unsigned int voverflow = vec_ld(15, srcp);
    628                 /* load the source vec */
    629                 vs = vec_perm(vs, voverflow, valigner);
    630                 /* vsel is set for items that match the key */
    631                 vsel = (vector unsigned char)vec_and(vs, vrgbmask);
    632                 vsel = (vector unsigned char)vec_cmpeq(vs, vckey);
    633                 /* permute the src vec to the dest format */
    634                 vs = vec_perm(vs, valpha, vpermute);
    635                 /* load the destination vec */
    636                 vd = vec_ld(0, dstp);
    637                 /* select the source and dest into vs */
    638                 vd = (vector unsigned int)vec_sel((vector unsigned char)vs, (vector unsigned char)vd, vsel);
    639 
    640                 vec_st(vd, 0, dstp);
    641                 srcp += 4;
    642                 width -= 4;
    643                 dstp += 4;
    644                 vs = voverflow;
    645             }
    646             ONE_PIXEL_BLEND((extrawidth), extrawidth);
    647 #undef ONE_PIXEL_BLEND
    648             srcp += srcskip >> 2;
    649             dstp += dstskip >> 2;
    650         }
    651     }
    652 }
    653 
    654 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
    655 /* Use this on a G5 */
    656 static void ConvertAltivec32to32_noprefetch(SDL_BlitInfo *info)
    657 {
    658     int height = info->d_height;
    659     Uint32 *src = (Uint32 *) info->s_pixels;
    660     int srcskip = info->s_skip;
    661     Uint32 *dst = (Uint32 *) info->d_pixels;
    662     int dstskip = info->d_skip;
    663     SDL_PixelFormat *srcfmt = info->src;
    664     SDL_PixelFormat *dstfmt = info->dst;
    665     vector unsigned int vzero = vec_splat_u32(0);
    666     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    667     if (dstfmt->Amask && !srcfmt->Amask) {
    668         if (srcfmt->alpha) {
    669             vector unsigned char valpha;
    670             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
    671             vzero = (vector unsigned int)vec_splat(valpha, 0);
    672         }
    673     }
    674 
    675     assert(srcfmt->BytesPerPixel == 4);
    676     assert(dstfmt->BytesPerPixel == 4);
    677 
    678     while (height--) {
    679         vector unsigned char valigner;
    680         vector unsigned int vbits;
    681         vector unsigned int voverflow;
    682         Uint32 bits;
    683         Uint8 r, g, b, a;
    684 
    685         int width = info->d_width;
    686         int extrawidth;
    687 
    688         /* do scalar until we can align... */
    689         while ((UNALIGNED_PTR(dst)) && (width)) {
    690             bits = *(src++);
    691             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    692             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    693             width--;
    694         }
    695 
    696         /* After all that work, here's the vector part! */
    697         extrawidth = (width % 4);
    698         width -= extrawidth;
    699         valigner = VEC_ALIGNER(src);
    700         vbits = vec_ld(0, src);
    701 
    702        while (width) {
    703             voverflow = vec_ld(15, src);
    704             src += 4;
    705             width -= 4;
    706             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
    707             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
    708             vec_st(vbits, 0, dst);  /* store it back out. */
    709             dst += 4;
    710             vbits = voverflow;
    711         }
    712 
    713         assert(width == 0);
    714 
    715         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
    716         while (extrawidth) {
    717             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
    718             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    719             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    720             extrawidth--;
    721         }
    722 
    723         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
    724         dst += dstskip >> 2;
    725     }
    726 
    727 }
    728 
    729 /* Altivec code to swizzle one 32-bit surface to a different 32-bit format. */
    730 /* Use this on a G4 */
    731 static void ConvertAltivec32to32_prefetch(SDL_BlitInfo *info)
    732 {
    733     const int scalar_dst_lead = sizeof (Uint32) * 4;
    734     const int vector_dst_lead = sizeof (Uint32) * 16;
    735 
    736     int height = info->d_height;
    737     Uint32 *src = (Uint32 *) info->s_pixels;
    738     int srcskip = info->s_skip;
    739     Uint32 *dst = (Uint32 *) info->d_pixels;
    740     int dstskip = info->d_skip;
    741     SDL_PixelFormat *srcfmt = info->src;
    742     SDL_PixelFormat *dstfmt = info->dst;
    743     vector unsigned int vzero = vec_splat_u32(0);
    744     vector unsigned char vpermute = calc_swizzle32(srcfmt, dstfmt);
    745     if (dstfmt->Amask && !srcfmt->Amask) {
    746         if (srcfmt->alpha) {
    747             vector unsigned char valpha;
    748             ((unsigned char *)&valpha)[0] = srcfmt->alpha;
    749             vzero = (vector unsigned int)vec_splat(valpha, 0);
    750         }
    751     }
    752 
    753     assert(srcfmt->BytesPerPixel == 4);
    754     assert(dstfmt->BytesPerPixel == 4);
    755 
    756     while (height--) {
    757         vector unsigned char valigner;
    758         vector unsigned int vbits;
    759         vector unsigned int voverflow;
    760         Uint32 bits;
    761         Uint8 r, g, b, a;
    762 
    763         int width = info->d_width;
    764         int extrawidth;
    765 
    766         /* do scalar until we can align... */
    767         while ((UNALIGNED_PTR(dst)) && (width)) {
    768             vec_dstt(src+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
    769             vec_dstst(dst+scalar_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
    770             bits = *(src++);
    771             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    772             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    773             width--;
    774         }
    775 
    776         /* After all that work, here's the vector part! */
    777         extrawidth = (width % 4);
    778         width -= extrawidth;
    779         valigner = VEC_ALIGNER(src);
    780         vbits = vec_ld(0, src);
    781 
    782         while (width) {
    783             vec_dstt(src+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_SRC);
    784             vec_dstst(dst+vector_dst_lead, DST_CTRL(2,32,1024), DST_CHAN_DEST);
    785             voverflow = vec_ld(15, src);
    786             src += 4;
    787             width -= 4;
    788             vbits = vec_perm(vbits, voverflow, valigner);  /* src is ready. */
    789             vbits = vec_perm(vbits, vzero, vpermute);  /* swizzle it. */
    790             vec_st(vbits, 0, dst);  /* store it back out. */
    791             dst += 4;
    792             vbits = voverflow;
    793         }
    794 
    795         assert(width == 0);
    796 
    797         /* cover pixels at the end of the row that didn't fit in 16 bytes. */
    798         while (extrawidth) {
    799             bits = *(src++);  /* max 7 pixels, don't bother with prefetch. */
    800             RGBA_FROM_8888(bits, srcfmt, r, g, b, a);
    801             *(dst++) = MAKE8888(dstfmt, r, g, b, a);
    802             extrawidth--;
    803         }
    804 
    805         src += srcskip >> 2;  /* move to next row, accounting for pitch. */
    806         dst += dstskip >> 2;
    807     }
    808 
    809     vec_dss(DST_CHAN_SRC);
    810     vec_dss(DST_CHAN_DEST);
    811 }
    812 
    813 static Uint32 GetBlitFeatures( void )
    814 {
    815     static Uint32 features = 0xffffffff;
    816     if (features == 0xffffffff) {
    817         /* Provide an override for testing .. */
    818         char *override = SDL_getenv("SDL_ALTIVEC_BLIT_FEATURES");
    819         if (override) {
    820             features = 0;
    821             SDL_sscanf(override, "%u", &features);
    822         } else {
    823             features = ( 0
    824                 /* Feature 1 is has-MMX */
    825                 | ((SDL_HasMMX()) ? 1 : 0)
    826                 /* Feature 2 is has-AltiVec */
    827                 | ((SDL_HasAltiVec()) ? 2 : 0)
    828                 /* Feature 4 is dont-use-prefetch */
    829                 /* !!!! FIXME: Check for G5 or later, not the cache size! Always prefetch on a G4. */
    830                 | ((GetL3CacheSize() == 0) ? 4 : 0)
    831             );
    832         }
    833     }
    834     return features;
    835 }
    836 #if __MWERKS__
    837 #pragma altivec_model off
    838 #endif
    839 #else
    840 /* Feature 1 is has-MMX */
    841 #define GetBlitFeatures() ((Uint32)(SDL_HasMMX() ? 1 : 0))
    842 #endif
    843 
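/* HI and LO are the in-memory indices of the more- and less-significant
 * element of a two-element pair, so their values depend on the host
 * byte order. */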
    845 #if SDL_BYTEORDER == SDL_LIL_ENDIAN
    846 #define HI	1
    847 #define LO	0
    848 #else /* SDL_BYTEORDER == SDL_BIG_ENDIAN */
    849 #define HI	0
    850 #define LO	1
    851 #endif
    852 
    853 #if SDL_HERMES_BLITTERS
    854 
    855 /* Heheheh, we coerce Hermes into using SDL blit information */
    856 #define X86_ASSEMBLER
    857 #define HermesConverterInterface	SDL_BlitInfo
    858 #define HermesClearInterface		void
    859 #define STACKCALL
    860 
    861 #include "../hermes/HeadMMX.h"
    862 #include "../hermes/HeadX86.h"
    863 
    864 #else
    865 
    866 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
    867 #define RGB888_RGB332(dst, src) { \
    868 	dst = (Uint8)((((src)&0x00E00000)>>16)| \
    869 	              (((src)&0x0000E000)>>11)| \
    870 	              (((src)&0x000000C0)>>6)); \
    871 }
    872 static void Blit_RGB888_index8(SDL_BlitInfo *info)
    873 {
    874 #ifndef USE_DUFFS_LOOP
    875 	int c;
    876 #endif
    877 	int width, height;
    878 	Uint32 *src;
    879 	const Uint8 *map;
    880 	Uint8 *dst;
    881 	int srcskip, dstskip;
    882 
    883 	/* Set up some basic variables */
    884 	width = info->d_width;
    885 	height = info->d_height;
    886 	src = (Uint32 *)info->s_pixels;
    887 	srcskip = info->s_skip/4;
    888 	dst = info->d_pixels;
    889 	dstskip = info->d_skip;
    890 	map = info->table;
    891 
    892 	if ( map == NULL ) {
    893 		while ( height-- ) {
    894 #ifdef USE_DUFFS_LOOP
    895 			DUFFS_LOOP(
    896 				RGB888_RGB332(*dst++, *src);
    897 			, width);
    898 #else
    899 			for ( c=width/4; c; --c ) {
    900 				/* Pack RGB into 8bit pixel */
    901 				++src;
    902 				RGB888_RGB332(*dst++, *src);
    903 				++src;
    904 				RGB888_RGB332(*dst++, *src);
    905 				++src;
    906 				RGB888_RGB332(*dst++, *src);
    907 				++src;
    908 			}
    909 			switch ( width & 3 ) {
    910 				case 3:
    911 					RGB888_RGB332(*dst++, *src);
    912 					++src;
    913 				case 2:
    914 					RGB888_RGB332(*dst++, *src);
    915 					++src;
    916 				case 1:
    917 					RGB888_RGB332(*dst++, *src);
    918 					++src;
    919 			}
    920 #endif /* USE_DUFFS_LOOP */
    921 			src += srcskip;
    922 			dst += dstskip;
    923 		}
    924 	} else {
    925 		int Pixel;
    926 
    927 		while ( height-- ) {
    928 #ifdef USE_DUFFS_LOOP
    929 			DUFFS_LOOP(
    930 				RGB888_RGB332(Pixel, *src);
    931 				*dst++ = map[Pixel];
    932 				++src;
    933 			, width);
    934 #else
    935 			for ( c=width/4; c; --c ) {
    936 				/* Pack RGB into 8bit pixel */
    937 				RGB888_RGB332(Pixel, *src);
    938 				*dst++ = map[Pixel];
    939 				++src;
    940 				RGB888_RGB332(Pixel, *src);
    941 				*dst++ = map[Pixel];
    942 				++src;
    943 				RGB888_RGB332(Pixel, *src);
    944 				*dst++ = map[Pixel];
    945 				++src;
    946 				RGB888_RGB332(Pixel, *src);
    947 				*dst++ = map[Pixel];
    948 				++src;
    949 			}
    950 			switch ( width & 3 ) {
    951 				case 3:
    952 					RGB888_RGB332(Pixel, *src);
    953 					*dst++ = map[Pixel];
    954 					++src;
    955 				case 2:
    956 					RGB888_RGB332(Pixel, *src);
    957 					*dst++ = map[Pixel];
    958 					++src;
    959 				case 1:
    960 					RGB888_RGB332(Pixel, *src);
    961 					*dst++ = map[Pixel];
    962 					++src;
    963 			}
    964 #endif /* USE_DUFFS_LOOP */
    965 			src += srcskip;
    966 			dst += dstskip;
    967 		}
    968 	}
    969 }
    970 /* Special optimized blit for RGB 8-8-8 --> RGB 5-5-5 */
    971 #define RGB888_RGB555(dst, src) { \
    972 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>9)| \
    973 	                            (((*src)&0x0000F800)>>6)| \
    974 	                            (((*src)&0x000000F8)>>3)); \
    975 }
    976 #define RGB888_RGB555_TWO(dst, src) { \
    977 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>9)| \
    978 	                     (((src[HI])&0x0000F800)>>6)| \
    979 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
    980 	                     (((src[LO])&0x00F80000)>>9)| \
    981 	                     (((src[LO])&0x0000F800)>>6)| \
    982 	                     (((src[LO])&0x000000F8)>>3); \
    983 }
    984 static void Blit_RGB888_RGB555(SDL_BlitInfo *info)
    985 {
    986 #ifndef USE_DUFFS_LOOP
    987 	int c;
    988 #endif
    989 	int width, height;
    990 	Uint32 *src;
    991 	Uint16 *dst;
    992 	int srcskip, dstskip;
    993 
    994 	/* Set up some basic variables */
    995 	width = info->d_width;
    996 	height = info->d_height;
    997 	src = (Uint32 *)info->s_pixels;
    998 	srcskip = info->s_skip/4;
    999 	dst = (Uint16 *)info->d_pixels;
   1000 	dstskip = info->d_skip/2;
   1001 
   1002 #ifdef USE_DUFFS_LOOP
   1003 	while ( height-- ) {
   1004 		DUFFS_LOOP(
   1005 			RGB888_RGB555(dst, src);
   1006 			++src;
   1007 			++dst;
   1008 		, width);
   1009 		src += srcskip;
   1010 		dst += dstskip;
   1011 	}
   1012 #else
   1013 	/* Memory align at 4-byte boundary, if necessary */
   1014 	if ( (long)dst & 0x03 ) {
   1015 		/* Don't do anything if width is 0 */
   1016 		if ( width == 0 ) {
   1017 			return;
   1018 		}
   1019 		--width;
   1020 
   1021 		while ( height-- ) {
   1022 			/* Perform copy alignment */
   1023 			RGB888_RGB555(dst, src);
   1024 			++src;
   1025 			++dst;
   1026 
   1027 			/* Copy in 4 pixel chunks */
   1028 			for ( c=width/4; c; --c ) {
   1029 				RGB888_RGB555_TWO(dst, src);
   1030 				src += 2;
   1031 				dst += 2;
   1032 				RGB888_RGB555_TWO(dst, src);
   1033 				src += 2;
   1034 				dst += 2;
   1035 			}
   1036 			/* Get any leftovers */
   1037 			switch (width & 3) {
   1038 				case 3:
   1039 					RGB888_RGB555(dst, src);
   1040 					++src;
   1041 					++dst;
   1042 				case 2:
   1043 					RGB888_RGB555_TWO(dst, src);
   1044 					src += 2;
   1045 					dst += 2;
   1046 					break;
   1047 				case 1:
   1048 					RGB888_RGB555(dst, src);
   1049 					++src;
   1050 					++dst;
   1051 					break;
   1052 			}
   1053 			src += srcskip;
   1054 			dst += dstskip;
   1055 		}
   1056 	} else {
   1057 		while ( height-- ) {
   1058 			/* Copy in 4 pixel chunks */
   1059 			for ( c=width/4; c; --c ) {
   1060 				RGB888_RGB555_TWO(dst, src);
   1061 				src += 2;
   1062 				dst += 2;
   1063 				RGB888_RGB555_TWO(dst, src);
   1064 				src += 2;
   1065 				dst += 2;
   1066 			}
   1067 			/* Get any leftovers */
   1068 			switch (width & 3) {
   1069 				case 3:
   1070 					RGB888_RGB555(dst, src);
   1071 					++src;
   1072 					++dst;
   1073 				case 2:
   1074 					RGB888_RGB555_TWO(dst, src);
   1075 					src += 2;
   1076 					dst += 2;
   1077 					break;
   1078 				case 1:
   1079 					RGB888_RGB555(dst, src);
   1080 					++src;
   1081 					++dst;
   1082 					break;
   1083 			}
   1084 			src += srcskip;
   1085 			dst += dstskip;
   1086 		}
   1087 	}
   1088 #endif /* USE_DUFFS_LOOP */
   1089 }
   1090 /* Special optimized blit for RGB 8-8-8 --> RGB 5-6-5 */
   1091 #define RGB888_RGB565(dst, src) { \
   1092 	*(Uint16 *)(dst) = (Uint16)((((*src)&0x00F80000)>>8)| \
   1093 	                            (((*src)&0x0000FC00)>>5)| \
   1094 	                            (((*src)&0x000000F8)>>3)); \
   1095 }
   1096 #define RGB888_RGB565_TWO(dst, src) { \
   1097 	*(Uint32 *)(dst) = (((((src[HI])&0x00F80000)>>8)| \
   1098 	                     (((src[HI])&0x0000FC00)>>5)| \
   1099 	                     (((src[HI])&0x000000F8)>>3))<<16)| \
   1100 	                     (((src[LO])&0x00F80000)>>8)| \
   1101 	                     (((src[LO])&0x0000FC00)>>5)| \
   1102 	                     (((src[LO])&0x000000F8)>>3); \
   1103 }
   1104 static void Blit_RGB888_RGB565(SDL_BlitInfo *info)
   1105 {
   1106 #ifndef USE_DUFFS_LOOP
   1107 	int c;
   1108 #endif
   1109 	int width, height;
   1110 	Uint32 *src;
   1111 	Uint16 *dst;
   1112 	int srcskip, dstskip;
   1113 
   1114 	/* Set up some basic variables */
   1115 	width = info->d_width;
   1116 	height = info->d_height;
   1117 	src = (Uint32 *)info->s_pixels;
   1118 	srcskip = info->s_skip/4;
   1119 	dst = (Uint16 *)info->d_pixels;
   1120 	dstskip = info->d_skip/2;
   1121 
   1122 #ifdef USE_DUFFS_LOOP
   1123 	while ( height-- ) {
   1124 		DUFFS_LOOP(
   1125 			RGB888_RGB565(dst, src);
   1126 			++src;
   1127 			++dst;
   1128 		, width);
   1129 		src += srcskip;
   1130 		dst += dstskip;
   1131 	}
   1132 #else
   1133 	/* Memory align at 4-byte boundary, if necessary */
   1134 	if ( (long)dst & 0x03 ) {
   1135 		/* Don't do anything if width is 0 */
   1136 		if ( width == 0 ) {
   1137 			return;
   1138 		}
   1139 		--width;
   1140 
   1141 		while ( height-- ) {
   1142 			/* Perform copy alignment */
   1143 			RGB888_RGB565(dst, src);
   1144 			++src;
   1145 			++dst;
   1146 
   1147 			/* Copy in 4 pixel chunks */
   1148 			for ( c=width/4; c; --c ) {
   1149 				RGB888_RGB565_TWO(dst, src);
   1150 				src += 2;
   1151 				dst += 2;
   1152 				RGB888_RGB565_TWO(dst, src);
   1153 				src += 2;
   1154 				dst += 2;
   1155 			}
   1156 			/* Get any leftovers */
   1157 			switch (width & 3) {
   1158 				case 3:
   1159 					RGB888_RGB565(dst, src);
   1160 					++src;
   1161 					++dst;
   1162 				case 2:
   1163 					RGB888_RGB565_TWO(dst, src);
   1164 					src += 2;
   1165 					dst += 2;
   1166 					break;
   1167 				case 1:
   1168 					RGB888_RGB565(dst, src);
   1169 					++src;
   1170 					++dst;
   1171 					break;
   1172 			}
   1173 			src += srcskip;
   1174 			dst += dstskip;
   1175 		}
   1176 	} else {
   1177 		while ( height-- ) {
   1178 			/* Copy in 4 pixel chunks */
   1179 			for ( c=width/4; c; --c ) {
   1180 				RGB888_RGB565_TWO(dst, src);
   1181 				src += 2;
   1182 				dst += 2;
   1183 				RGB888_RGB565_TWO(dst, src);
   1184 				src += 2;
   1185 				dst += 2;
   1186 			}
   1187 			/* Get any leftovers */
   1188 			switch (width & 3) {
   1189 				case 3:
   1190 					RGB888_RGB565(dst, src);
   1191 					++src;
   1192 					++dst;
   1193 				case 2:
   1194 					RGB888_RGB565_TWO(dst, src);
   1195 					src += 2;
   1196 					dst += 2;
   1197 					break;
   1198 				case 1:
   1199 					RGB888_RGB565(dst, src);
   1200 					++src;
   1201 					++dst;
   1202 					break;
   1203 			}
   1204 			src += srcskip;
   1205 			dst += dstskip;
   1206 		}
   1207 	}
   1208 #endif /* USE_DUFFS_LOOP */
   1209 }
   1210 
   1211 #endif /* SDL_HERMES_BLITTERS */
   1212 
   1213 
   1214 /* Special optimized blit for RGB 5-6-5 --> 32-bit RGB surfaces */
   1215 #define RGB565_32(dst, src, map) (map[src[LO]*2] + map[src[HI]*2+1])
   1216 static void Blit_RGB565_32(SDL_BlitInfo *info, const Uint32 *map)
   1217 {
   1218 #ifndef USE_DUFFS_LOOP
   1219 	int c;
   1220 #endif
   1221 	int width, height;
   1222 	Uint8 *src;
   1223 	Uint32 *dst;
   1224 	int srcskip, dstskip;
   1225 
   1226 	/* Set up some basic variables */
   1227 	width = info->d_width;
   1228 	height = info->d_height;
   1229 	src = (Uint8 *)info->s_pixels;
   1230 	srcskip = info->s_skip;
   1231 	dst = (Uint32 *)info->d_pixels;
   1232 	dstskip = info->d_skip/4;
   1233 
   1234 #ifdef USE_DUFFS_LOOP
   1235 	while ( height-- ) {
   1236 		DUFFS_LOOP(
   1237 		{
   1238 			*dst++ = RGB565_32(dst, src, map);
   1239 			src += 2;
   1240 		},
   1241 		width);
   1242 		src += srcskip;
   1243 		dst += dstskip;
   1244 	}
   1245 #else
   1246 	while ( height-- ) {
   1247 		/* Copy in 4 pixel chunks */
   1248 		for ( c=width/4; c; --c ) {
   1249 			*dst++ = RGB565_32(dst, src, map);
   1250 			src += 2;
   1251 			*dst++ = RGB565_32(dst, src, map);
   1252 			src += 2;
   1253 			*dst++ = RGB565_32(dst, src, map);
   1254 			src += 2;
   1255 			*dst++ = RGB565_32(dst, src, map);
   1256 			src += 2;
   1257 		}
   1258 		/* Get any leftovers */
   1259 		switch (width & 3) {
   1260 			case 3:
   1261 				*dst++ = RGB565_32(dst, src, map);
   1262 				src += 2;
   1263 			case 2:
   1264 				*dst++ = RGB565_32(dst, src, map);
   1265 				src += 2;
   1266 			case 1:
   1267 				*dst++ = RGB565_32(dst, src, map);
   1268 				src += 2;
   1269 				break;
   1270 		}
   1271 		src += srcskip;
   1272 		dst += dstskip;
   1273 	}
   1274 #endif /* USE_DUFFS_LOOP */
   1275 }
   1276 
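/* Special optimized blit for RGB 5-6-5 --> ARGB 8-8-8-8 */
/* Lookup table for Blit_RGB565_32: 256 pairs of entries.  Entry 2*b is the
 * ARGB contribution of b as the less-significant byte of a 5-6-5 pixel
 * (blue and the low green bits); entry 2*b+1 is the contribution of b as
 * the more-significant byte (red and the high green bits) with alpha
 * already set to 0xff.  Adding one of each yields the finished pixel. */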
   1278 static const Uint32 RGB565_ARGB8888_LUT[512] = {
   1279 		0x00000000, 0xff000000, 0x00000008, 0xff002000,
   1280 		0x00000010, 0xff004000, 0x00000018, 0xff006100,
   1281 		0x00000020, 0xff008100, 0x00000029, 0xff00a100,
   1282 		0x00000031, 0xff00c200, 0x00000039, 0xff00e200,
   1283 		0x00000041, 0xff080000, 0x0000004a, 0xff082000,
   1284 		0x00000052, 0xff084000, 0x0000005a, 0xff086100,
   1285 		0x00000062, 0xff088100, 0x0000006a, 0xff08a100,
   1286 		0x00000073, 0xff08c200, 0x0000007b, 0xff08e200,
   1287 		0x00000083, 0xff100000, 0x0000008b, 0xff102000,
   1288 		0x00000094, 0xff104000, 0x0000009c, 0xff106100,
   1289 		0x000000a4, 0xff108100, 0x000000ac, 0xff10a100,
   1290 		0x000000b4, 0xff10c200, 0x000000bd, 0xff10e200,
   1291 		0x000000c5, 0xff180000, 0x000000cd, 0xff182000,
   1292 		0x000000d5, 0xff184000, 0x000000de, 0xff186100,
   1293 		0x000000e6, 0xff188100, 0x000000ee, 0xff18a100,
   1294 		0x000000f6, 0xff18c200, 0x000000ff, 0xff18e200,
   1295 		0x00000400, 0xff200000, 0x00000408, 0xff202000,
   1296 		0x00000410, 0xff204000, 0x00000418, 0xff206100,
   1297 		0x00000420, 0xff208100, 0x00000429, 0xff20a100,
   1298 		0x00000431, 0xff20c200, 0x00000439, 0xff20e200,
   1299 		0x00000441, 0xff290000, 0x0000044a, 0xff292000,
   1300 		0x00000452, 0xff294000, 0x0000045a, 0xff296100,
   1301 		0x00000462, 0xff298100, 0x0000046a, 0xff29a100,
   1302 		0x00000473, 0xff29c200, 0x0000047b, 0xff29e200,
   1303 		0x00000483, 0xff310000, 0x0000048b, 0xff312000,
   1304 		0x00000494, 0xff314000, 0x0000049c, 0xff316100,
   1305 		0x000004a4, 0xff318100, 0x000004ac, 0xff31a100,
   1306 		0x000004b4, 0xff31c200, 0x000004bd, 0xff31e200,
   1307 		0x000004c5, 0xff390000, 0x000004cd, 0xff392000,
   1308 		0x000004d5, 0xff394000, 0x000004de, 0xff396100,
   1309 		0x000004e6, 0xff398100, 0x000004ee, 0xff39a100,
   1310 		0x000004f6, 0xff39c200, 0x000004ff, 0xff39e200,
   1311 		0x00000800, 0xff410000, 0x00000808, 0xff412000,
   1312 		0x00000810, 0xff414000, 0x00000818, 0xff416100,
   1313 		0x00000820, 0xff418100, 0x00000829, 0xff41a100,
   1314 		0x00000831, 0xff41c200, 0x00000839, 0xff41e200,
   1315 		0x00000841, 0xff4a0000, 0x0000084a, 0xff4a2000,
   1316 		0x00000852, 0xff4a4000, 0x0000085a, 0xff4a6100,
   1317 		0x00000862, 0xff4a8100, 0x0000086a, 0xff4aa100,
   1318 		0x00000873, 0xff4ac200, 0x0000087b, 0xff4ae200,
   1319 		0x00000883, 0xff520000, 0x0000088b, 0xff522000,
   1320 		0x00000894, 0xff524000, 0x0000089c, 0xff526100,
   1321 		0x000008a4, 0xff528100, 0x000008ac, 0xff52a100,
   1322 		0x000008b4, 0xff52c200, 0x000008bd, 0xff52e200,
   1323 		0x000008c5, 0xff5a0000, 0x000008cd, 0xff5a2000,
   1324 		0x000008d5, 0xff5a4000, 0x000008de, 0xff5a6100,
   1325 		0x000008e6, 0xff5a8100, 0x000008ee, 0xff5aa100,
   1326 		0x000008f6, 0xff5ac200, 0x000008ff, 0xff5ae200,
   1327 		0x00000c00, 0xff620000, 0x00000c08, 0xff622000,
   1328 		0x00000c10, 0xff624000, 0x00000c18, 0xff626100,
   1329 		0x00000c20, 0xff628100, 0x00000c29, 0xff62a100,
   1330 		0x00000c31, 0xff62c200, 0x00000c39, 0xff62e200,
   1331 		0x00000c41, 0xff6a0000, 0x00000c4a, 0xff6a2000,
   1332 		0x00000c52, 0xff6a4000, 0x00000c5a, 0xff6a6100,
   1333 		0x00000c62, 0xff6a8100, 0x00000c6a, 0xff6aa100,
   1334 		0x00000c73, 0xff6ac200, 0x00000c7b, 0xff6ae200,
   1335 		0x00000c83, 0xff730000, 0x00000c8b, 0xff732000,
   1336 		0x00000c94, 0xff734000, 0x00000c9c, 0xff736100,
   1337 		0x00000ca4, 0xff738100, 0x00000cac, 0xff73a100,
   1338 		0x00000cb4, 0xff73c200, 0x00000cbd, 0xff73e200,
   1339 		0x00000cc5, 0xff7b0000, 0x00000ccd, 0xff7b2000,
   1340 		0x00000cd5, 0xff7b4000, 0x00000cde, 0xff7b6100,
   1341 		0x00000ce6, 0xff7b8100, 0x00000cee, 0xff7ba100,
   1342 		0x00000cf6, 0xff7bc200, 0x00000cff, 0xff7be200,
   1343 		0x00001000, 0xff830000, 0x00001008, 0xff832000,
   1344 		0x00001010, 0xff834000, 0x00001018, 0xff836100,
   1345 		0x00001020, 0xff838100, 0x00001029, 0xff83a100,
   1346 		0x00001031, 0xff83c200, 0x00001039, 0xff83e200,
   1347 		0x00001041, 0xff8b0000, 0x0000104a, 0xff8b2000,
   1348 		0x00001052, 0xff8b4000, 0x0000105a, 0xff8b6100,
   1349 		0x00001062, 0xff8b8100, 0x0000106a, 0xff8ba100,
   1350 		0x00001073, 0xff8bc200, 0x0000107b, 0xff8be200,
   1351 		0x00001083, 0xff940000, 0x0000108b, 0xff942000,
   1352 		0x00001094, 0xff944000, 0x0000109c, 0xff946100,
   1353 		0x000010a4, 0xff948100, 0x000010ac, 0xff94a100,
   1354 		0x000010b4, 0xff94c200, 0x000010bd, 0xff94e200,
   1355 		0x000010c5, 0xff9c0000, 0x000010cd, 0xff9c2000,
   1356 		0x000010d5, 0xff9c4000, 0x000010de, 0xff9c6100,
   1357 		0x000010e6, 0xff9c8100, 0x000010ee, 0xff9ca100,
   1358 		0x000010f6, 0xff9cc200, 0x000010ff, 0xff9ce200,
   1359 		0x00001400, 0xffa40000, 0x00001408, 0xffa42000,
   1360 		0x00001410, 0xffa44000, 0x00001418, 0xffa46100,
   1361 		0x00001420, 0xffa48100, 0x00001429, 0xffa4a100,
   1362 		0x00001431, 0xffa4c200, 0x00001439, 0xffa4e200,
   1363 		0x00001441, 0xffac0000, 0x0000144a, 0xffac2000,
   1364 		0x00001452, 0xffac4000, 0x0000145a, 0xffac6100,
   1365 		0x00001462, 0xffac8100, 0x0000146a, 0xffaca100,
   1366 		0x00001473, 0xffacc200, 0x0000147b, 0xfface200,
   1367 		0x00001483, 0xffb40000, 0x0000148b, 0xffb42000,
   1368 		0x00001494, 0xffb44000, 0x0000149c, 0xffb46100,
   1369 		0x000014a4, 0xffb48100, 0x000014ac, 0xffb4a100,
   1370 		0x000014b4, 0xffb4c200, 0x000014bd, 0xffb4e200,
   1371 		0x000014c5, 0xffbd0000, 0x000014cd, 0xffbd2000,
   1372 		0x000014d5, 0xffbd4000, 0x000014de, 0xffbd6100,
   1373 		0x000014e6, 0xffbd8100, 0x000014ee, 0xffbda100,
   1374 		0x000014f6, 0xffbdc200, 0x000014ff, 0xffbde200,
   1375 		0x00001800, 0xffc50000, 0x00001808, 0xffc52000,
   1376 		0x00001810, 0xffc54000, 0x00001818, 0xffc56100,
   1377 		0x00001820, 0xffc58100, 0x00001829, 0xffc5a100,
   1378 		0x00001831, 0xffc5c200, 0x00001839, 0xffc5e200,
   1379 		0x00001841, 0xffcd0000, 0x0000184a, 0xffcd2000,
   1380 		0x00001852, 0xffcd4000, 0x0000185a, 0xffcd6100,
   1381 		0x00001862, 0xffcd8100, 0x0000186a, 0xffcda100,
   1382 		0x00001873, 0xffcdc200, 0x0000187b, 0xffcde200,
   1383 		0x00001883, 0xffd50000, 0x0000188b, 0xffd52000,
   1384 		0x00001894, 0xffd54000, 0x0000189c, 0xffd56100,
   1385 		0x000018a4, 0xffd58100, 0x000018ac, 0xffd5a100,
   1386 		0x000018b4, 0xffd5c200, 0x000018bd, 0xffd5e200,
   1387 		0x000018c5, 0xffde0000, 0x000018cd, 0xffde2000,
   1388 		0x000018d5, 0xffde4000, 0x000018de, 0xffde6100,
   1389 		0x000018e6, 0xffde8100, 0x000018ee, 0xffdea100,
   1390 		0x000018f6, 0xffdec200, 0x000018ff, 0xffdee200,
   1391 		0x00001c00, 0xffe60000, 0x00001c08, 0xffe62000,
   1392 		0x00001c10, 0xffe64000, 0x00001c18, 0xffe66100,
   1393 		0x00001c20, 0xffe68100, 0x00001c29, 0xffe6a100,
   1394 		0x00001c31, 0xffe6c200, 0x00001c39, 0xffe6e200,
   1395 		0x00001c41, 0xffee0000, 0x00001c4a, 0xffee2000,
   1396 		0x00001c52, 0xffee4000, 0x00001c5a, 0xffee6100,
   1397 		0x00001c62, 0xffee8100, 0x00001c6a, 0xffeea100,
   1398 		0x00001c73, 0xffeec200, 0x00001c7b, 0xffeee200,
   1399 		0x00001c83, 0xfff60000, 0x00001c8b, 0xfff62000,
   1400 		0x00001c94, 0xfff64000, 0x00001c9c, 0xfff66100,
   1401 		0x00001ca4, 0xfff68100, 0x00001cac, 0xfff6a100,
   1402 		0x00001cb4, 0xfff6c200, 0x00001cbd, 0xfff6e200,
   1403 		0x00001cc5, 0xffff0000, 0x00001ccd, 0xffff2000,
   1404 		0x00001cd5, 0xffff4000, 0x00001cde, 0xffff6100,
   1405 		0x00001ce6, 0xffff8100, 0x00001cee, 0xffffa100,
   1406 		0x00001cf6, 0xffffc200, 0x00001cff, 0xffffe200
   1407 };
   1408 static void Blit_RGB565_ARGB8888(SDL_BlitInfo *info)
   1409 {
   1410     Blit_RGB565_32(info, RGB565_ARGB8888_LUT);
   1411 }
   1412 
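/* Special optimized blit for RGB 5-6-5 --> ABGR 8-8-8-8 */
/* Lookup table for Blit_RGB565_32: 256 pairs of entries.  Entry 2*b is the
 * ABGR contribution of b as the less-significant byte of a 5-6-5 pixel
 * (blue and the low green bits) with alpha already set to 0xff; entry
 * 2*b+1 is the contribution of b as the more-significant byte (red and
 * the high green bits).  Adding one of each yields the finished pixel. */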
   1414 static const Uint32 RGB565_ABGR8888_LUT[512] = {
   1415 		0xff000000, 0x00000000, 0xff080000, 0x00002000,
   1416 		0xff100000, 0x00004000, 0xff180000, 0x00006100,
   1417 		0xff200000, 0x00008100, 0xff290000, 0x0000a100,
   1418 		0xff310000, 0x0000c200, 0xff390000, 0x0000e200,
   1419 		0xff410000, 0x00000008, 0xff4a0000, 0x00002008,
   1420 		0xff520000, 0x00004008, 0xff5a0000, 0x00006108,
   1421 		0xff620000, 0x00008108, 0xff6a0000, 0x0000a108,
   1422 		0xff730000, 0x0000c208, 0xff7b0000, 0x0000e208,
   1423 		0xff830000, 0x00000010, 0xff8b0000, 0x00002010,
   1424 		0xff940000, 0x00004010, 0xff9c0000, 0x00006110,
   1425 		0xffa40000, 0x00008110, 0xffac0000, 0x0000a110,
   1426 		0xffb40000, 0x0000c210, 0xffbd0000, 0x0000e210,
   1427 		0xffc50000, 0x00000018, 0xffcd0000, 0x00002018,
   1428 		0xffd50000, 0x00004018, 0xffde0000, 0x00006118,
   1429 		0xffe60000, 0x00008118, 0xffee0000, 0x0000a118,
   1430 		0xfff60000, 0x0000c218, 0xffff0000, 0x0000e218,
   1431 		0xff000400, 0x00000020, 0xff080400, 0x00002020,
   1432 		0xff100400, 0x00004020, 0xff180400, 0x00006120,
   1433 		0xff200400, 0x00008120, 0xff290400, 0x0000a120,
   1434 		0xff310400, 0x0000c220, 0xff390400, 0x0000e220,
   1435 		0xff410400, 0x00000029, 0xff4a0400, 0x00002029,
   1436 		0xff520400, 0x00004029, 0xff5a0400, 0x00006129,
   1437 		0xff620400, 0x00008129, 0xff6a0400, 0x0000a129,
   1438 		0xff730400, 0x0000c229, 0xff7b0400, 0x0000e229,
   1439 		0xff830400, 0x00000031, 0xff8b0400, 0x00002031,
   1440 		0xff940400, 0x00004031, 0xff9c0400, 0x00006131,
   1441 		0xffa40400, 0x00008131, 0xffac0400, 0x0000a131,
   1442 		0xffb40400, 0x0000c231, 0xffbd0400, 0x0000e231,
   1443 		0xffc50400, 0x00000039, 0xffcd0400, 0x00002039,
   1444 		0xffd50400, 0x00004039, 0xffde0400, 0x00006139,
   1445 		0xffe60400, 0x00008139, 0xffee0400, 0x0000a139,
   1446 		0xfff60400, 0x0000c239, 0xffff0400, 0x0000e239,
   1447 		0xff000800, 0x00000041, 0xff080800, 0x00002041,
   1448 		0xff100800, 0x00004041, 0xff180800, 0x00006141,
   1449 		0xff200800, 0x00008141, 0xff290800, 0x0000a141,
   1450 		0xff310800, 0x0000c241, 0xff390800, 0x0000e241,
   1451 		0xff410800, 0x0000004a, 0xff4a0800, 0x0000204a,
   1452 		0xff520800, 0x0000404a, 0xff5a0800, 0x0000614a,
   1453 		0xff620800, 0x0000814a, 0xff6a0800, 0x0000a14a,
   1454 		0xff730800, 0x0000c24a, 0xff7b0800, 0x0000e24a,
   1455 		0xff830800, 0x00000052, 0xff8b0800, 0x00002052,
   1456 		0xff940800, 0x00004052, 0xff9c0800, 0x00006152,
   1457 		0xffa40800, 0x00008152, 0xffac0800, 0x0000a152,
   1458 		0xffb40800, 0x0000c252, 0xffbd0800, 0x0000e252,
   1459 		0xffc50800, 0x0000005a, 0xffcd0800, 0x0000205a,
   1460 		0xffd50800, 0x0000405a, 0xffde0800, 0x0000615a,
   1461 		0xffe60800, 0x0000815a, 0xffee0800, 0x0000a15a,
   1462 		0xfff60800, 0x0000c25a, 0xffff0800, 0x0000e25a,
   1463 		0xff000c00, 0x00000062, 0xff080c00, 0x00002062,
   1464 		0xff100c00, 0x00004062, 0xff180c00, 0x00006162,
   1465 		0xff200c00, 0x00008162, 0xff290c00, 0x0000a162,
   1466 		0xff310c00, 0x0000c262, 0xff390c00, 0x0000e262,
   1467 		0xff410c00, 0x0000006a, 0xff4a0c00, 0x0000206a,
   1468 		0xff520c00, 0x0000406a, 0xff5a0c00, 0x0000616a,
   1469 		0xff620c00, 0x0000816a, 0xff6a0c00, 0x0000a16a,
   1470 		0xff730c00, 0x0000c26a, 0xff7b0c00, 0x0000e26a,
   1471 		0xff830c00, 0x00000073, 0xff8b0c00, 0x00002073,
   1472 		0xff940c00, 0x00004073, 0xff9c0c00, 0x00006173,
   1473 		0xffa40c00, 0x00008173, 0xffac0c00, 0x0000a173,
   1474 		0xffb40c00, 0x0000c273, 0xffbd0c00, 0x0000e273,
   1475 		0xffc50c00, 0x0000007b, 0xffcd0c00, 0x0000207b,
   1476 		0xffd50c00, 0x0000407b, 0xffde0c00, 0x0000617b,
   1477 		0xffe60c00, 0x0000817b, 0xffee0c00, 0x0000a17b,
   1478 		0xfff60c00, 0x0000c27b, 0xffff0c00, 0x0000e27b,
   1479 		0xff001000, 0x00000083, 0xff081000, 0x00002083,
   1480 		0xff101000, 0x00004083, 0xff181000, 0x00006183,
   1481 		0xff201000, 0x00008183, 0xff291000, 0x0000a183,
   1482 		0xff311000, 0x0000c283, 0xff391000, 0x0000e283,
   1483 		0xff411000, 0x0000008b, 0xff4a1000, 0x0000208b,
   1484 		0xff521000, 0x0000408b, 0xff5a1000, 0x0000618b,
   1485 		0xff621000, 0x0000818b, 0xff6a1000, 0x0000a18b,
   1486 		0xff731000, 0x0000c28b, 0xff7b1000, 0x0000e28b,
   1487 		0xff831000, 0x00000094, 0xff8b1000, 0x00002094,
   1488 		0xff941000, 0x00004094, 0xff9c1000, 0x00006194,
   1489 		0xffa41000, 0x00008194, 0xffac1000, 0x0000a194,
   1490 		0xffb41000, 0x0000c294, 0xffbd1000, 0x0000e294,
   1491 		0xffc51000, 0x0000009c, 0xffcd1000, 0x0000209c,
   1492 		0xffd51000, 0x0000409c, 0xffde1000, 0x0000619c,
   1493 		0xffe61000, 0x0000819c, 0xffee1000, 0x0000a19c,
   1494 		0xfff61000, 0x0000c29c, 0xffff1000, 0x0000e29c,
   1495 		0xff001400, 0x000000a4, 0xff081400, 0x000020a4,
   1496 		0xff101400, 0x000040a4, 0xff181400, 0x000061a4,
   1497 		0xff201400, 0x000081a4, 0xff291400, 0x0000a1a4,
   1498 		0xff311400, 0x0000c2a4, 0xff391400, 0x0000e2a4,
   1499 		0xff411400, 0x000000ac, 0xff4a1400, 0x000020ac,
   1500 		0xff521400, 0x000040ac, 0xff5a1400, 0x000061ac,
   1501 		0xff621400, 0x000081ac, 0xff6a1400, 0x0000a1ac,
   1502 		0xff731400, 0x0000c2ac, 0xff7b1400, 0x0000e2ac,
   1503 		0xff831400, 0x000000b4, 0xff8b1400, 0x000020b4,
   1504 		0xff941400, 0x000040b4, 0xff9c1400, 0x000061b4,
   1505 		0xffa41400, 0x000081b4, 0xffac1400, 0x0000a1b4,
   1506 		0xffb41400, 0x0000c2b4, 0xffbd1400, 0x0000e2b4,
   1507 		0xffc51400, 0x000000bd, 0xffcd1400, 0x000020bd,
   1508 		0xffd51400, 0x000040bd, 0xffde1400, 0x000061bd,
   1509 		0xffe61400, 0x000081bd, 0xffee1400, 0x0000a1bd,
   1510 		0xfff61400, 0x0000c2bd, 0xffff1400, 0x0000e2bd,
   1511 		0xff001800, 0x000000c5, 0xff081800, 0x000020c5,
   1512 		0xff101800, 0x000040c5, 0xff181800, 0x000061c5,
   1513 		0xff201800, 0x000081c5, 0xff291800, 0x0000a1c5,
   1514 		0xff311800, 0x0000c2c5, 0xff391800, 0x0000e2c5,
   1515 		0xff411800, 0x000000cd, 0xff4a1800, 0x000020cd,
   1516 		0xff521800, 0x000040cd, 0xff5a1800, 0x000061cd,
   1517 		0xff621800, 0x000081cd, 0xff6a1800, 0x0000a1cd,
   1518 		0xff731800, 0x0000c2cd, 0xff7b1800, 0x0000e2cd,
   1519 		0xff831800, 0x000000d5, 0xff8b1800, 0x000020d5,
   1520 		0xff941800, 0x000040d5, 0xff9c1800, 0x000061d5,
   1521 		0xffa41800, 0x000081d5, 0xffac1800, 0x0000a1d5,
   1522 		0xffb41800, 0x0000c2d5, 0xffbd1800, 0x0000e2d5,
   1523 		0xffc51800, 0x000000de, 0xffcd1800, 0x000020de,
   1524 		0xffd51800, 0x000040de, 0xffde1800, 0x000061de,
   1525 		0xffe61800, 0x000081de, 0xffee1800, 0x0000a1de,
   1526 		0xfff61800, 0x0000c2de, 0xffff1800, 0x0000e2de,
   1527 		0xff001c00, 0x000000e6, 0xff081c00, 0x000020e6,
   1528 		0xff101c00, 0x000040e6, 0xff181c00, 0x000061e6,
   1529 		0xff201c00, 0x000081e6, 0xff291c00, 0x0000a1e6,
   1530 		0xff311c00, 0x0000c2e6, 0xff391c00, 0x0000e2e6,
   1531 		0xff411c00, 0x000000ee, 0xff4a1c00, 0x000020ee,
   1532 		0xff521c00, 0x000040ee, 0xff5a1c00, 0x000061ee,
   1533 		0xff621c00, 0x000081ee, 0xff6a1c00, 0x0000a1ee,
   1534 		0xff731c00, 0x0000c2ee, 0xff7b1c00, 0x0000e2ee,
   1535 		0xff831c00, 0x000000f6, 0xff8b1c00, 0x000020f6,
   1536 		0xff941c00, 0x000040f6, 0xff9c1c00, 0x000061f6,
   1537 		0xffa41c00, 0x000081f6, 0xffac1c00, 0x0000a1f6,
   1538 		0xffb41c00, 0x0000c2f6, 0xffbd1c00, 0x0000e2f6,
   1539 		0xffc51c00, 0x000000ff, 0xffcd1c00, 0x000020ff,
   1540 		0xffd51c00, 0x000040ff, 0xffde1c00, 0x000061ff,
   1541 		0xffe61c00, 0x000081ff, 0xffee1c00, 0x0000a1ff,
   1542 		0xfff61c00, 0x0000c2ff, 0xffff1c00, 0x0000e2ff
   1543 };
   1544 static void Blit_RGB565_ABGR8888(SDL_BlitInfo *info)
   1545 {
   1546     Blit_RGB565_32(info, RGB565_ABGR8888_LUT);
   1547 }
   1548 
   1549 /* Special optimized blit for RGB 5-6-5 --> RGBA 8-8-8-8 */
   1550 static const Uint32 RGB565_RGBA8888_LUT[512] = {
   1551 		0x000000ff, 0x00000000, 0x000008ff, 0x00200000,
   1552 		0x000010ff, 0x00400000, 0x000018ff, 0x00610000,
   1553 		0x000020ff, 0x00810000, 0x000029ff, 0x00a10000,
   1554 		0x000031ff, 0x00c20000, 0x000039ff, 0x00e20000,
   1555 		0x000041ff, 0x08000000, 0x00004aff, 0x08200000,
   1556 		0x000052ff, 0x08400000, 0x00005aff, 0x08610000,
   1557 		0x000062ff, 0x08810000, 0x00006aff, 0x08a10000,
   1558 		0x000073ff, 0x08c20000, 0x00007bff, 0x08e20000,
   1559 		0x000083ff, 0x10000000, 0x00008bff, 0x10200000,
   1560 		0x000094ff, 0x10400000, 0x00009cff, 0x10610000,
   1561 		0x0000a4ff, 0x10810000, 0x0000acff, 0x10a10000,
   1562 		0x0000b4ff, 0x10c20000, 0x0000bdff, 0x10e20000,
   1563 		0x0000c5ff, 0x18000000, 0x0000cdff, 0x18200000,
   1564 		0x0000d5ff, 0x18400000, 0x0000deff, 0x18610000,
   1565 		0x0000e6ff, 0x18810000, 0x0000eeff, 0x18a10000,
   1566 		0x0000f6ff, 0x18c20000, 0x0000ffff, 0x18e20000,
   1567 		0x000400ff, 0x20000000, 0x000408ff, 0x20200000,
   1568 		0x000410ff, 0x20400000, 0x000418ff, 0x20610000,
   1569 		0x000420ff, 0x20810000, 0x000429ff, 0x20a10000,
   1570 		0x000431ff, 0x20c20000, 0x000439ff, 0x20e20000,
   1571 		0x000441ff, 0x29000000, 0x00044aff, 0x29200000,
   1572 		0x000452ff, 0x29400000, 0x00045aff, 0x29610000,
   1573 		0x000462ff, 0x29810000, 0x00046aff, 0x29a10000,
   1574 		0x000473ff, 0x29c20000, 0x00047bff, 0x29e20000,
   1575 		0x000483ff, 0x31000000, 0x00048bff, 0x31200000,
   1576 		0x000494ff, 0x31400000, 0x00049cff, 0x31610000,
   1577 		0x0004a4ff, 0x31810000, 0x0004acff, 0x31a10000,
   1578 		0x0004b4ff, 0x31c20000, 0x0004bdff, 0x31e20000,
   1579 		0x0004c5ff, 0x39000000, 0x0004cdff, 0x39200000,
   1580 		0x0004d5ff, 0x39400000, 0x0004deff, 0x39610000,
   1581 		0x0004e6ff, 0x39810000, 0x0004eeff, 0x39a10000,
   1582 		0x0004f6ff, 0x39c20000, 0x0004ffff, 0x39e20000,
   1583 		0x000800ff, 0x41000000, 0x000808ff, 0x41200000,
   1584 		0x000810ff, 0x41400000, 0x000818ff, 0x41610000,
   1585 		0x000820ff, 0x41810000, 0x000829ff, 0x41a10000,
   1586 		0x000831ff, 0x41c20000, 0x000839ff, 0x41e20000,
   1587 		0x000841ff, 0x4a000000, 0x00084aff, 0x4a200000,
   1588 		0x000852ff, 0x4a400000, 0x00085aff, 0x4a610000,
   1589 		0x000862ff, 0x4a810000, 0x00086aff, 0x4aa10000,
   1590 		0x000873ff, 0x4ac20000, 0x00087bff, 0x4ae20000,
   1591 		0x000883ff, 0x52000000, 0x00088bff, 0x52200000,
   1592 		0x000894ff, 0x52400000, 0x00089cff, 0x52610000,
   1593 		0x0008a4ff, 0x52810000, 0x0008acff, 0x52a10000,
   1594 		0x0008b4ff, 0x52c20000, 0x0008bdff, 0x52e20000,
   1595 		0x0008c5ff, 0x5a000000, 0x0008cdff, 0x5a200000,
   1596 		0x0008d5ff, 0x5a400000, 0x0008deff, 0x5a610000,
   1597 		0x0008e6ff, 0x5a810000, 0x0008eeff, 0x5aa10000,
   1598 		0x0008f6ff, 0x5ac20000, 0x0008ffff, 0x5ae20000,
   1599 		0x000c00ff, 0x62000000, 0x000c08ff, 0x62200000,
   1600 		0x000c10ff, 0x62400000, 0x000c18ff, 0x62610000,
   1601 		0x000c20ff, 0x62810000, 0x000c29ff, 0x62a10000,
   1602 		0x000c31ff, 0x62c20000, 0x000c39ff, 0x62e20000,
   1603 		0x000c41ff, 0x6a000000, 0x000c4aff, 0x6a200000,
   1604 		0x000c52ff, 0x6a400000, 0x000c5aff, 0x6a610000,
   1605 		0x000c62ff, 0x6a810000, 0x000c6aff, 0x6aa10000,
   1606 		0x000c73ff, 0x6ac20000, 0x000c7bff, 0x6ae20000,
   1607 		0x000c83ff, 0x73000000, 0x000c8bff, 0x73200000,
   1608 		0x000c94ff, 0x73400000, 0x000c9cff, 0x73610000,
   1609 		0x000ca4ff, 0x73810000, 0x000cacff, 0x73a10000,
   1610 		0x000cb4ff, 0x73c20000, 0x000cbdff, 0x73e20000,
   1611 		0x000cc5ff, 0x7b000000, 0x000ccdff, 0x7b200000,
   1612 		0x000cd5ff, 0x7b400000, 0x000cdeff, 0x7b610000,
   1613 		0x000ce6ff, 0x7b810000, 0x000ceeff, 0x7ba10000,
   1614 		0x000cf6ff, 0x7bc20000, 0x000cffff, 0x7be20000,
   1615 		0x001000ff, 0x83000000, 0x001008ff, 0x83200000,
   1616 		0x001010ff, 0x83400000, 0x001018ff, 0x83610000,
   1617 		0x001020ff, 0x83810000, 0x001029ff, 0x83a10000,
   1618 		0x001031ff, 0x83c20000, 0x001039ff, 0x83e20000,
   1619 		0x001041ff, 0x8b000000, 0x00104aff, 0x8b200000,
   1620 		0x001052ff, 0x8b400000, 0x00105aff, 0x8b610000,
   1621 		0x001062ff, 0x8b810000, 0x00106aff, 0x8ba10000,
   1622 		0x001073ff, 0x8bc20000, 0x00107bff, 0x8be20000,
   1623 		0x001083ff, 0x94000000, 0x00108bff, 0x94200000,
   1624 		0x001094ff, 0x94400000, 0x00109cff, 0x94610000,
   1625 		0x0010a4ff, 0x94810000, 0x0010acff, 0x94a10000,
   1626 		0x0010b4ff, 0x94c20000, 0x0010bdff, 0x94e20000,
   1627 		0x0010c5ff, 0x9c000000, 0x0010cdff, 0x9c200000,
   1628 		0x0010d5ff, 0x9c400000, 0x0010deff, 0x9c610000,
   1629 		0x0010e6ff, 0x9c810000, 0x0010eeff, 0x9ca10000,
   1630 		0x0010f6ff, 0x9cc20000, 0x0010ffff, 0x9ce20000,
   1631 		0x001400ff, 0xa4000000, 0x001408ff, 0xa4200000,
   1632 		0x001410ff, 0xa4400000, 0x001418ff, 0xa4610000,
   1633 		0x001420ff, 0xa4810000, 0x001429ff, 0xa4a10000,
   1634 		0x001431ff, 0xa4c20000, 0x001439ff, 0xa4e20000,
   1635 		0x001441ff, 0xac000000, 0x00144aff, 0xac200000,
   1636 		0x001452ff, 0xac400000, 0x00145aff, 0xac610000,
   1637 		0x001462ff, 0xac810000, 0x00146aff, 0xaca10000,
   1638 		0x001473ff, 0xacc20000, 0x00147bff, 0xace20000,
   1639 		0x001483ff, 0xb4000000, 0x00148bff, 0xb4200000,
   1640 		0x001494ff, 0xb4400000, 0x00149cff, 0xb4610000,
   1641 		0x0014a4ff, 0xb4810000, 0x0014acff, 0xb4a10000,
   1642 		0x0014b4ff, 0xb4c20000, 0x0014bdff, 0xb4e20000,
   1643 		0x0014c5ff, 0xbd000000, 0x0014cdff, 0xbd200000,
   1644 		0x0014d5ff, 0xbd400000, 0x0014deff, 0xbd610000,
   1645 		0x0014e6ff, 0xbd810000, 0x0014eeff, 0xbda10000,
   1646 		0x0014f6ff, 0xbdc20000, 0x0014ffff, 0xbde20000,
   1647 		0x001800ff, 0xc5000000, 0x001808ff, 0xc5200000,
   1648 		0x001810ff, 0xc5400000, 0x001818ff, 0xc5610000,
   1649 		0x001820ff, 0xc5810000, 0x001829ff, 0xc5a10000,
   1650 		0x001831ff, 0xc5c20000, 0x001839ff, 0xc5e20000,
   1651 		0x001841ff, 0xcd000000, 0x00184aff, 0xcd200000,
   1652 		0x001852ff, 0xcd400000, 0x00185aff, 0xcd610000,
   1653 		0x001862ff, 0xcd810000, 0x00186aff, 0xcda10000,
   1654 		0x001873ff, 0xcdc20000, 0x00187bff, 0xcde20000,
   1655 		0x001883ff, 0xd5000000, 0x00188bff, 0xd5200000,
   1656 		0x001894ff, 0xd5400000, 0x00189cff, 0xd5610000,
   1657 		0x0018a4ff, 0xd5810000, 0x0018acff, 0xd5a10000,
   1658 		0x0018b4ff, 0xd5c20000, 0x0018bdff, 0xd5e20000,
   1659 		0x0018c5ff, 0xde000000, 0x0018cdff, 0xde200000,
   1660 		0x0018d5ff, 0xde400000, 0x0018deff, 0xde610000,
   1661 		0x0018e6ff, 0xde810000, 0x0018eeff, 0xdea10000,
   1662 		0x0018f6ff, 0xdec20000, 0x0018ffff, 0xdee20000,
   1663 		0x001c00ff, 0xe6000000, 0x001c08ff, 0xe6200000,
   1664 		0x001c10ff, 0xe6400000, 0x001c18ff, 0xe6610000,
   1665 		0x001c20ff, 0xe6810000, 0x001c29ff, 0xe6a10000,
   1666 		0x001c31ff, 0xe6c20000, 0x001c39ff, 0xe6e20000,
   1667 		0x001c41ff, 0xee000000, 0x001c4aff, 0xee200000,
   1668 		0x001c52ff, 0xee400000, 0x001c5aff, 0xee610000,
   1669 		0x001c62ff, 0xee810000, 0x001c6aff, 0xeea10000,
   1670 		0x001c73ff, 0xeec20000, 0x001c7bff, 0xeee20000,
   1671 		0x001c83ff, 0xf6000000, 0x001c8bff, 0xf6200000,
   1672 		0x001c94ff, 0xf6400000, 0x001c9cff, 0xf6610000,
   1673 		0x001ca4ff, 0xf6810000, 0x001cacff, 0xf6a10000,
   1674 		0x001cb4ff, 0xf6c20000, 0x001cbdff, 0xf6e20000,
   1675 		0x001cc5ff, 0xff000000, 0x001ccdff, 0xff200000,
   1676 		0x001cd5ff, 0xff400000, 0x001cdeff, 0xff610000,
   1677 		0x001ce6ff, 0xff810000, 0x001ceeff, 0xffa10000,
   1678 		0x001cf6ff, 0xffc20000, 0x001cffff, 0xffe20000,
   1679 };
   1680 static void Blit_RGB565_RGBA8888(SDL_BlitInfo *info)
   1681 {
   1682     Blit_RGB565_32(info, RGB565_RGBA8888_LUT);
   1683 }
   1684 
   1685 /* Special optimized blit for RGB 5-6-5 --> BGRA 8-8-8-8 */
   1686 static const Uint32 RGB565_BGRA8888_LUT[512] = {
   1687 		0x00000000, 0x000000ff, 0x08000000, 0x002000ff,
   1688 		0x10000000, 0x004000ff, 0x18000000, 0x006100ff,
   1689 		0x20000000, 0x008100ff, 0x29000000, 0x00a100ff,
   1690 		0x31000000, 0x00c200ff, 0x39000000, 0x00e200ff,
   1691 		0x41000000, 0x000008ff, 0x4a000000, 0x002008ff,
   1692 		0x52000000, 0x004008ff, 0x5a000000, 0x006108ff,
   1693 		0x62000000, 0x008108ff, 0x6a000000, 0x00a108ff,
   1694 		0x73000000, 0x00c208ff, 0x7b000000, 0x00e208ff,
   1695 		0x83000000, 0x000010ff, 0x8b000000, 0x002010ff,
   1696 		0x94000000, 0x004010ff, 0x9c000000, 0x006110ff,
   1697 		0xa4000000, 0x008110ff, 0xac000000, 0x00a110ff,
   1698 		0xb4000000, 0x00c210ff, 0xbd000000, 0x00e210ff,
   1699 		0xc5000000, 0x000018ff, 0xcd000000, 0x002018ff,
   1700 		0xd5000000, 0x004018ff, 0xde000000, 0x006118ff,
   1701 		0xe6000000, 0x008118ff, 0xee000000, 0x00a118ff,
   1702 		0xf6000000, 0x00c218ff, 0xff000000, 0x00e218ff,
   1703 		0x00040000, 0x000020ff, 0x08040000, 0x002020ff,
   1704 		0x10040000, 0x004020ff, 0x18040000, 0x006120ff,
   1705 		0x20040000, 0x008120ff, 0x29040000, 0x00a120ff,
   1706 		0x31040000, 0x00c220ff, 0x39040000, 0x00e220ff,
   1707 		0x41040000, 0x000029ff, 0x4a040000, 0x002029ff,
   1708 		0x52040000, 0x004029ff, 0x5a040000, 0x006129ff,
   1709 		0x62040000, 0x008129ff, 0x6a040000, 0x00a129ff,
   1710 		0x73040000, 0x00c229ff, 0x7b040000, 0x00e229ff,
   1711 		0x83040000, 0x000031ff, 0x8b040000, 0x002031ff,
   1712 		0x94040000, 0x004031ff, 0x9c040000, 0x006131ff,
   1713 		0xa4040000, 0x008131ff, 0xac040000, 0x00a131ff,
   1714 		0xb4040000, 0x00c231ff, 0xbd040000, 0x00e231ff,
   1715 		0xc5040000, 0x000039ff, 0xcd040000, 0x002039ff,
   1716 		0xd5040000, 0x004039ff, 0xde040000, 0x006139ff,
   1717 		0xe6040000, 0x008139ff, 0xee040000, 0x00a139ff,
   1718 		0xf6040000, 0x00c239ff, 0xff040000, 0x00e239ff,
   1719 		0x00080000, 0x000041ff, 0x08080000, 0x002041ff,
   1720 		0x10080000, 0x004041ff, 0x18080000, 0x006141ff,
   1721 		0x20080000, 0x008141ff, 0x29080000, 0x00a141ff,
   1722 		0x31080000, 0x00c241ff, 0x39080000, 0x00e241ff,
   1723 		0x41080000, 0x00004aff, 0x4a080000, 0x00204aff,
   1724 		0x52080000, 0x00404aff, 0x5a080000, 0x00614aff,
   1725 		0x62080000, 0x00814aff, 0x6a080000, 0x00a14aff,
   1726 		0x73080000, 0x00c24aff, 0x7b080000, 0x00e24aff,
   1727 		0x83080000, 0x000052ff, 0x8b080000, 0x002052ff,
   1728 		0x94080000, 0x004052ff, 0x9c080000, 0x006152ff,
   1729 		0xa4080000, 0x008152ff, 0xac080000, 0x00a152ff,
   1730 		0xb4080000, 0x00c252ff, 0xbd080000, 0x00e252ff,
   1731 		0xc5080000, 0x00005aff, 0xcd080000, 0x00205aff,
   1732 		0xd5080000, 0x00405aff, 0xde080000, 0x00615aff,
   1733 		0xe6080000, 0x00815aff, 0xee080000, 0x00a15aff,
   1734 		0xf6080000, 0x00c25aff, 0xff080000, 0x00e25aff,
   1735 		0x000c0000, 0x000062ff, 0x080c0000, 0x002062ff,
   1736 		0x100c0000, 0x004062ff, 0x180c0000, 0x006162ff,
   1737 		0x200c0000, 0x008162ff, 0x290c0000, 0x00a162ff,
   1738 		0x310c0000, 0x00c262ff, 0x390c0000, 0x00e262ff,
   1739 		0x410c0000, 0x00006aff, 0x4a0c0000, 0x00206aff,
   1740 		0x520c0000, 0x00406aff, 0x5a0c0000, 0x00616aff,
   1741 		0x620c0000, 0x00816aff, 0x6a0c0000, 0x00a16aff,
   1742 		0x730c0000, 0x00c26aff, 0x7b0c0000, 0x00e26aff,
   1743 		0x830c0000, 0x000073ff, 0x8b0c0000, 0x002073ff,
   1744 		0x940c0000, 0x004073ff, 0x9c0c0000, 0x006173ff,
   1745 		0xa40c0000, 0x008173ff, 0xac0c0000, 0x00a173ff,
   1746 		0xb40c0000, 0x00c273ff, 0xbd0c0000, 0x00e273ff,
   1747 		0xc50c0000, 0x00007bff, 0xcd0c0000, 0x00207bff,
   1748 		0xd50c0000, 0x00407bff, 0xde0c0000, 0x00617bff,
   1749 		0xe60c0000, 0x00817bff, 0xee0c0000, 0x00a17bff,
   1750 		0xf60c0000, 0x00c27bff, 0xff0c0000, 0x00e27bff,
   1751 		0x00100000, 0x000083ff, 0x08100000, 0x002083ff,
   1752 		0x10100000, 0x004083ff, 0x18100000, 0x006183ff,
   1753 		0x20100000, 0x008183ff, 0x29100000, 0x00a183ff,
   1754 		0x31100000, 0x00c283ff, 0x39100000, 0x00e283ff,
   1755 		0x41100000, 0x00008bff, 0x4a100000, 0x00208bff,
   1756 		0x52100000, 0x00408bff, 0x5a100000, 0x00618bff,
   1757 		0x62100000, 0x00818bff, 0x6a100000, 0x00a18bff,
   1758 		0x73100000, 0x00c28bff, 0x7b100000, 0x00e28bff,
   1759 		0x83100000, 0x000094ff, 0x8b100000, 0x002094ff,
   1760 		0x94100000, 0x004094ff, 0x9c100000, 0x006194ff,
   1761 		0xa4100000, 0x008194ff, 0xac100000, 0x00a194ff,
   1762 		0xb4100000, 0x00c294ff, 0xbd100000, 0x00e294ff,
   1763 		0xc5100000, 0x00009cff, 0xcd100000, 0x00209cff,
   1764 		0xd5100000, 0x00409cff, 0xde100000, 0x00619cff,
   1765 		0xe6100000, 0x00819cff, 0xee100000, 0x00a19cff,
   1766 		0xf6100000, 0x00c29cff, 0xff100000, 0x00e29cff,
   1767 		0x00140000, 0x0000a4ff, 0x08140000, 0x0020a4ff,
   1768 		0x10140000, 0x0040a4ff, 0x18140000, 0x0061a4ff,
   1769 		0x20140000, 0x0081a4ff, 0x29140000, 0x00a1a4ff,
   1770 		0x31140000, 0x00c2a4ff, 0x39140000, 0x00e2a4ff,
   1771 		0x41140000, 0x0000acff, 0x4a140000, 0x0020acff,
   1772 		0x52140000, 0x0040acff, 0x5a140000, 0x0061acff,
   1773 		0x62140000, 0x0081acff, 0x6a140000, 0x00a1acff,
   1774 		0x73140000, 0x00c2acff, 0x7b140000, 0x00e2acff,
   1775 		0x83140000, 0x0000b4ff, 0x8b140000, 0x0020b4ff,
   1776 		0x94140000, 0x0040b4ff, 0x9c140000, 0x0061b4ff,
   1777 		0xa4140000, 0x0081b4ff, 0xac140000, 0x00a1b4ff,
   1778 		0xb4140000, 0x00c2b4ff, 0xbd140000, 0x00e2b4ff,
   1779 		0xc5140000, 0x0000bdff, 0xcd140000, 0x0020bdff,
   1780 		0xd5140000, 0x0040bdff, 0xde140000, 0x0061bdff,
   1781 		0xe6140000, 0x0081bdff, 0xee140000, 0x00a1bdff,
   1782 		0xf6140000, 0x00c2bdff, 0xff140000, 0x00e2bdff,
   1783 		0x00180000, 0x0000c5ff, 0x08180000, 0x0020c5ff,
   1784 		0x10180000, 0x0040c5ff, 0x18180000, 0x0061c5ff,
   1785 		0x20180000, 0x0081c5ff, 0x29180000, 0x00a1c5ff,
   1786 		0x31180000, 0x00c2c5ff, 0x39180000, 0x00e2c5ff,
   1787 		0x41180000, 0x0000cdff, 0x4a180000, 0x0020cdff,
   1788 		0x52180000, 0x0040cdff, 0x5a180000, 0x0061cdff,
   1789 		0x62180000, 0x0081cdff, 0x6a180000, 0x00a1cdff,
   1790 		0x73180000, 0x00c2cdff, 0x7b180000, 0x00e2cdff,
   1791 		0x83180000, 0x0000d5ff, 0x8b180000, 0x0020d5ff,
   1792 		0x94180000, 0x0040d5ff, 0x9c180000, 0x0061d5ff,
   1793 		0xa4180000, 0x0081d5ff, 0xac180000, 0x00a1d5ff,
   1794 		0xb4180000, 0x00c2d5ff, 0xbd180000, 0x00e2d5ff,
   1795 		0xc5180000, 0x0000deff, 0xcd180000, 0x0020deff,
   1796 		0xd5180000, 0x0040deff, 0xde180000, 0x0061deff,
   1797 		0xe6180000, 0x0081deff, 0xee180000, 0x00a1deff,
   1798 		0xf6180000, 0x00c2deff, 0xff180000, 0x00e2deff,
   1799 		0x001c0000, 0x0000e6ff, 0x081c0000, 0x0020e6ff,
   1800 		0x101c0000, 0x0040e6ff, 0x181c0000, 0x0061e6ff,
   1801 		0x201c0000, 0x0081e6ff, 0x291c0000, 0x00a1e6ff,
   1802 		0x311c0000, 0x00c2e6ff, 0x391c0000, 0x00e2e6ff,
   1803 		0x411c0000, 0x0000eeff, 0x4a1c0000, 0x0020eeff,
   1804 		0x521c0000, 0x0040eeff, 0x5a1c0000, 0x0061eeff,
   1805 		0x621c0000, 0x0081eeff, 0x6a1c0000, 0x00a1eeff,
   1806 		0x731c0000, 0x00c2eeff, 0x7b1c0000, 0x00e2eeff,
   1807 		0x831c0000, 0x0000f6ff, 0x8b1c0000, 0x0020f6ff,
   1808 		0x941c0000, 0x0040f6ff, 0x9c1c0000, 0x0061f6ff,
   1809 		0xa41c0000, 0x0081f6ff, 0xac1c0000, 0x00a1f6ff,
   1810 		0xb41c0000, 0x00c2f6ff, 0xbd1c0000, 0x00e2f6ff,
   1811 		0xc51c0000, 0x0000ffff, 0xcd1c0000, 0x0020ffff,
   1812 		0xd51c0000, 0x0040ffff, 0xde1c0000, 0x0061ffff,
   1813 		0xe61c0000, 0x0081ffff, 0xee1c0000, 0x00a1ffff,
   1814 		0xf61c0000, 0x00c2ffff, 0xff1c0000, 0x00e2ffff
   1815 };
   1816 static void Blit_RGB565_BGRA8888(SDL_BlitInfo *info)
   1817 {
   1818     Blit_RGB565_32(info, RGB565_BGRA8888_LUT);
   1819 }
   1820 
   1821 /* Special optimized blit for RGB 8-8-8 --> RGB 3-3-2 */
   1822 #ifndef RGB888_RGB332
   1823 #define RGB888_RGB332(dst, src) { \
   1824 	dst = (((src)&0x00E00000)>>16)| \
   1825 	      (((src)&0x0000E000)>>11)| \
   1826 	      (((src)&0x000000C0)>>6); \
   1827 }
   1828 #endif
   1829 static void Blit_RGB888_index8_map(SDL_BlitInfo *info)
   1830 {
   1831 #ifndef USE_DUFFS_LOOP
   1832 	int c;
   1833 #endif
   1834 	int Pixel;
   1835 	int width, height;
   1836 	Uint32 *src;
   1837 	const Uint8 *map;
   1838 	Uint8 *dst;
   1839 	int srcskip, dstskip;
   1840 
   1841 	/* Set up some basic variables */
   1842 	width = info->d_width;
   1843 	height = info->d_height;
   1844 	src = (Uint32 *)info->s_pixels;
   1845 	srcskip = info->s_skip/4;
   1846 	dst = info->d_pixels;
   1847 	dstskip = info->d_skip;
   1848 	map = info->table;
   1849 
   1850 #ifdef USE_DUFFS_LOOP
   1851 	while ( height-- ) {
   1852 		DUFFS_LOOP(
   1853 			RGB888_RGB332(Pixel, *src);
   1854 			*dst++ = map[Pixel];
   1855 			++src;
   1856 		, width);
   1857 		src += srcskip;
   1858 		dst += dstskip;
   1859 	}
   1860 #else
   1861 	while ( height-- ) {
   1862 		for ( c=width/4; c; --c ) {
   1863 			/* Pack RGB into 8bit pixel */
   1864 			RGB888_RGB332(Pixel, *src);
   1865 			*dst++ = map[Pixel];
   1866 			++src;
   1867 			RGB888_RGB332(Pixel, *src);
   1868 			*dst++ = map[Pixel];
   1869 			++src;
   1870 			RGB888_RGB332(Pixel, *src);
   1871 			*dst++ = map[Pixel];
   1872 			++src;
   1873 			RGB888_RGB332(Pixel, *src);
   1874 			*dst++ = map[Pixel];
   1875 			++src;
   1876 		}
   1877 		switch ( width & 3 ) {
   1878 			case 3:
   1879 				RGB888_RGB332(Pixel, *src);
   1880 				*dst++ = map[Pixel];
   1881 				++src;
   1882 			case 2:
   1883 				RGB888_RGB332(Pixel, *src);
   1884 				*dst++ = map[Pixel];
   1885 				++src;
   1886 			case 1:
   1887 				RGB888_RGB332(Pixel, *src);
   1888 				*dst++ = map[Pixel];
   1889 				++src;
   1890 		}
   1891 		src += srcskip;
   1892 		dst += dstskip;
   1893 	}
   1894 #endif /* USE_DUFFS_LOOP */
   1895 }
   1896 static void BlitNto1(SDL_BlitInfo *info)
   1897 {
   1898 #ifndef USE_DUFFS_LOOP
   1899 	int c;
   1900 #endif
   1901 	int width, height;
   1902 	Uint8 *src;
   1903 	const Uint8 *map;
   1904 	Uint8 *dst;
   1905 	int srcskip, dstskip;
   1906 	int srcbpp;
   1907 	Uint32 Pixel;
   1908 	int  sR, sG, sB;
   1909 	SDL_PixelFormat *srcfmt;
   1910 
   1911 	/* Set up some basic variables */
   1912 	width = info->d_width;
   1913 	height = info->d_height;
   1914 	src = info->s_pixels;
   1915 	srcskip = info->s_skip;
   1916 	dst = info->d_pixels;
   1917 	dstskip = info->d_skip;
   1918 	map = info->table;
   1919 	srcfmt = info->src;
   1920 	srcbpp = srcfmt->BytesPerPixel;
   1921 
   1922 	if ( map == NULL ) {
   1923 		while ( height-- ) {
   1924 #ifdef USE_DUFFS_LOOP
   1925 			DUFFS_LOOP(
   1926 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   1927 								sR, sG, sB);
   1928 				if ( 1 ) {
   1929 				  	/* Pack RGB into 8bit pixel */
   1930 				  	*dst = ((sR>>5)<<(3+2))|
   1931 					        ((sG>>5)<<(2)) |
   1932 					        ((sB>>6)<<(0)) ;
   1933 				}
   1934 				dst++;
   1935 				src += srcbpp;
   1936 			, width);
   1937 #else
   1938 			for ( c=width; c; --c ) {
   1939 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   1940 								sR, sG, sB);
   1941 				if ( 1 ) {
   1942 				  	/* Pack RGB into 8bit pixel */
   1943 				  	*dst = ((sR>>5)<<(3+2))|
   1944 					        ((sG>>5)<<(2)) |
   1945 					        ((sB>>6)<<(0)) ;
   1946 				}
   1947 				dst++;
   1948 				src += srcbpp;
   1949 			}
   1950 #endif
   1951 			src += srcskip;
   1952 			dst += dstskip;
   1953 		}
   1954 	} else {
   1955 		while ( height-- ) {
   1956 #ifdef USE_DUFFS_LOOP
   1957 			DUFFS_LOOP(
   1958 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   1959 								sR, sG, sB);
   1960 				if ( 1 ) {
   1961 				  	/* Pack RGB into 8bit pixel */
   1962 				  	*dst = map[((sR>>5)<<(3+2))|
   1963 						   ((sG>>5)<<(2))  |
   1964 						   ((sB>>6)<<(0))  ];
   1965 				}
   1966 				dst++;
   1967 				src += srcbpp;
   1968 			, width);
   1969 #else
   1970 			for ( c=width; c; --c ) {
   1971 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   1972 								sR, sG, sB);
   1973 				if ( 1 ) {
   1974 				  	/* Pack RGB into 8bit pixel */
   1975 				  	*dst = map[((sR>>5)<<(3+2))|
   1976 						   ((sG>>5)<<(2))  |
   1977 						   ((sB>>6)<<(0))  ];
   1978 				}
   1979 				dst++;
   1980 				src += srcbpp;
   1981 			}
   1982 #endif /* USE_DUFFS_LOOP */
   1983 			src += srcskip;
   1984 			dst += dstskip;
   1985 		}
   1986 	}
   1987 }
   1988 
   1989 /* blits 32 bit RGB<->RGBA with both surfaces having the same R,G,B fields */
   1990 static void Blit4to4MaskAlpha(SDL_BlitInfo *info)
   1991 {
   1992 	int width = info->d_width;
   1993 	int height = info->d_height;
   1994 	Uint32 *src = (Uint32 *)info->s_pixels;
   1995 	int srcskip = info->s_skip;
   1996 	Uint32 *dst = (Uint32 *)info->d_pixels;
   1997 	int dstskip = info->d_skip;
   1998 	SDL_PixelFormat *srcfmt = info->src;
   1999 	SDL_PixelFormat *dstfmt = info->dst;
   2000 
   2001 	if (dstfmt->Amask) {
   2002 		/* RGB->RGBA, SET_ALPHA */
   2003 		Uint32 mask = (srcfmt->alpha >> dstfmt->Aloss) << dstfmt->Ashift;
   2004 
   2005 		while ( height-- ) {
   2006 			DUFFS_LOOP(
   2007 			{
   2008 				*dst = *src | mask;
   2009 				++dst;
   2010 				++src;
   2011 			},
   2012 			width);
   2013 			src = (Uint32*)((Uint8*)src + srcskip);
   2014 			dst = (Uint32*)((Uint8*)dst + dstskip);
   2015 		}
   2016 	} else {
   2017 		/* RGBA->RGB, NO_ALPHA */
   2018 		Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask;
   2019 
   2020 		while ( height-- ) {
   2021 			DUFFS_LOOP(
   2022 			{
   2023 				*dst = *src & mask;
   2024 				++dst;
   2025 				++src;
   2026 			},
   2027 			width);
   2028 			src = (Uint32*)((Uint8*)src + srcskip);
   2029 			dst = (Uint32*)((Uint8*)dst + dstskip);
   2030 		}
   2031 	}
   2032 }
   2033 
   2034 static void BlitNtoN(SDL_BlitInfo *info)
   2035 {
   2036 	int width = info->d_width;
   2037 	int height = info->d_height;
   2038 	Uint8 *src = info->s_pixels;
   2039 	int srcskip = info->s_skip;
   2040 	Uint8 *dst = info->d_pixels;
   2041 	int dstskip = info->d_skip;
   2042 	SDL_PixelFormat *srcfmt = info->src;
   2043 	int srcbpp = srcfmt->BytesPerPixel;
   2044 	SDL_PixelFormat *dstfmt = info->dst;
   2045 	int dstbpp = dstfmt->BytesPerPixel;
   2046 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   2047 
   2048 	while ( height-- ) {
   2049 		DUFFS_LOOP(
   2050 		{
   2051 		        Uint32 Pixel;
   2052 			unsigned sR;
   2053 			unsigned sG;
   2054 			unsigned sB;
   2055 			DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB);
   2056 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt, sR, sG, sB, alpha);
   2057 			dst += dstbpp;
   2058 			src += srcbpp;
   2059 		},
   2060 		width);
   2061 		src += srcskip;
   2062 		dst += dstskip;
   2063 	}
   2064 }
   2065 
   2066 static void BlitNtoNCopyAlpha(SDL_BlitInfo *info)
   2067 {
   2068 	int width = info->d_width;
   2069 	int height = info->d_height;
   2070 	Uint8 *src = info->s_pixels;
   2071 	int srcskip = info->s_skip;
   2072 	Uint8 *dst = info->d_pixels;
   2073 	int dstskip = info->d_skip;
   2074 	SDL_PixelFormat *srcfmt = info->src;
   2075 	int srcbpp = srcfmt->BytesPerPixel;
   2076 	SDL_PixelFormat *dstfmt = info->dst;
   2077 	int dstbpp = dstfmt->BytesPerPixel;
   2078 	int c;
   2079 
   2080 	/* FIXME: should map alpha to [0..255] correctly! */
   2081 	while ( height-- ) {
   2082 		for ( c=width; c; --c ) {
   2083 		        Uint32 Pixel;
   2084 			unsigned sR, sG, sB, sA;
   2085 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
   2086 				      sR, sG, sB, sA);
   2087 			ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
   2088 				      sR, sG, sB, sA);
   2089 			dst += dstbpp;
   2090 			src += srcbpp;
   2091 		}
   2092 		src += srcskip;
   2093 		dst += dstskip;
   2094 	}
   2095 }
   2096 
   2097 static void BlitNto1Key(SDL_BlitInfo *info)
   2098 {
   2099 	int width = info->d_width;
   2100 	int height = info->d_height;
   2101 	Uint8 *src = info->s_pixels;
   2102 	int srcskip = info->s_skip;
   2103 	Uint8 *dst = info->d_pixels;
   2104 	int dstskip = info->d_skip;
   2105 	SDL_PixelFormat *srcfmt = info->src;
   2106 	const Uint8 *palmap = info->table;
   2107 	Uint32 ckey = srcfmt->colorkey;
   2108 	Uint32 rgbmask = ~srcfmt->Amask;
   2109 	int srcbpp;
   2110 	Uint32 Pixel;
   2111 	unsigned sR, sG, sB;
   2112 
   2113 	/* Set up some basic variables */
   2114 	srcbpp = srcfmt->BytesPerPixel;
   2115 	ckey &= rgbmask;
   2116 
   2117 	if ( palmap == NULL ) {
   2118 		while ( height-- ) {
   2119 			DUFFS_LOOP(
   2120 			{
   2121 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2122 								sR, sG, sB);
   2123 				if ( (Pixel & rgbmask) != ckey ) {
   2124 				  	/* Pack RGB into 8bit pixel */
   2125 				  	*dst = (Uint8)(((sR>>5)<<(3+2))|
   2126 						           ((sG>>5)<<(2)) |
   2127 						           ((sB>>6)<<(0)));
   2128 				}
   2129 				dst++;
   2130 				src += srcbpp;
   2131 			},
   2132 			width);
   2133 			src += srcskip;
   2134 			dst += dstskip;
   2135 		}
   2136 	} else {
   2137 		while ( height-- ) {
   2138 			DUFFS_LOOP(
   2139 			{
   2140 				DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel,
   2141 								sR, sG, sB);
   2142 				if ( (Pixel & rgbmask) != ckey ) {
   2143 				  	/* Pack RGB into 8bit pixel */
   2144 				  	*dst = (Uint8)palmap[((sR>>5)<<(3+2))|
   2145 							             ((sG>>5)<<(2))  |
   2146 							             ((sB>>6)<<(0))  ];
   2147 				}
   2148 				dst++;
   2149 				src += srcbpp;
   2150 			},
   2151 			width);
   2152 			src += srcskip;
   2153 			dst += dstskip;
   2154 		}
   2155 	}
   2156 }
   2157 
   2158 static void Blit2to2Key(SDL_BlitInfo *info)
   2159 {
   2160 	int width = info->d_width;
   2161 	int height = info->d_height;
   2162 	Uint16 *srcp = (Uint16 *)info->s_pixels;
   2163 	int srcskip = info->s_skip;
   2164 	Uint16 *dstp = (Uint16 *)info->d_pixels;
   2165 	int dstskip = info->d_skip;
   2166 	Uint32 ckey = info->src->colorkey;
   2167 	Uint32 rgbmask = ~info->src->Amask;
   2168 
   2169 	/* Set up some basic variables */
   2170         srcskip /= 2;
   2171         dstskip /= 2;
   2172 	ckey &= rgbmask;
   2173 
   2174 	while ( height-- ) {
   2175 		DUFFS_LOOP(
   2176 		{
   2177 			if ( (*srcp & rgbmask) != ckey ) {
   2178 				*dstp = *srcp;
   2179 			}
   2180 			dstp++;
   2181 			srcp++;
   2182 		},
   2183 		width);
   2184 		srcp += srcskip;
   2185 		dstp += dstskip;
   2186 	}
   2187 }
   2188 
   2189 static void BlitNtoNKey(SDL_BlitInfo *info)
   2190 {
   2191 	int width = info->d_width;
   2192 	int height = info->d_height;
   2193 	Uint8 *src = info->s_pixels;
   2194 	int srcskip = info->s_skip;
   2195 	Uint8 *dst = info->d_pixels;
   2196 	int dstskip = info->d_skip;
   2197 	Uint32 ckey = info->src->colorkey;
   2198 	SDL_PixelFormat *srcfmt = info->src;
   2199 	SDL_PixelFormat *dstfmt = info->dst;
   2200 	int srcbpp = srcfmt->BytesPerPixel;
   2201 	int dstbpp = dstfmt->BytesPerPixel;
   2202 	unsigned alpha = dstfmt->Amask ? srcfmt->alpha : 0;
   2203 	Uint32 rgbmask = ~srcfmt->Amask;
   2204 
   2205 	/* Set up some basic variables */
   2206 	ckey &= rgbmask;
   2207 
   2208 	while ( height-- ) {
   2209 		DUFFS_LOOP(
   2210 		{
   2211 		        Uint32 Pixel;
   2212 			unsigned sR;
   2213 			unsigned sG;
   2214 			unsigned sB;
   2215 			RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel);
   2216 			if ( (Pixel & rgbmask) != ckey ) {
   2217 			        RGB_FROM_PIXEL(Pixel, srcfmt, sR, sG, sB);
   2218 				ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
   2219 					      sR, sG, sB, alpha);
   2220 			}
   2221 			dst += dstbpp;
   2222 			src += srcbpp;
   2223 		},
   2224 		width);
   2225 		src += srcskip;
   2226 		dst += dstskip;
   2227 	}
   2228 }
   2229 
   2230 static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info)
   2231 {
   2232 	int width = info->d_width;
   2233 	int height = info->d_height;
   2234 	Uint8 *src = info->s_pixels;
   2235 	int srcskip = info->s_skip;
   2236 	Uint8 *dst = info->d_pixels;
   2237 	int dstskip = info->d_skip;
   2238 	Uint32 ckey = info->src->colorkey;
   2239 	SDL_PixelFormat *srcfmt = info->src;
   2240 	SDL_PixelFormat *dstfmt = info->dst;
   2241 	Uint32 rgbmask = ~srcfmt->Amask;
   2242 
   2243 	Uint8 srcbpp;
   2244 	Uint8 dstbpp;
   2245 	Uint32 Pixel;
   2246 	unsigned sR, sG, sB, sA;
   2247 
   2248 	/* Set up some basic variables */
   2249 	srcbpp = srcfmt->BytesPerPixel;
   2250 	dstbpp = dstfmt->BytesPerPixel;
   2251 	ckey &= rgbmask;
   2252 
   2253 	/* FIXME: should map alpha to [0..255] correctly! */
   2254 	while ( height-- ) {
   2255 		DUFFS_LOOP(
   2256 		{
   2257 			DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel,
   2258 				      sR, sG, sB, sA);
   2259 			if ( (Pixel & rgbmask) != ckey ) {
   2260 				  ASSEMBLE_RGBA(dst, dstbpp, dstfmt,
   2261 						sR, sG, sB, sA);
   2262 			}
   2263 			dst += dstbpp;
   2264 			src += srcbpp;
   2265 		},
   2266 		width);
   2267 		src += srcskip;
   2268 		dst += dstskip;
   2269 	}
   2270 }
   2271 
   2272 /* Normal N to N optimized blitters */
   2273 struct blit_table {
   2274 	Uint32 srcR, srcG, srcB;
   2275 	int dstbpp;
   2276 	Uint32 dstR, dstG, dstB;
   2277 	Uint32 blit_features;
   2278 	void *aux_data;
   2279 	SDL_loblit blitfunc;
   2280 	enum { NO_ALPHA=1, SET_ALPHA=2, COPY_ALPHA=4 } alpha;
   2281 };
   2282 static const struct blit_table normal_blit_1[] = {
   2283 	/* Default for 8-bit RGB source, an invalid combination */
   2284 	{ 0,0,0, 0, 0,0,0, 0, NULL, NULL },
   2285 };
   2286 static const struct blit_table normal_blit_2[] = {
   2287 #if SDL_HERMES_BLITTERS
   2288     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000007E0,0x0000F800,
   2289       0, ConvertX86p16_16BGR565, ConvertX86, NO_ALPHA },
   2290     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x00007C00,0x000003E0,0x0000001F,
   2291       0, ConvertX86p16_16RGB555, ConvertX86, NO_ALPHA },
   2292     { 0x0000F800,0x000007E0,0x0000001F, 2, 0x0000001F,0x000003E0,0x00007C00,
   2293       0, ConvertX86p16_16BGR555, ConvertX86, NO_ALPHA },
   2294 #elif SDL_ALTIVEC_BLITTERS
   2295     /* has-altivec */
   2296     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
   2297       2, NULL, Blit_RGB565_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
   2298     { 0x00007C00,0x000003E0,0x0000001F, 4, 0x00000000,0x00000000,0x00000000,
   2299       2, NULL, Blit_RGB555_32Altivec, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
   2300 #endif
   2301     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x00FF0000,0x0000FF00,0x000000FF,
   2302       0, NULL, Blit_RGB565_ARGB8888, SET_ALPHA },
   2303     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x000000FF,0x0000FF00,0x00FF0000,
   2304       0, NULL, Blit_RGB565_ABGR8888, SET_ALPHA },
   2305     { 0x0000F800,0x000007E0,0x0000001F, 4, 0xFF000000,0x00FF0000,0x0000FF00,
   2306       0, NULL, Blit_RGB565_RGBA8888, SET_ALPHA },
   2307     { 0x0000F800,0x000007E0,0x0000001F, 4, 0x0000FF00,0x00FF0000,0xFF000000,
   2308       0, NULL, Blit_RGB565_BGRA8888, SET_ALPHA },
   2309 
   2310     /* Default for 16-bit RGB source, used if no other blitter matches */
   2311     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
   2312 };
   2313 static const struct blit_table normal_blit_3[] = {
   2314 	/* Default for 24-bit RGB source, never optimized */
   2315     { 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
   2316 };
   2317 static const struct blit_table normal_blit_4[] = {
   2318 #if SDL_HERMES_BLITTERS
   2319     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
   2320       1, ConvertMMXpII32_16RGB565, ConvertMMX, NO_ALPHA },
   2321     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
   2322       0, ConvertX86p32_16RGB565, ConvertX86, NO_ALPHA },
   2323     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
   2324       1, ConvertMMXpII32_16BGR565, ConvertMMX, NO_ALPHA },
   2325     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000007E0,0x0000F800,
   2326       0, ConvertX86p32_16BGR565, ConvertX86, NO_ALPHA },
   2327     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
   2328       1, ConvertMMXpII32_16RGB555, ConvertMMX, NO_ALPHA },
   2329     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
   2330       0, ConvertX86p32_16RGB555, ConvertX86, NO_ALPHA },
   2331     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
   2332       1, ConvertMMXpII32_16BGR555, ConvertMMX, NO_ALPHA },
   2333     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000001F,0x000003E0,0x00007C00,
   2334       0, ConvertX86p32_16BGR555, ConvertX86, NO_ALPHA },
   2335     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x00FF0000,0x0000FF00,0x000000FF,
   2336       0, ConvertX86p32_24RGB888, ConvertX86, NO_ALPHA },
   2337     { 0x00FF0000,0x0000FF00,0x000000FF, 3, 0x000000FF,0x0000FF00,0x00FF0000,
   2338       0, ConvertX86p32_24BGR888, ConvertX86, NO_ALPHA },
   2339     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x000000FF,0x0000FF00,0x00FF0000,
   2340       0, ConvertX86p32_32BGR888, ConvertX86, NO_ALPHA },
   2341     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0xFF000000,0x00FF0000,0x0000FF00,
   2342       0, ConvertX86p32_32RGBA888, ConvertX86, NO_ALPHA },
   2343     { 0x00FF0000,0x0000FF00,0x000000FF, 4, 0x0000FF00,0x00FF0000,0xFF000000,
   2344       0, ConvertX86p32_32BGRA888, ConvertX86, NO_ALPHA },
   2345 #else
   2346 #if SDL_ALTIVEC_BLITTERS
   2347     /* has-altivec | dont-use-prefetch */
   2348     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
   2349       6, NULL, ConvertAltivec32to32_noprefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
   2350     /* has-altivec */
   2351     { 0x00000000,0x00000000,0x00000000, 4, 0x00000000,0x00000000,0x00000000,
   2352       2, NULL, ConvertAltivec32to32_prefetch, NO_ALPHA | COPY_ALPHA | SET_ALPHA },
   2353     /* has-altivec */
   2354     { 0x00000000,0x00000000,0x00000000, 2, 0x0000F800,0x000007E0,0x0000001F,
   2355       2, NULL, Blit_RGB888_RGB565Altivec, NO_ALPHA },
   2356 #endif
   2357     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x0000F800,0x000007E0,0x0000001F,
   2358       0, NULL, Blit_RGB888_RGB565, NO_ALPHA },
   2359     { 0x00FF0000,0x0000FF00,0x000000FF, 2, 0x00007C00,0x000003E0,0x0000001F,
   2360       0, NULL, Blit_RGB888_RGB555, NO_ALPHA },
   2361 #endif
   2362 	/* Default for 32-bit RGB source, used if no other blitter matches */
   2363 	{ 0,0,0, 0, 0,0,0, 0, NULL, BlitNtoN, 0 }
   2364 };
   2365 static const struct blit_table *normal_blit[] = {
   2366 	normal_blit_1, normal_blit_2, normal_blit_3, normal_blit_4
   2367 };
   2368 
   2369 /* Mask matches table, or table entry is zero */
   2370 #define MASKOK(x, y) (((x) == (y)) || ((y) == 0x00000000))
   2371 
   2372 SDL_loblit SDL_CalculateBlitN(SDL_Surface *surface, int blit_index)
   2373 {
   2374 	struct private_swaccel *sdata;
   2375 	SDL_PixelFormat *srcfmt;
   2376 	SDL_PixelFormat *dstfmt;
   2377 	const struct blit_table *table;
   2378 	int which;
   2379 	SDL_loblit blitfun;
   2380 
   2381 	/* Set up data for choosing the blit */
   2382 	sdata = surface->map->sw_data;
   2383 	srcfmt = surface->format;
   2384 	dstfmt = surface->map->dst->format;
   2385 
   2386 	if ( blit_index & 2 ) {
   2387 	        /* alpha or alpha+colorkey */
   2388 	        return SDL_CalculateAlphaBlit(surface, blit_index);
   2389 	}
   2390 
   2391 	/* We don't support destinations less than 8-bits */
   2392 	if ( dstfmt->BitsPerPixel < 8 ) {
   2393 		return(NULL);
   2394 	}
   2395 
   2396 	if(blit_index == 1) {
   2397 	    /* colorkey blit: Here we don't have too many options, mostly
   2398 	       because RLE is the preferred fast way to deal with this.
   2399 	       If a particular case turns out to be useful we'll add it. */
   2400 
   2401 	    if(srcfmt->BytesPerPixel == 2
   2402 	       && surface->map->identity)
   2403 		return Blit2to2Key;
   2404 	    else if(dstfmt->BytesPerPixel == 1)
   2405 		return BlitNto1Key;
   2406 	    else {
   2407 #if SDL_ALTIVEC_BLITTERS
   2408         if((srcfmt->BytesPerPixel == 4) && (dstfmt->BytesPerPixel == 4) && SDL_HasAltiVec()) {
   2409             return Blit32to32KeyAltivec;
   2410         } else
   2411 #endif
   2412 
   2413 		if(srcfmt->Amask && dstfmt->Amask)
   2414 		    return BlitNtoNKeyCopyAlpha;
   2415 		else
   2416 		    return BlitNtoNKey;
   2417 	    }
   2418 	}
   2419 
   2420 	blitfun = NULL;
   2421 	if ( dstfmt->BitsPerPixel == 8 ) {
   2422 		/* We assume 8-bit destinations are palettized */
   2423 		if ( (srcfmt->BytesPerPixel == 4) &&
   2424 		     (srcfmt->Rmask == 0x00FF0000) &&
   2425 		     (srcfmt->Gmask == 0x0000FF00) &&
   2426 		     (srcfmt->Bmask == 0x000000FF) ) {
   2427 			if ( surface->map->table ) {
   2428 				blitfun = Blit_RGB888_index8_map;
   2429 			} else {
   2430 #if SDL_HERMES_BLITTERS
   2431 				sdata->aux_data = ConvertX86p32_8RGB332;
   2432 				blitfun = ConvertX86;
   2433 #else
   2434 				blitfun = Blit_RGB888_index8;
   2435 #endif
   2436 			}
   2437 		} else {
   2438 			blitfun = BlitNto1;
   2439 		}
   2440 	} else {
   2441 		/* Now the meat, choose the blitter we want */
   2442 		int a_need = NO_ALPHA;
   2443 		if(dstfmt->Amask)
   2444 		    a_need = srcfmt->Amask ? COPY_ALPHA : SET_ALPHA;
   2445 		table = normal_blit[srcfmt->BytesPerPixel-1];
   2446 		for ( which=0; table[which].dstbpp; ++which ) {
   2447 			if ( MASKOK(srcfmt->Rmask, table[which].srcR) &&
   2448 			    MASKOK(srcfmt->Gmask, table[which].srcG) &&
   2449 			    MASKOK(srcfmt->Bmask, table[which].srcB) &&
   2450 			    MASKOK(dstfmt->Rmask, table[which].dstR) &&
   2451 			    MASKOK(dstfmt->Gmask, table[which].dstG) &&
   2452 			    MASKOK(dstfmt->Bmask, table[which].dstB) &&
   2453 			    dstfmt->BytesPerPixel == table[which].dstbpp &&
   2454 			    (a_need & table[which].alpha) == a_need &&
   2455 			    ((table[which].blit_features & GetBlitFeatures()) == table[which].blit_features) )
   2456 				break;
   2457 		}
   2458 		sdata->aux_data = table[which].aux_data;
   2459 		blitfun = table[which].blitfunc;
   2460 
   2461 		if(blitfun == BlitNtoN) {  /* default C fallback catch-all. Slow! */
   2462 			/* Fastpath C fallback: 32bit RGB<->RGBA blit with matching RGB */
   2463 			if ( srcfmt->BytesPerPixel == 4 && dstfmt->BytesPerPixel == 4 &&
   2464 			     srcfmt->Rmask == dstfmt->Rmask &&
   2465 			     srcfmt->Gmask == dstfmt->Gmask &&
   2466 			     srcfmt->Bmask == dstfmt->Bmask ) {
   2467 				blitfun = Blit4to4MaskAlpha;
   2468 			} else if ( a_need == COPY_ALPHA ) {
   2469 			    blitfun = BlitNtoNCopyAlpha;
   2470 			}
   2471 		}
   2472 	}
   2473 
   2474 #ifdef DEBUG_ASM
   2475 #if SDL_HERMES_BLITTERS
   2476 	if ( blitfun == ConvertMMX )
   2477 		fprintf(stderr, "Using mmx blit\n");
   2478 	else
   2479 	if ( blitfun == ConvertX86 )
   2480 		fprintf(stderr, "Using asm blit\n");
   2481 	else
   2482 #endif
   2483 	if ( (blitfun == BlitNtoN) || (blitfun == BlitNto1) )
   2484 		fprintf(stderr, "Using C blit\n");
   2485 	else
   2486 		fprintf(stderr, "Using optimized C blit\n");
   2487 #endif /* DEBUG_ASM */
   2488 
   2489 	return(blitfun);
   2490 }
   2491