Home | History | Annotate | Download | only in video
      1 /*
      2     SDL - Simple DirectMedia Layer
      3     Copyright (C) 1997-2012 Sam Lantinga
      4 
      5     This library is free software; you can redistribute it and/or
      6     modify it under the terms of the GNU Lesser General Public
      7     License as published by the Free Software Foundation; either
      8     version 2.1 of the License, or (at your option) any later version.
      9 
     10     This library is distributed in the hope that it will be useful,
     11     but WITHOUT ANY WARRANTY; without even the implied warranty of
     12     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13     Lesser General Public License for more details.
     14 
     15     You should have received a copy of the GNU Lesser General Public
     16     License along with this library; if not, write to the Free Software
     17     Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
     18 
     19     Sam Lantinga
     20     slouken (at) libsdl.org
     21 */
     22 #include "SDL_config.h"
     23 
     24 /* This is the software implementation of the YUV video overlay support */
     25 
     26 /* This code was derived from code carrying the following copyright notices:
     27 
     28  * Copyright (c) 1995 The Regents of the University of California.
     29  * All rights reserved.
     30  *
     31  * Permission to use, copy, modify, and distribute this software and its
     32  * documentation for any purpose, without fee, and without written agreement is
     33  * hereby granted, provided that the above copyright notice and the following
     34  * two paragraphs appear in all copies of this software.
     35  *
     36  * IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR
     37  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
     38  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF
     39  * CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     40  *
     41  * THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES,
     42  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
     43  * AND FITNESS FOR A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS
     44  * ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATION TO
     45  * PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     46 
     47  * Copyright (c) 1995 Erik Corry
     48  * All rights reserved.
     49  *
     50  * Permission to use, copy, modify, and distribute this software and its
     51  * documentation for any purpose, without fee, and without written agreement is
     52  * hereby granted, provided that the above copyright notice and the following
     53  * two paragraphs appear in all copies of this software.
     54  *
     55  * IN NO EVENT SHALL ERIK CORRY BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT,
     56  * SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OF
     57  * THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF ERIK CORRY HAS BEEN ADVISED
     58  * OF THE POSSIBILITY OF SUCH DAMAGE.
     59  *
     60  * ERIK CORRY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
     61  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
     62  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
     63  * BASIS, AND ERIK CORRY HAS NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT,
     64  * UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     65 
     66  * Portions of this software Copyright (c) 1995 Brown University.
     67  * All rights reserved.
     68  *
     69  * Permission to use, copy, modify, and distribute this software and its
     70  * documentation for any purpose, without fee, and without written agreement
     71  * is hereby granted, provided that the above copyright notice and the
     72  * following two paragraphs appear in all copies of this software.
     73  *
     74  * IN NO EVENT SHALL BROWN UNIVERSITY BE LIABLE TO ANY PARTY FOR
     75  * DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT
     76  * OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF BROWN
     77  * UNIVERSITY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     78  *
     79  * BROWN UNIVERSITY SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
     80  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
     81  * PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS"
     82  * BASIS, AND BROWN UNIVERSITY HAS NO OBLIGATION TO PROVIDE MAINTENANCE,
     83  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
     84  */
     85 
     86 #include "SDL_video.h"
     87 #include "SDL_cpuinfo.h"
     88 #include "SDL_stretch_c.h"
     89 #include "SDL_yuvfuncs.h"
     90 #include "SDL_yuv_sw_c.h"
     91 
     92 /* The functions used to manipulate software video overlays */
     93 static struct private_yuvhwfuncs sw_yuvfuncs = {
     94 	SDL_LockYUV_SW,
     95 	SDL_UnlockYUV_SW,
     96 	SDL_DisplayYUV_SW,
     97 	SDL_FreeYUV_SW
     98 };
     99 
    100 /* RGB conversion lookup tables */
    101 struct private_yuvhwdata {
    102 	SDL_Surface *stretch;
    103 	SDL_Surface *display;
    104 	Uint8 *pixels;
    105 	int *colortab;
    106 	Uint32 *rgb_2_pix;
    107 	void (*Display1X)(int *colortab, Uint32 *rgb_2_pix,
    108                           unsigned char *lum, unsigned char *cr,
    109                           unsigned char *cb, unsigned char *out,
    110                           int rows, int cols, int mod );
    111 	void (*Display2X)(int *colortab, Uint32 *rgb_2_pix,
    112 	                  unsigned char *lum, unsigned char *cr,
    113                           unsigned char *cb, unsigned char *out,
    114                           int rows, int cols, int mod );
    115 
    116 	/* These are just so we don't have to allocate them separately */
    117 	Uint16 pitches[3];
    118 	Uint8 *planes[3];
    119 };
    120 
    121 
    122 /* The colorspace conversion functions */
    123 
    124 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
    125 extern void Color565DitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
    126                                      unsigned char *lum, unsigned char *cr,
    127                                      unsigned char *cb, unsigned char *out,
    128                                      int rows, int cols, int mod );
    129 extern void ColorRGBDitherYV12MMX1X( int *colortab, Uint32 *rgb_2_pix,
    130                                      unsigned char *lum, unsigned char *cr,
    131                                      unsigned char *cb, unsigned char *out,
    132                                      int rows, int cols, int mod );
    133 #endif
    134 
    135 static void Color16DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
    136                                     unsigned char *lum, unsigned char *cr,
    137                                     unsigned char *cb, unsigned char *out,
    138                                     int rows, int cols, int mod )
    139 {
    140     unsigned short* row1;
    141     unsigned short* row2;
    142     unsigned char* lum2;
    143     int x, y;
    144     int cr_r;
    145     int crb_g;
    146     int cb_b;
    147     int cols_2 = cols / 2;
    148 
    149     row1 = (unsigned short*) out;
    150     row2 = row1 + cols + mod;
    151     lum2 = lum + cols;
    152 
    153     mod += cols + mod;
    154 
    155     y = rows / 2;
    156     while( y-- )
    157     {
    158         x = cols_2;
    159         while( x-- )
    160         {
    161             register int L;
    162 
    163             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    164             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    165                                + colortab[ *cb + 2*256 ];
    166             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    167             ++cr; ++cb;
    168 
    169             L = *lum++;
    170             *row1++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
    171                                        rgb_2_pix[ L + crb_g ] |
    172                                        rgb_2_pix[ L + cb_b ]);
    173 
    174             L = *lum++;
    175             *row1++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
    176                                        rgb_2_pix[ L + crb_g ] |
    177                                        rgb_2_pix[ L + cb_b ]);
    178 
    179 
    180             /* Now, do second row.  */
    181 
    182             L = *lum2++;
    183             *row2++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
    184                                        rgb_2_pix[ L + crb_g ] |
    185                                        rgb_2_pix[ L + cb_b ]);
    186 
    187             L = *lum2++;
    188             *row2++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
    189                                        rgb_2_pix[ L + crb_g ] |
    190                                        rgb_2_pix[ L + cb_b ]);
    191         }
    192 
    193         /*
    194          * These values are at the start of the next line, (due
    195          * to the ++'s above),but they need to be at the start
    196          * of the line after that.
    197          */
    198         lum  += cols;
    199         lum2 += cols;
    200         row1 += mod;
    201         row2 += mod;
    202     }
    203 }
    204 
    205 static void Color24DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
    206                                     unsigned char *lum, unsigned char *cr,
    207                                     unsigned char *cb, unsigned char *out,
    208                                     int rows, int cols, int mod )
    209 {
    210     unsigned int value;
    211     unsigned char* row1;
    212     unsigned char* row2;
    213     unsigned char* lum2;
    214     int x, y;
    215     int cr_r;
    216     int crb_g;
    217     int cb_b;
    218     int cols_2 = cols / 2;
    219 
    220     row1 = out;
    221     row2 = row1 + cols*3 + mod*3;
    222     lum2 = lum + cols;
    223 
    224     mod += cols + mod;
    225     mod *= 3;
    226 
    227     y = rows / 2;
    228     while( y-- )
    229     {
    230         x = cols_2;
    231         while( x-- )
    232         {
    233             register int L;
    234 
    235             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    236             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    237                                + colortab[ *cb + 2*256 ];
    238             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    239             ++cr; ++cb;
    240 
    241             L = *lum++;
    242             value = (rgb_2_pix[ L + cr_r ] |
    243                      rgb_2_pix[ L + crb_g ] |
    244                      rgb_2_pix[ L + cb_b ]);
    245             *row1++ = (value      ) & 0xFF;
    246             *row1++ = (value >>  8) & 0xFF;
    247             *row1++ = (value >> 16) & 0xFF;
    248 
    249             L = *lum++;
    250             value = (rgb_2_pix[ L + cr_r ] |
    251                      rgb_2_pix[ L + crb_g ] |
    252                      rgb_2_pix[ L + cb_b ]);
    253             *row1++ = (value      ) & 0xFF;
    254             *row1++ = (value >>  8) & 0xFF;
    255             *row1++ = (value >> 16) & 0xFF;
    256 
    257 
    258             /* Now, do second row.  */
    259 
    260             L = *lum2++;
    261             value = (rgb_2_pix[ L + cr_r ] |
    262                      rgb_2_pix[ L + crb_g ] |
    263                      rgb_2_pix[ L + cb_b ]);
    264             *row2++ = (value      ) & 0xFF;
    265             *row2++ = (value >>  8) & 0xFF;
    266             *row2++ = (value >> 16) & 0xFF;
    267 
    268             L = *lum2++;
    269             value = (rgb_2_pix[ L + cr_r ] |
    270                      rgb_2_pix[ L + crb_g ] |
    271                      rgb_2_pix[ L + cb_b ]);
    272             *row2++ = (value      ) & 0xFF;
    273             *row2++ = (value >>  8) & 0xFF;
    274             *row2++ = (value >> 16) & 0xFF;
    275         }
    276 
    277         /*
    278          * These values are at the start of the next line, (due
    279          * to the ++'s above),but they need to be at the start
    280          * of the line after that.
    281          */
    282         lum  += cols;
    283         lum2 += cols;
    284         row1 += mod;
    285         row2 += mod;
    286     }
    287 }
    288 
    289 static void Color32DitherYV12Mod1X( int *colortab, Uint32 *rgb_2_pix,
    290                                     unsigned char *lum, unsigned char *cr,
    291                                     unsigned char *cb, unsigned char *out,
    292                                     int rows, int cols, int mod )
    293 {
    294     unsigned int* row1;
    295     unsigned int* row2;
    296     unsigned char* lum2;
    297     int x, y;
    298     int cr_r;
    299     int crb_g;
    300     int cb_b;
    301     int cols_2 = cols / 2;
    302 
    303     row1 = (unsigned int*) out;
    304     row2 = row1 + cols + mod;
    305     lum2 = lum + cols;
    306 
    307     mod += cols + mod;
    308 
    309     y = rows / 2;
    310     while( y-- )
    311     {
    312         x = cols_2;
    313         while( x-- )
    314         {
    315             register int L;
    316 
    317             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    318             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    319                                + colortab[ *cb + 2*256 ];
    320             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    321             ++cr; ++cb;
    322 
    323             L = *lum++;
    324             *row1++ = (rgb_2_pix[ L + cr_r ] |
    325                        rgb_2_pix[ L + crb_g ] |
    326                        rgb_2_pix[ L + cb_b ]);
    327 
    328             L = *lum++;
    329             *row1++ = (rgb_2_pix[ L + cr_r ] |
    330                        rgb_2_pix[ L + crb_g ] |
    331                        rgb_2_pix[ L + cb_b ]);
    332 
    333 
    334             /* Now, do second row.  */
    335 
    336             L = *lum2++;
    337             *row2++ = (rgb_2_pix[ L + cr_r ] |
    338                        rgb_2_pix[ L + crb_g ] |
    339                        rgb_2_pix[ L + cb_b ]);
    340 
    341             L = *lum2++;
    342             *row2++ = (rgb_2_pix[ L + cr_r ] |
    343                        rgb_2_pix[ L + crb_g ] |
    344                        rgb_2_pix[ L + cb_b ]);
    345         }
    346 
    347         /*
    348          * These values are at the start of the next line, (due
    349          * to the ++'s above),but they need to be at the start
    350          * of the line after that.
    351          */
    352         lum  += cols;
    353         lum2 += cols;
    354         row1 += mod;
    355         row2 += mod;
    356     }
    357 }
    358 
    359 /*
    360  * In this function I make use of a nasty trick. The tables have the lower
    361  * 16 bits replicated in the upper 16. This means I can write ints and get
    362  * the horisontal doubling for free (almost).
    363  */
    364 static void Color16DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
    365                                     unsigned char *lum, unsigned char *cr,
    366                                     unsigned char *cb, unsigned char *out,
    367                                     int rows, int cols, int mod )
    368 {
    369     unsigned int* row1 = (unsigned int*) out;
    370     const int next_row = cols+(mod/2);
    371     unsigned int* row2 = row1 + 2*next_row;
    372     unsigned char* lum2;
    373     int x, y;
    374     int cr_r;
    375     int crb_g;
    376     int cb_b;
    377     int cols_2 = cols / 2;
    378 
    379     lum2 = lum + cols;
    380 
    381     mod = (next_row * 3) + (mod/2);
    382 
    383     y = rows / 2;
    384     while( y-- )
    385     {
    386         x = cols_2;
    387         while( x-- )
    388         {
    389             register int L;
    390 
    391             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    392             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    393                                + colortab[ *cb + 2*256 ];
    394             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    395             ++cr; ++cb;
    396 
    397             L = *lum++;
    398             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
    399                                         rgb_2_pix[ L + crb_g ] |
    400                                         rgb_2_pix[ L + cb_b ]);
    401             row1++;
    402 
    403             L = *lum++;
    404             row1[0] = row1[next_row] = (rgb_2_pix[ L + cr_r ] |
    405                                         rgb_2_pix[ L + crb_g ] |
    406                                         rgb_2_pix[ L + cb_b ]);
    407             row1++;
    408 
    409 
    410             /* Now, do second row. */
    411 
    412             L = *lum2++;
    413             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
    414                                         rgb_2_pix[ L + crb_g ] |
    415                                         rgb_2_pix[ L + cb_b ]);
    416             row2++;
    417 
    418             L = *lum2++;
    419             row2[0] = row2[next_row] = (rgb_2_pix[ L + cr_r ] |
    420                                         rgb_2_pix[ L + crb_g ] |
    421                                         rgb_2_pix[ L + cb_b ]);
    422             row2++;
    423         }
    424 
    425         /*
    426          * These values are at the start of the next line, (due
    427          * to the ++'s above),but they need to be at the start
    428          * of the line after that.
    429          */
    430         lum  += cols;
    431         lum2 += cols;
    432         row1 += mod;
    433         row2 += mod;
    434     }
    435 }
    436 
    437 static void Color24DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
    438                                     unsigned char *lum, unsigned char *cr,
    439                                     unsigned char *cb, unsigned char *out,
    440                                     int rows, int cols, int mod )
    441 {
    442     unsigned int value;
    443     unsigned char* row1 = out;
    444     const int next_row = (cols*2 + mod) * 3;
    445     unsigned char* row2 = row1 + 2*next_row;
    446     unsigned char* lum2;
    447     int x, y;
    448     int cr_r;
    449     int crb_g;
    450     int cb_b;
    451     int cols_2 = cols / 2;
    452 
    453     lum2 = lum + cols;
    454 
    455     mod = next_row*3 + mod*3;
    456 
    457     y = rows / 2;
    458     while( y-- )
    459     {
    460         x = cols_2;
    461         while( x-- )
    462         {
    463             register int L;
    464 
    465             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    466             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    467                                + colortab[ *cb + 2*256 ];
    468             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    469             ++cr; ++cb;
    470 
    471             L = *lum++;
    472             value = (rgb_2_pix[ L + cr_r ] |
    473                      rgb_2_pix[ L + crb_g ] |
    474                      rgb_2_pix[ L + cb_b ]);
    475             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
    476                      (value      ) & 0xFF;
    477             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
    478                      (value >>  8) & 0xFF;
    479             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
    480                      (value >> 16) & 0xFF;
    481             row1 += 2*3;
    482 
    483             L = *lum++;
    484             value = (rgb_2_pix[ L + cr_r ] |
    485                      rgb_2_pix[ L + crb_g ] |
    486                      rgb_2_pix[ L + cb_b ]);
    487             row1[0+0] = row1[3+0] = row1[next_row+0] = row1[next_row+3+0] =
    488                      (value      ) & 0xFF;
    489             row1[0+1] = row1[3+1] = row1[next_row+1] = row1[next_row+3+1] =
    490                      (value >>  8) & 0xFF;
    491             row1[0+2] = row1[3+2] = row1[next_row+2] = row1[next_row+3+2] =
    492                      (value >> 16) & 0xFF;
    493             row1 += 2*3;
    494 
    495 
    496             /* Now, do second row. */
    497 
    498             L = *lum2++;
    499             value = (rgb_2_pix[ L + cr_r ] |
    500                      rgb_2_pix[ L + crb_g ] |
    501                      rgb_2_pix[ L + cb_b ]);
    502             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
    503                      (value      ) & 0xFF;
    504             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
    505                      (value >>  8) & 0xFF;
    506             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
    507                      (value >> 16) & 0xFF;
    508             row2 += 2*3;
    509 
    510             L = *lum2++;
    511             value = (rgb_2_pix[ L + cr_r ] |
    512                      rgb_2_pix[ L + crb_g ] |
    513                      rgb_2_pix[ L + cb_b ]);
    514             row2[0+0] = row2[3+0] = row2[next_row+0] = row2[next_row+3+0] =
    515                      (value      ) & 0xFF;
    516             row2[0+1] = row2[3+1] = row2[next_row+1] = row2[next_row+3+1] =
    517                      (value >>  8) & 0xFF;
    518             row2[0+2] = row2[3+2] = row2[next_row+2] = row2[next_row+3+2] =
    519                      (value >> 16) & 0xFF;
    520             row2 += 2*3;
    521         }
    522 
    523         /*
    524          * These values are at the start of the next line, (due
    525          * to the ++'s above),but they need to be at the start
    526          * of the line after that.
    527          */
    528         lum  += cols;
    529         lum2 += cols;
    530         row1 += mod;
    531         row2 += mod;
    532     }
    533 }
    534 
    535 static void Color32DitherYV12Mod2X( int *colortab, Uint32 *rgb_2_pix,
    536                                     unsigned char *lum, unsigned char *cr,
    537                                     unsigned char *cb, unsigned char *out,
    538                                     int rows, int cols, int mod )
    539 {
    540     unsigned int* row1 = (unsigned int*) out;
    541     const int next_row = cols*2+mod;
    542     unsigned int* row2 = row1 + 2*next_row;
    543     unsigned char* lum2;
    544     int x, y;
    545     int cr_r;
    546     int crb_g;
    547     int cb_b;
    548     int cols_2 = cols / 2;
    549 
    550     lum2 = lum + cols;
    551 
    552     mod = (next_row * 3) + mod;
    553 
    554     y = rows / 2;
    555     while( y-- )
    556     {
    557         x = cols_2;
    558         while( x-- )
    559         {
    560             register int L;
    561 
    562             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    563             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    564                                + colortab[ *cb + 2*256 ];
    565             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    566             ++cr; ++cb;
    567 
    568             L = *lum++;
    569             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
    570                                        (rgb_2_pix[ L + cr_r ] |
    571                                         rgb_2_pix[ L + crb_g ] |
    572                                         rgb_2_pix[ L + cb_b ]);
    573             row1 += 2;
    574 
    575             L = *lum++;
    576             row1[0] = row1[1] = row1[next_row] = row1[next_row+1] =
    577                                        (rgb_2_pix[ L + cr_r ] |
    578                                         rgb_2_pix[ L + crb_g ] |
    579                                         rgb_2_pix[ L + cb_b ]);
    580             row1 += 2;
    581 
    582 
    583             /* Now, do second row. */
    584 
    585             L = *lum2++;
    586             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
    587                                        (rgb_2_pix[ L + cr_r ] |
    588                                         rgb_2_pix[ L + crb_g ] |
    589                                         rgb_2_pix[ L + cb_b ]);
    590             row2 += 2;
    591 
    592             L = *lum2++;
    593             row2[0] = row2[1] = row2[next_row] = row2[next_row+1] =
    594                                        (rgb_2_pix[ L + cr_r ] |
    595                                         rgb_2_pix[ L + crb_g ] |
    596                                         rgb_2_pix[ L + cb_b ]);
    597             row2 += 2;
    598         }
    599 
    600         /*
    601          * These values are at the start of the next line, (due
    602          * to the ++'s above),but they need to be at the start
    603          * of the line after that.
    604          */
    605         lum  += cols;
    606         lum2 += cols;
    607         row1 += mod;
    608         row2 += mod;
    609     }
    610 }
    611 
    612 static void Color16DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
    613                                     unsigned char *lum, unsigned char *cr,
    614                                     unsigned char *cb, unsigned char *out,
    615                                     int rows, int cols, int mod )
    616 {
    617     unsigned short* row;
    618     int x, y;
    619     int cr_r;
    620     int crb_g;
    621     int cb_b;
    622     int cols_2 = cols / 2;
    623 
    624     row = (unsigned short*) out;
    625 
    626     y = rows;
    627     while( y-- )
    628     {
    629         x = cols_2;
    630         while( x-- )
    631         {
    632             register int L;
    633 
    634             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    635             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    636                                + colortab[ *cb + 2*256 ];
    637             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    638             cr += 4; cb += 4;
    639 
    640             L = *lum; lum += 2;
    641             *row++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
    642                                       rgb_2_pix[ L + crb_g ] |
    643                                       rgb_2_pix[ L + cb_b ]);
    644 
    645             L = *lum; lum += 2;
    646             *row++ = (unsigned short)(rgb_2_pix[ L + cr_r ] |
    647                                       rgb_2_pix[ L + crb_g ] |
    648                                       rgb_2_pix[ L + cb_b ]);
    649 
    650         }
    651 
    652         row += mod;
    653     }
    654 }
    655 
    656 static void Color24DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
    657                                     unsigned char *lum, unsigned char *cr,
    658                                     unsigned char *cb, unsigned char *out,
    659                                     int rows, int cols, int mod )
    660 {
    661     unsigned int value;
    662     unsigned char* row;
    663     int x, y;
    664     int cr_r;
    665     int crb_g;
    666     int cb_b;
    667     int cols_2 = cols / 2;
    668 
    669     row = (unsigned char*) out;
    670     mod *= 3;
    671     y = rows;
    672     while( y-- )
    673     {
    674         x = cols_2;
    675         while( x-- )
    676         {
    677             register int L;
    678 
    679             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    680             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    681                                + colortab[ *cb + 2*256 ];
    682             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    683             cr += 4; cb += 4;
    684 
    685             L = *lum; lum += 2;
    686             value = (rgb_2_pix[ L + cr_r ] |
    687                      rgb_2_pix[ L + crb_g ] |
    688                      rgb_2_pix[ L + cb_b ]);
    689             *row++ = (value      ) & 0xFF;
    690             *row++ = (value >>  8) & 0xFF;
    691             *row++ = (value >> 16) & 0xFF;
    692 
    693             L = *lum; lum += 2;
    694             value = (rgb_2_pix[ L + cr_r ] |
    695                      rgb_2_pix[ L + crb_g ] |
    696                      rgb_2_pix[ L + cb_b ]);
    697             *row++ = (value      ) & 0xFF;
    698             *row++ = (value >>  8) & 0xFF;
    699             *row++ = (value >> 16) & 0xFF;
    700 
    701         }
    702         row += mod;
    703     }
    704 }
    705 
    706 static void Color32DitherYUY2Mod1X( int *colortab, Uint32 *rgb_2_pix,
    707                                     unsigned char *lum, unsigned char *cr,
    708                                     unsigned char *cb, unsigned char *out,
    709                                     int rows, int cols, int mod )
    710 {
    711     unsigned int* row;
    712     int x, y;
    713     int cr_r;
    714     int crb_g;
    715     int cb_b;
    716     int cols_2 = cols / 2;
    717 
    718     row = (unsigned int*) out;
    719     y = rows;
    720     while( y-- )
    721     {
    722         x = cols_2;
    723         while( x-- )
    724         {
    725             register int L;
    726 
    727             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    728             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    729                                + colortab[ *cb + 2*256 ];
    730             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    731             cr += 4; cb += 4;
    732 
    733             L = *lum; lum += 2;
    734             *row++ = (rgb_2_pix[ L + cr_r ] |
    735                        rgb_2_pix[ L + crb_g ] |
    736                        rgb_2_pix[ L + cb_b ]);
    737 
    738             L = *lum; lum += 2;
    739             *row++ = (rgb_2_pix[ L + cr_r ] |
    740                        rgb_2_pix[ L + crb_g ] |
    741                        rgb_2_pix[ L + cb_b ]);
    742 
    743 
    744         }
    745         row += mod;
    746     }
    747 }
    748 
    749 /*
    750  * In this function I make use of a nasty trick. The tables have the lower
    751  * 16 bits replicated in the upper 16. This means I can write ints and get
    752  * the horisontal doubling for free (almost).
    753  */
    754 static void Color16DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
    755                                     unsigned char *lum, unsigned char *cr,
    756                                     unsigned char *cb, unsigned char *out,
    757                                     int rows, int cols, int mod )
    758 {
    759     unsigned int* row = (unsigned int*) out;
    760     const int next_row = cols+(mod/2);
    761     int x, y;
    762     int cr_r;
    763     int crb_g;
    764     int cb_b;
    765     int cols_2 = cols / 2;
    766 
    767     y = rows;
    768     while( y-- )
    769     {
    770         x = cols_2;
    771         while( x-- )
    772         {
    773             register int L;
    774 
    775             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    776             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    777                                + colortab[ *cb + 2*256 ];
    778             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    779             cr += 4; cb += 4;
    780 
    781             L = *lum; lum += 2;
    782             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
    783                                         rgb_2_pix[ L + crb_g ] |
    784                                         rgb_2_pix[ L + cb_b ]);
    785             row++;
    786 
    787             L = *lum; lum += 2;
    788             row[0] = row[next_row] = (rgb_2_pix[ L + cr_r ] |
    789                                         rgb_2_pix[ L + crb_g ] |
    790                                         rgb_2_pix[ L + cb_b ]);
    791             row++;
    792 
    793         }
    794         row += next_row;
    795     }
    796 }
    797 
    798 static void Color24DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
    799                                     unsigned char *lum, unsigned char *cr,
    800                                     unsigned char *cb, unsigned char *out,
    801                                     int rows, int cols, int mod )
    802 {
    803     unsigned int value;
    804     unsigned char* row = out;
    805     const int next_row = (cols*2 + mod) * 3;
    806     int x, y;
    807     int cr_r;
    808     int crb_g;
    809     int cb_b;
    810     int cols_2 = cols / 2;
    811     y = rows;
    812     while( y-- )
    813     {
    814         x = cols_2;
    815         while( x-- )
    816         {
    817             register int L;
    818 
    819             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    820             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    821                                + colortab[ *cb + 2*256 ];
    822             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    823             cr += 4; cb += 4;
    824 
    825             L = *lum; lum += 2;
    826             value = (rgb_2_pix[ L + cr_r ] |
    827                      rgb_2_pix[ L + crb_g ] |
    828                      rgb_2_pix[ L + cb_b ]);
    829             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
    830                      (value      ) & 0xFF;
    831             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
    832                      (value >>  8) & 0xFF;
    833             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
    834                      (value >> 16) & 0xFF;
    835             row += 2*3;
    836 
    837             L = *lum; lum += 2;
    838             value = (rgb_2_pix[ L + cr_r ] |
    839                      rgb_2_pix[ L + crb_g ] |
    840                      rgb_2_pix[ L + cb_b ]);
    841             row[0+0] = row[3+0] = row[next_row+0] = row[next_row+3+0] =
    842                      (value      ) & 0xFF;
    843             row[0+1] = row[3+1] = row[next_row+1] = row[next_row+3+1] =
    844                      (value >>  8) & 0xFF;
    845             row[0+2] = row[3+2] = row[next_row+2] = row[next_row+3+2] =
    846                      (value >> 16) & 0xFF;
    847             row += 2*3;
    848 
    849         }
    850         row += next_row;
    851     }
    852 }
    853 
    854 static void Color32DitherYUY2Mod2X( int *colortab, Uint32 *rgb_2_pix,
    855                                     unsigned char *lum, unsigned char *cr,
    856                                     unsigned char *cb, unsigned char *out,
    857                                     int rows, int cols, int mod )
    858 {
    859     unsigned int* row = (unsigned int*) out;
    860     const int next_row = cols*2+mod;
    861     int x, y;
    862     int cr_r;
    863     int crb_g;
    864     int cb_b;
    865     int cols_2 = cols / 2;
    866     mod+=mod;
    867     y = rows;
    868     while( y-- )
    869     {
    870         x = cols_2;
    871         while( x-- )
    872         {
    873             register int L;
    874 
    875             cr_r   = 0*768+256 + colortab[ *cr + 0*256 ];
    876             crb_g  = 1*768+256 + colortab[ *cr + 1*256 ]
    877                                + colortab[ *cb + 2*256 ];
    878             cb_b   = 2*768+256 + colortab[ *cb + 3*256 ];
    879             cr += 4; cb += 4;
    880 
    881             L = *lum; lum += 2;
    882             row[0] = row[1] = row[next_row] = row[next_row+1] =
    883                                        (rgb_2_pix[ L + cr_r ] |
    884                                         rgb_2_pix[ L + crb_g ] |
    885                                         rgb_2_pix[ L + cb_b ]);
    886             row += 2;
    887 
    888             L = *lum; lum += 2;
    889             row[0] = row[1] = row[next_row] = row[next_row+1] =
    890                                        (rgb_2_pix[ L + cr_r ] |
    891                                         rgb_2_pix[ L + crb_g ] |
    892                                         rgb_2_pix[ L + cb_b ]);
    893             row += 2;
    894 
    895 
    896         }
    897 
    898         row += next_row;
    899     }
    900 }
    901 
    902 /*
    903  * How many 1 bits are there in the Uint32.
    904  * Low performance, do not call often.
    905  */
    906 static int number_of_bits_set( Uint32 a )
    907 {
    908     if(!a) return 0;
    909     if(a & 1) return 1 + number_of_bits_set(a >> 1);
    910     return(number_of_bits_set(a >> 1));
    911 }
    912 
    913 /*
    914  * How many 0 bits are there at least significant end of Uint32.
    915  * Low performance, do not call often.
    916  */
    917 static int free_bits_at_bottom( Uint32 a )
    918 {
    919       /* assume char is 8 bits */
    920     if(!a) return sizeof(Uint32) * 8;
    921     if(((Sint32)a) & 1l) return 0;
    922     return 1 + free_bits_at_bottom ( a >> 1);
    923 }
    924 
    925 
    926 SDL_Overlay *SDL_CreateYUV_SW(_THIS, int width, int height, Uint32 format, SDL_Surface *display)
    927 {
    928 	SDL_Overlay *overlay;
    929 	struct private_yuvhwdata *swdata;
    930 	int *Cr_r_tab;
    931 	int *Cr_g_tab;
    932 	int *Cb_g_tab;
    933 	int *Cb_b_tab;
    934 	Uint32 *r_2_pix_alloc;
    935 	Uint32 *g_2_pix_alloc;
    936 	Uint32 *b_2_pix_alloc;
    937 	int i;
    938 	int CR, CB;
    939 	Uint32 Rmask, Gmask, Bmask;
    940 
    941 	/* Only RGB packed pixel conversion supported */
    942 	if ( (display->format->BytesPerPixel != 2) &&
    943 	     (display->format->BytesPerPixel != 3) &&
    944 	     (display->format->BytesPerPixel != 4) ) {
    945 		SDL_SetError("Can't use YUV data on non 16/24/32 bit surfaces");
    946 		return(NULL);
    947 	}
    948 
    949 	/* Verify that we support the format */
    950 	switch (format) {
    951 	    case SDL_YV12_OVERLAY:
    952 	    case SDL_IYUV_OVERLAY:
    953 	    case SDL_YUY2_OVERLAY:
    954 	    case SDL_UYVY_OVERLAY:
    955 	    case SDL_YVYU_OVERLAY:
    956 		break;
    957 	    default:
    958 		SDL_SetError("Unsupported YUV format");
    959 		return(NULL);
    960 	}
    961 
    962 	/* Create the overlay structure */
    963 	overlay = (SDL_Overlay *)SDL_malloc(sizeof *overlay);
    964 	if ( overlay == NULL ) {
    965 		SDL_OutOfMemory();
    966 		return(NULL);
    967 	}
    968 	SDL_memset(overlay, 0, (sizeof *overlay));
    969 
    970 	/* Fill in the basic members */
    971 	overlay->format = format;
    972 	overlay->w = width;
    973 	overlay->h = height;
    974 
    975 	/* Set up the YUV surface function structure */
    976 	overlay->hwfuncs = &sw_yuvfuncs;
    977 
    978 	/* Create the pixel data and lookup tables */
    979 	swdata = (struct private_yuvhwdata *)SDL_malloc(sizeof *swdata);
    980 	overlay->hwdata = swdata;
    981 	if ( swdata == NULL ) {
    982 		SDL_OutOfMemory();
    983 		SDL_FreeYUVOverlay(overlay);
    984 		return(NULL);
    985 	}
    986 	swdata->stretch = NULL;
    987 	swdata->display = display;
    988 	swdata->pixels = (Uint8 *) SDL_malloc(width*height*2);
    989 	swdata->colortab = (int *)SDL_malloc(4*256*sizeof(int));
    990 	Cr_r_tab = &swdata->colortab[0*256];
    991 	Cr_g_tab = &swdata->colortab[1*256];
    992 	Cb_g_tab = &swdata->colortab[2*256];
    993 	Cb_b_tab = &swdata->colortab[3*256];
    994 	swdata->rgb_2_pix = (Uint32 *)SDL_malloc(3*768*sizeof(Uint32));
    995 	r_2_pix_alloc = &swdata->rgb_2_pix[0*768];
    996 	g_2_pix_alloc = &swdata->rgb_2_pix[1*768];
    997 	b_2_pix_alloc = &swdata->rgb_2_pix[2*768];
    998 	if ( ! swdata->pixels || ! swdata->colortab || ! swdata->rgb_2_pix ) {
    999 		SDL_OutOfMemory();
   1000 		SDL_FreeYUVOverlay(overlay);
   1001 		return(NULL);
   1002 	}
   1003 
   1004 	/* Generate the tables for the display surface */
   1005 	for (i=0; i<256; i++) {
   1006 		/* Gamma correction (luminescence table) and chroma correction
   1007 		   would be done here.  See the Berkeley mpeg_play sources.
   1008 		*/
   1009 		CB = CR = (i-128);
   1010 		Cr_r_tab[i] = (int) ( (0.419/0.299) * CR);
   1011 		Cr_g_tab[i] = (int) (-(0.299/0.419) * CR);
   1012 		Cb_g_tab[i] = (int) (-(0.114/0.331) * CB);
   1013 		Cb_b_tab[i] = (int) ( (0.587/0.331) * CB);
   1014 	}
   1015 
   1016 	/*
   1017 	 * Set up entries 0-255 in rgb-to-pixel value tables.
   1018 	 */
   1019 	Rmask = display->format->Rmask;
   1020 	Gmask = display->format->Gmask;
   1021 	Bmask = display->format->Bmask;
   1022 	for ( i=0; i<256; ++i ) {
   1023 		r_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Rmask));
   1024 		r_2_pix_alloc[i+256] <<= free_bits_at_bottom(Rmask);
   1025 		g_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Gmask));
   1026 		g_2_pix_alloc[i+256] <<= free_bits_at_bottom(Gmask);
   1027 		b_2_pix_alloc[i+256] = i >> (8 - number_of_bits_set(Bmask));
   1028 		b_2_pix_alloc[i+256] <<= free_bits_at_bottom(Bmask);
   1029 	}
   1030 
   1031 	/*
   1032 	 * If we have 16-bit output depth, then we double the value
   1033 	 * in the top word. This means that we can write out both
   1034 	 * pixels in the pixel doubling mode with one op. It is
   1035 	 * harmless in the normal case as storing a 32-bit value
   1036 	 * through a short pointer will lose the top bits anyway.
   1037 	 */
   1038 	if( display->format->BytesPerPixel == 2 ) {
   1039 		for ( i=0; i<256; ++i ) {
   1040 			r_2_pix_alloc[i+256] |= (r_2_pix_alloc[i+256]) << 16;
   1041 			g_2_pix_alloc[i+256] |= (g_2_pix_alloc[i+256]) << 16;
   1042 			b_2_pix_alloc[i+256] |= (b_2_pix_alloc[i+256]) << 16;
   1043 		}
   1044 	}
   1045 
   1046 	/*
   1047 	 * Spread out the values we have to the rest of the array so that
   1048 	 * we do not need to check for overflow.
   1049 	 */
   1050 	for ( i=0; i<256; ++i ) {
   1051 		r_2_pix_alloc[i] = r_2_pix_alloc[256];
   1052 		r_2_pix_alloc[i+512] = r_2_pix_alloc[511];
   1053 		g_2_pix_alloc[i] = g_2_pix_alloc[256];
   1054 		g_2_pix_alloc[i+512] = g_2_pix_alloc[511];
   1055 		b_2_pix_alloc[i] = b_2_pix_alloc[256];
   1056 		b_2_pix_alloc[i+512] = b_2_pix_alloc[511];
   1057 	}
   1058 
   1059 	/* You have chosen wisely... */
   1060 	switch (format) {
   1061 	    case SDL_YV12_OVERLAY:
   1062 	    case SDL_IYUV_OVERLAY:
   1063 		if ( display->format->BytesPerPixel == 2 ) {
   1064 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   1065 			/* inline assembly functions */
   1066 			if ( SDL_HasMMX() && (Rmask == 0xF800) &&
   1067 			                     (Gmask == 0x07E0) &&
   1068 				             (Bmask == 0x001F) &&
   1069 			                     (width & 15) == 0) {
   1070 /*printf("Using MMX 16-bit 565 dither\n");*/
   1071 				swdata->Display1X = Color565DitherYV12MMX1X;
   1072 			} else {
   1073 /*printf("Using C 16-bit dither\n");*/
   1074 				swdata->Display1X = Color16DitherYV12Mod1X;
   1075 			}
   1076 #else
   1077 			swdata->Display1X = Color16DitherYV12Mod1X;
   1078 #endif
   1079 			swdata->Display2X = Color16DitherYV12Mod2X;
   1080 		}
   1081 		if ( display->format->BytesPerPixel == 3 ) {
   1082 			swdata->Display1X = Color24DitherYV12Mod1X;
   1083 			swdata->Display2X = Color24DitherYV12Mod2X;
   1084 		}
   1085 		if ( display->format->BytesPerPixel == 4 ) {
   1086 #if (__GNUC__ > 2) && defined(__i386__) && __OPTIMIZE__ && SDL_ASSEMBLY_ROUTINES
   1087 			/* inline assembly functions */
   1088 			if ( SDL_HasMMX() && (Rmask == 0x00FF0000) &&
   1089 			                     (Gmask == 0x0000FF00) &&
   1090 				             (Bmask == 0x000000FF) &&
   1091 			                     (width & 15) == 0) {
   1092 /*printf("Using MMX 32-bit dither\n");*/
   1093 				swdata->Display1X = ColorRGBDitherYV12MMX1X;
   1094 			} else {
   1095 /*printf("Using C 32-bit dither\n");*/
   1096 				swdata->Display1X = Color32DitherYV12Mod1X;
   1097 			}
   1098 #else
   1099 			swdata->Display1X = Color32DitherYV12Mod1X;
   1100 #endif
   1101 			swdata->Display2X = Color32DitherYV12Mod2X;
   1102 		}
   1103 		break;
   1104 	    case SDL_YUY2_OVERLAY:
   1105 	    case SDL_UYVY_OVERLAY:
   1106 	    case SDL_YVYU_OVERLAY:
   1107 		if ( display->format->BytesPerPixel == 2 ) {
   1108 			swdata->Display1X = Color16DitherYUY2Mod1X;
   1109 			swdata->Display2X = Color16DitherYUY2Mod2X;
   1110 		}
   1111 		if ( display->format->BytesPerPixel == 3 ) {
   1112 			swdata->Display1X = Color24DitherYUY2Mod1X;
   1113 			swdata->Display2X = Color24DitherYUY2Mod2X;
   1114 		}
   1115 		if ( display->format->BytesPerPixel == 4 ) {
   1116 			swdata->Display1X = Color32DitherYUY2Mod1X;
   1117 			swdata->Display2X = Color32DitherYUY2Mod2X;
   1118 		}
   1119 		break;
   1120 	    default:
   1121 		/* We should never get here (caught above) */
   1122 		break;
   1123 	}
   1124 
   1125 	/* Find the pitch and offset values for the overlay */
   1126 	overlay->pitches = swdata->pitches;
   1127 	overlay->pixels = swdata->planes;
   1128 	switch (format) {
   1129 	    case SDL_YV12_OVERLAY:
   1130 	    case SDL_IYUV_OVERLAY:
   1131 		overlay->pitches[0] = overlay->w;
   1132 		overlay->pitches[1] = overlay->pitches[0] / 2;
   1133 		overlay->pitches[2] = overlay->pitches[0] / 2;
   1134 	        overlay->pixels[0] = swdata->pixels;
   1135 	        overlay->pixels[1] = overlay->pixels[0] +
   1136 		                     overlay->pitches[0] * overlay->h;
   1137 	        overlay->pixels[2] = overlay->pixels[1] +
   1138 		                     overlay->pitches[1] * overlay->h / 2;
   1139 		overlay->planes = 3;
   1140 		break;
   1141 	    case SDL_YUY2_OVERLAY:
   1142 	    case SDL_UYVY_OVERLAY:
   1143 	    case SDL_YVYU_OVERLAY:
   1144 		overlay->pitches[0] = overlay->w*2;
   1145 	        overlay->pixels[0] = swdata->pixels;
   1146 		overlay->planes = 1;
   1147 		break;
   1148 	    default:
   1149 		/* We should never get here (caught above) */
   1150 		break;
   1151 	}
   1152 
   1153 	/* We're all done.. */
   1154 	return(overlay);
   1155 }
   1156 
   1157 int SDL_LockYUV_SW(_THIS, SDL_Overlay *overlay)
   1158 {
   1159 	return(0);
   1160 }
   1161 
   1162 void SDL_UnlockYUV_SW(_THIS, SDL_Overlay *overlay)
   1163 {
   1164 	return;
   1165 }
   1166 
   1167 int SDL_DisplayYUV_SW(_THIS, SDL_Overlay *overlay, SDL_Rect *src, SDL_Rect *dst)
   1168 {
   1169 	struct private_yuvhwdata *swdata;
   1170 	int stretch;
   1171 	int scale_2x;
   1172 	SDL_Surface *display;
   1173 	Uint8 *lum, *Cr, *Cb;
   1174 	Uint8 *dstp;
   1175 	int mod;
   1176 
   1177 	swdata = overlay->hwdata;
   1178 	stretch = 0;
   1179 	scale_2x = 0;
   1180 	if ( src->x || src->y || src->w < overlay->w || src->h < overlay->h ) {
   1181 		/* The source rectangle has been clipped.
   1182 		   Using a scratch surface is easier than adding clipped
   1183 		   source support to all the blitters, plus that would
   1184 		   slow them down in the general unclipped case.
   1185 		*/
   1186 		stretch = 1;
   1187 	} else if ( (src->w != dst->w) || (src->h != dst->h) ) {
   1188 		if ( (dst->w == 2*src->w) &&
   1189 		     (dst->h == 2*src->h) ) {
   1190 			scale_2x = 1;
   1191 		} else {
   1192 			stretch = 1;
   1193 		}
   1194 	}
   1195 	if ( stretch ) {
   1196 		if ( ! swdata->stretch ) {
   1197 			display = swdata->display;
   1198 			swdata->stretch = SDL_CreateRGBSurface(
   1199 				SDL_SWSURFACE,
   1200 				overlay->w, overlay->h,
   1201 				display->format->BitsPerPixel,
   1202 				display->format->Rmask,
   1203 				display->format->Gmask,
   1204 				display->format->Bmask, 0);
   1205 			if ( ! swdata->stretch ) {
   1206 				return(-1);
   1207 			}
   1208 		}
   1209 		display = swdata->stretch;
   1210 	} else {
   1211 		display = swdata->display;
   1212 	}
   1213 	switch (overlay->format) {
   1214 	    case SDL_YV12_OVERLAY:
   1215 		lum = overlay->pixels[0];
   1216 		Cr =  overlay->pixels[1];
   1217 		Cb =  overlay->pixels[2];
   1218 		break;
   1219 	    case SDL_IYUV_OVERLAY:
   1220 		lum = overlay->pixels[0];
   1221 		Cr =  overlay->pixels[2];
   1222 		Cb =  overlay->pixels[1];
   1223 		break;
   1224 	    case SDL_YUY2_OVERLAY:
   1225 		lum = overlay->pixels[0];
   1226 		Cr = lum + 3;
   1227 		Cb = lum + 1;
   1228 		break;
   1229 	    case SDL_UYVY_OVERLAY:
   1230 		lum = overlay->pixels[0]+1;
   1231 		Cr = lum + 1;
   1232 		Cb = lum - 1;
   1233 		break;
   1234 	    case SDL_YVYU_OVERLAY:
   1235 		lum = overlay->pixels[0];
   1236 		Cr = lum + 1;
   1237 		Cb = lum + 3;
   1238 		break;
   1239 	    default:
   1240 		SDL_SetError("Unsupported YUV format in blit");
   1241 		return(-1);
   1242 	}
   1243 	if ( SDL_MUSTLOCK(display) ) {
   1244         	if ( SDL_LockSurface(display) < 0 ) {
   1245 			return(-1);
   1246 		}
   1247 	}
   1248 	if ( stretch ) {
   1249 		dstp = (Uint8 *)swdata->stretch->pixels;
   1250 	} else {
   1251 		dstp = (Uint8 *)display->pixels
   1252 			+ dst->x * display->format->BytesPerPixel
   1253 			+ dst->y * display->pitch;
   1254 	}
   1255 	mod = (display->pitch / display->format->BytesPerPixel);
   1256 
   1257 	if ( scale_2x ) {
   1258 		mod -= (overlay->w * 2);
   1259 		swdata->Display2X(swdata->colortab, swdata->rgb_2_pix,
   1260 		                  lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
   1261 	} else {
   1262 		mod -= overlay->w;
   1263 		swdata->Display1X(swdata->colortab, swdata->rgb_2_pix,
   1264 		                  lum, Cr, Cb, dstp, overlay->h, overlay->w, mod);
   1265 	}
   1266 	if ( SDL_MUSTLOCK(display) ) {
   1267 		SDL_UnlockSurface(display);
   1268 	}
   1269 	if ( stretch ) {
   1270 		display = swdata->display;
   1271 		SDL_SoftStretch(swdata->stretch, src, display, dst);
   1272 	}
   1273 	SDL_UpdateRects(display, 1, dst);
   1274 
   1275 	return(0);
   1276 }
   1277 
   1278 void SDL_FreeYUV_SW(_THIS, SDL_Overlay *overlay)
   1279 {
   1280 	struct private_yuvhwdata *swdata;
   1281 
   1282 	swdata = overlay->hwdata;
   1283 	if ( swdata ) {
   1284 		if ( swdata->stretch ) {
   1285 			SDL_FreeSurface(swdata->stretch);
   1286 		}
   1287 		if ( swdata->pixels ) {
   1288 			SDL_free(swdata->pixels);
   1289 		}
   1290 		if ( swdata->colortab ) {
   1291 			SDL_free(swdata->colortab);
   1292 		}
   1293 		if ( swdata->rgb_2_pix ) {
   1294 			SDL_free(swdata->rgb_2_pix);
   1295 		}
   1296 		SDL_free(swdata);
   1297 		overlay->hwdata = NULL;
   1298 	}
   1299 }
   1300