Home | History | Annotate | Download | only in dm642
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 /****************************************************************************
     13  *
     14  *   Module Title :     gen_scalers.c
     15  *
     16  *   Description  :     Generic image scaling functions.
     17  *
     18  ***************************************************************************/
     19 
     20 /****************************************************************************
     21 *  Header Files
     22 ****************************************************************************/
     23 #include "vpx_scale/vpxscale.h"
     24 
     25 /****************************************************************************
     26 *  Imports
     27 ****************************************************************************/
     28 
     29 /****************************************************************************
     30  *
     31  *  ROUTINE       : horizontal_line_4_5_scale_c4
     32  *
     33  *  INPUTS        : const unsigned char *source : Pointer to source data.
     34  *                  unsigned int source_width    : Stride of source.
     35  *                  unsigned char *dest         : Pointer to destination data.
     36  *                  unsigned int dest_width      : Stride of destination (NOT USED).
     37  *
     38  *  OUTPUTS       : None.
     39  *
     40  *  RETURNS       : void
     41  *
     42  *  FUNCTION      : Copies horizontal line of pixels from source to
     43  *                  destination scaling up by 4 to 5.
     44  *
     45  *  SPECIAL NOTES : None.
     46  *
     47  ****************************************************************************/
     48 static
     49 void horizontal_line_4_5_scale_c64
     50 (
     51     const unsigned char *source,
     52     unsigned int source_width,
     53     unsigned char *dest,
     54     unsigned int dest_width
     55 )
     56 {
     57     unsigned i;
     58     unsigned int ba, cb, dc, ed;
     59     unsigned char *restrict des = dest;
     60     unsigned int *restrict src = (unsigned int *)source;
     61     unsigned int const_51_205, const_102_154,
     62              const_205_51, const_154_102;
     63 
     64     unsigned int src_current, src_next;
     65 
     66     (void) dest_width;
     67 
     68     // Constants that are to be used for the filtering.  For
     69     //  best speed we are going to want to right shift by 16.
     70     //  In the generic version they were shift by 8, so put
     71     //  an extra 8 in now so that 16 will come out later.
     72     const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
     73     const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
     74     const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
     75     const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);
     76 
     77     // 5 points are needed to filter to give 5 output points.
     78     //  A load can pull up 4 at a time, and one needs to be
     79     //  "borrowed" from the next set of data.  So instead of
     80     //  loading those 5 points each time, "steal" a point from
     81     //  the next set and only load up 4 each time through.
     82     src_current = _mem4(src);
     83 
     84     for (i = 0; i < source_width - 4; i += 4)
     85     {
     86         src_next = _mem4(src++);
     87 
     88         // Reorder the data so that it is ready for the
     89         //  dot product.
     90         ba = _unpklu4(src_current);
     91         cb = _unpkhu4(_rotl(src_current, 8));
     92         dc = _unpkhu4(src_current);
     93         ed = _unpkhu4(_shrmb(src_next, src_current));
     94 
     95         // Use the dot product with round and shift.
     96         des [0] = src_current & 0xff;
     97         des [1] = _dotprsu2(ba, const_205_51);
     98         des [2] = _dotprsu2(cb, const_154_102);
     99         des [3] = _dotprsu2(dc, const_102_154);
    100         des [4] = _dotprsu2(ed, const_51_205);
    101 
    102         des += 5;
    103 
    104         // reuse loaded vales next time around.
    105         src_current = src_next;
    106     }
    107 
    108     // vp8_filter the last set of points.  Normally a point from the next set
    109     //  would be used, but there is no next set, so just fill.
    110     ba = _unpklu4(src_current);
    111     cb = _unpkhu4(_rotl(src_current, 8));
    112     dc = _unpkhu4(src_current);
    113 
    114     des [0] = src_current & 0xff;
    115     des [1] = _dotprsu2(ba, const_205_51);
    116     des [2] = _dotprsu2(cb, const_154_102);
    117     des [3] = _dotprsu2(dc, const_102_154);
    118     des [4] = src_current & 0xff;
    119 
    120 }
    121 /****************************************************************************
    122  *
    123  *  ROUTINE       : vertical_band_4_5_scale_c64
    124  *
    125  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
    126  *                  unsigned int dest_pitch : Stride of destination data.
    127  *                  unsigned int dest_width : Width of destination data.
    128  *
    129  *  OUTPUTS       : None.
    130  *
    131  *  RETURNS       : void
    132  *
    133  *  FUNCTION      : Scales vertical band of pixels by scale 4 to 5. The
    134  *                  height of the band scaled is 4-pixels.
    135  *
    136  *  SPECIAL NOTES : The routine uses the first line of the band below
    137  *                  the current band.
    138  *
    139  ****************************************************************************/
    140 static
    141 void vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
    142 {
    143     unsigned int i;
    144     unsigned int a, b, c, d, e;
    145     unsigned int ba, cb, dc, ed;
    146     unsigned char *restrict src = dest;
    147     unsigned char *restrict des = dest;
    148     unsigned int const_51_205, const_102_154,
    149              const_205_51, const_154_102;
    150 
    151     const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    152     const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    153     const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    154     const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);
    155 
    156     // Force a loop unroll here so that there is not such a
    157     //  dependancy.
    158     a = src [0];
    159     b = src [dest_pitch];
    160     c = src [dest_pitch*2];
    161     d = src [dest_pitch*3];
    162     e = src [dest_pitch*5];
    163     src ++;
    164 
    165     for (i = 0; i < dest_width; i++)
    166     {
    167         ba = _pack2(b, a);
    168         cb = _pack2(c, b);
    169         dc = _pack2(d, c);
    170         ed = _pack2(e, d);
    171 
    172         a = src [0];
    173         b = src [dest_pitch];
    174         c = src [dest_pitch*2];
    175         d = src [dest_pitch*3];
    176         e = src [dest_pitch*5];
    177         src ++;
    178 
    179         des [dest_pitch] = _dotprsu2(ba, const_205_51);
    180         des [dest_pitch*2] = _dotprsu2(cb, const_154_102);
    181         des [dest_pitch*3] = _dotprsu2(dc, const_102_154);
    182         des [dest_pitch*4] = _dotprsu2(ed, const_51_205);
    183 
    184         des ++;
    185     }
    186 }
    187 
    188 /****************************************************************************
    189  *
    190  *  ROUTINE       : last_vertical_band_4_5_scale_c64
    191  *
    192  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
    193  *                  unsigned int dest_pitch : Stride of destination data.
    194  *                  unsigned int dest_width : Width of destination data.
    195  *
    196  *  OUTPUTS       : None.
    197  *
    198  *  RETURNS       : void
    199  *
    200  *  FUNCTION      : Scales last vertical band of pixels by scale 4 to 5. The
    201  *                  height of the band scaled is 4-pixels.
    202  *
    203  *  SPECIAL NOTES : The routine does not have available the first line of
    204  *                  the band below the current band, since this is the
    205  *                  last band.
    206  *
    207  ****************************************************************************/
    208 static
    209 void last_vertical_band_4_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
    210 {
    211     unsigned int i;
    212     unsigned int a, b, c, d;
    213     unsigned int ba, cb, dc;
    214     unsigned char *restrict src = dest;
    215     unsigned char *restrict des = dest;
    216     unsigned int const_102_154, const_205_51, const_154_102;
    217 
    218     const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    219     const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    220     const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);
    221 
    222     a = src [0];
    223     b = src [dest_pitch];
    224     c = src [dest_pitch*2];
    225     d = src [dest_pitch*3];
    226     src ++;
    227 
    228     for (i = 0; i < dest_width; ++i)
    229     {
    230         ba = _pack2(b, a);
    231         cb = _pack2(c, b);
    232         dc = _pack2(d, c);
    233 
    234         a = src [0];
    235         b = src [dest_pitch];
    236         c = src [dest_pitch*2];
    237         d = src [dest_pitch*3];
    238         src ++;
    239 
    240         des [dest_pitch] = _dotprsu2(ba, const_205_51);
    241         des [dest_pitch*2] = _dotprsu2(cb, const_154_102);
    242         des [dest_pitch*3] = _dotprsu2(dc, const_102_154);
    243         des [dest_pitch*4] = (unsigned char) d;
    244 
    245         des++;
    246     }
    247 }
    248 
    249 /****************************************************************************
    250  *
    251  *  ROUTINE       : horizontal_line_3_5_scale_c64
    252  *
    253  *  INPUTS        : const unsigned char *source : Pointer to source data.
    254  *                  unsigned int source_width    : Stride of source.
    255  *                  unsigned char *dest         : Pointer to destination data.
    256  *                  unsigned int dest_width      : Stride of destination (NOT USED).
    257  *
    258  *  OUTPUTS       : None.
    259  *
    260  *  RETURNS       : void
    261  *
    262  *  FUNCTION      : Copies horizontal line of pixels from source to
    263  *                  destination scaling up by 3 to 5.
    264  *
    265  *  SPECIAL NOTES : None.
    266  *
    267  *
    268  ****************************************************************************/
    269 static
    270 void horizontal_line_3_5_scale_c64
    271 (
    272     const unsigned char *source,
    273     unsigned int source_width,
    274     unsigned char *dest,
    275     unsigned int dest_width
    276 )
    277 {
    278     unsigned int i;
    279     unsigned int ba, cb, dc;
    280     unsigned int src_current;
    281     unsigned char *restrict des = dest;
    282     unsigned char *restrict src = (unsigned char *)source;
    283     unsigned int const_51_205, const_102_154,
    284              const_205_51, const_154_102;
    285 
    286     (void) dest_width;
    287 
    288     const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    289     const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    290     const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    291     const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);
    292 
    293     for (i = 0; i < source_width - 3; i += 3)
    294     {
    295         src_current = _mem4(src);
    296 
    297         // Reorder the data so that it is ready for the
    298         //  dot product.
    299         ba = _unpklu4(src_current);
    300         cb = _unpkhu4(_rotl(src_current, 8));
    301         dc = _unpkhu4(src_current);
    302 
    303         des [0] = src_current & 0xff;
    304         des [1] = _dotprsu2(ba, const_154_102);
    305         des [2] = _dotprsu2(cb, const_51_205);
    306         des [3] = _dotprsu2(cb, const_205_51);
    307         des [4] = _dotprsu2(dc, const_102_154);
    308 
    309         src += 3;
    310         des += 5;
    311     }
    312 
    313     src_current = _mem4(src);
    314 
    315     ba = _unpklu4(src_current);
    316     cb = _unpkhu4(_rotl(src_current, 8));
    317     dc = _unpkhu4(src_current);
    318 
    319 
    320     des [0] = src_current & 0xff;
    321     des [1] = _dotprsu2(ba, const_154_102);
    322     des [2] = _dotprsu2(cb, const_51_205);
    323     des [3] = _dotprsu2(cb, const_205_51);
    324     des [4] = dc & 0xff;
    325 
    326 }
    327 
    328 /****************************************************************************
    329  *
    330  *  ROUTINE       : vertical_band_3_5_scale_c64
    331  *
    332  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
    333  *                  unsigned int dest_pitch : Stride of destination data.
    334  *                  unsigned int dest_width : Width of destination data.
    335  *
    336  *  OUTPUTS       : None.
    337  *
    338  *  RETURNS       : void
    339  *
    340  *  FUNCTION      : Scales vertical band of pixels by scale 3 to 5. The
    341  *                  height of the band scaled is 3-pixels.
    342  *
    343  *  SPECIAL NOTES : The routine uses the first line of the band below
    344  *                  the current band.
    345  *
    346  ****************************************************************************/
    347 static
    348 void vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
    349 {
    350     unsigned int i;
    351     unsigned int a, b, c, d;
    352     unsigned int ba, cb, dc;
    353     unsigned char *restrict src = dest;
    354     unsigned char *restrict des = dest;
    355     unsigned int const_51_205, const_102_154,
    356              const_205_51, const_154_102;
    357 
    358     const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    359     const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    360     const_102_154 = 0x66009A00; //_pack2 (102 << 8, 154 << 8);
    361     const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);
    362 
    363     a = src [0];
    364     b = src [dest_pitch];
    365     c = src [dest_pitch*2];
    366     d = src [dest_pitch*5];
    367     src ++;
    368 
    369     for (i = 0; i < dest_width; i++)
    370     {
    371         ba = _pack2(b, a);
    372         cb = _pack2(c, b);
    373         dc = _pack2(d, c);
    374 
    375         a = src [0];
    376         b = src [dest_pitch];
    377         c = src [dest_pitch*2];
    378         d = src [dest_pitch*5];
    379         src ++;
    380 
    381         des [dest_pitch]   = _dotprsu2(ba, const_154_102);
    382         des [dest_pitch*2] = _dotprsu2(cb, const_51_205);
    383         des [dest_pitch*3] = _dotprsu2(cb, const_205_51);
    384         des [dest_pitch*4] = _dotprsu2(dc, const_102_154);
    385 
    386         des++;
    387     }
    388 }
    389 
    390 /****************************************************************************
    391  *
    392  *  ROUTINE       : last_vertical_band_3_5_scale_c64
    393  *
    394  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
    395  *                  unsigned int dest_pitch : Stride of destination data.
    396  *                  unsigned int dest_width : Width of destination data.
    397  *
    398  *  OUTPUTS       : None.
    399  *
    400  *  RETURNS       : void
    401  *
    402  *  FUNCTION      : Scales last vertical band of pixels by scale 3 to 5. The
    403  *                  height of the band scaled is 3-pixels.
    404  *
    405  *  SPECIAL NOTES : The routine does not have available the first line of
    406  *                  the band below the current band, since this is the
    407  *                  last band.
    408  *
    409  ****************************************************************************/
    410 static
    411 void last_vertical_band_3_5_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
    412 {
    413     unsigned int i;
    414     unsigned int a, b, c;
    415     unsigned int ba, cb;
    416     unsigned char *restrict src = dest;
    417     unsigned char *restrict des = dest;
    418     unsigned int const_51_205, const_205_51, const_154_102;
    419 
    420     const_51_205 = 0x3300CD00; //_pack2 (51 << 8, 205 << 8);
    421     const_205_51 = 0xCD003300; //_pack2 (205 << 8, 51 << 8);
    422     const_154_102 = 0x9A006600; //_pack2 (154 << 8, 102 << 8);
    423 
    424     a = src [0];
    425     b = src [dest_pitch];
    426     c = src [dest_pitch*2];
    427     src ++;
    428 
    429     for (i = 0; i < dest_width; ++i)
    430     {
    431         ba = _pack2(b, a);
    432         cb = _pack2(c, b);
    433 
    434         a = src [0];
    435         b = src [dest_pitch];
    436         c = src [dest_pitch*2];
    437         src ++;
    438 
    439         des [dest_pitch]   = _dotprsu2(ba, const_154_102);
    440         des [dest_pitch*2] = _dotprsu2(cb, const_51_205);
    441         des [dest_pitch*3] = _dotprsu2(cb, const_205_51);
    442         des [dest_pitch*4] = (unsigned char)(c) ;
    443 
    444         des++;
    445     }
    446 }
    447 
    448 /****************************************************************************
    449  *
    450  *  ROUTINE       : horizontal_line_1_2_scale_c64
    451  *
    452  *  INPUTS        : const unsigned char *source : Pointer to source data.
    453  *                  unsigned int source_width    : Stride of source.
    454  *                  unsigned char *dest         : Pointer to destination data.
    455  *                  unsigned int dest_width      : Stride of destination (NOT USED).
    456  *
    457  *  OUTPUTS       : None.
    458  *
    459  *  RETURNS       : void
    460  *
    461  *  FUNCTION      : Copies horizontal line of pixels from source to
    462  *                  destination scaling up by 1 to 2.
    463  *
    464  *  SPECIAL NOTES : source width must be a multiple of 4.
    465  *
    466  ****************************************************************************/
    467 void horizontal_line_1_2_scale_c64
    468 (
    469     const unsigned char *source,
    470     unsigned int source_width,
    471     unsigned char *dest,
    472     unsigned int dest_width
    473 )
    474 {
    475     unsigned int i;
    476     unsigned char *restrict des = dest;
    477     unsigned char *restrict src = (unsigned char *)source;
    478     unsigned int src7_4i, src4_1i, src3_0i;
    479     unsigned int a4_0i, ahi, alo;
    480     double src7_0d, src3_0d;
    481     const unsigned int k01 = 0x01010101;
    482 
    483     for (i = 0; i < source_width / 4; i += 1)
    484     {
    485         // Load up the data from src.  Here a wide load is
    486         //  used to get 8 bytes at once, only 5 will be used
    487         //  for the actual computation.
    488         src7_0d = _memd8(src);
    489         src3_0i = _lo(src7_0d);
    490         src7_4i = _hi(src7_0d);
    491 
    492         // Need to average between points.  Shift byte 5 into
    493         //  the lower word.  This will result in bytes 5-1
    494         //  averaged with 4-0.
    495         src4_1i = _shrmb(src7_4i, src3_0i);
    496         a4_0i = _avgu4(src4_1i, src3_0i);
    497 
    498         // Expand the data out. Could do an unpack, however
    499         //  all but the multiply units are getting pretty hard
    500         //  here the multiply unit can take some of the computations.
    501         src3_0d = _mpyu4(src3_0i, k01);
    502 
    503         // The averages need to be unpacked so that they are in 16
    504         //  bit form and will be able to be interleaved with the
    505         //  original data
    506         ahi = _unpkhu4(a4_0i);
    507         alo = _unpklu4(a4_0i);
    508 
    509         ahi = _swap4(ahi);
    510         alo = _swap4(alo);
    511 
    512         // Mix the average result in with the orginal data.
    513         ahi = _hi(src3_0d) | ahi;
    514         alo = _lo(src3_0d) | alo;
    515 
    516         _memd8(des) = _itod(ahi, alo);
    517 
    518         des += 8;
    519         src += 4;
    520     }
    521 }
    522 
    523 
    524 /****************************************************************************
    525  *
    526  *  ROUTINE       : vertical_band_1_2_scale_c64
    527  *
    528  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
    529  *                  unsigned int dest_pitch : Stride of destination data.
    530  *                  unsigned int dest_width : Width of destination data.
    531  *
    532  *  OUTPUTS       : None.
    533  *
    534  *  RETURNS       : void
    535  *
    536  *  FUNCTION      : Scales vertical band of pixels by scale 1 to 2. The
    537  *                  height of the band scaled is 1-pixel.
    538  *
    539  *  SPECIAL NOTES : The routine uses the first line of the band below
    540  *                  the current band.
    541  *                  Destination width must be a multiple of 4.  Because the
    542  *                  intput must be, therefore the output must be.
    543  *
    544  ****************************************************************************/
    545 static
    546 void vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
    547 {
    548     unsigned int i;
    549     unsigned int a, b;
    550     unsigned int *restrict line_a = (unsigned int *)dest;
    551     unsigned int *restrict line_b = (unsigned int *)(dest + (dest_pitch * 2));
    552     unsigned int *restrict des = (unsigned int *)(dest + dest_pitch);
    553 
    554     for (i = 0; i < dest_width / 4; i++)
    555     {
    556         a = _mem4(line_a++);
    557         b = _mem4(line_b++);
    558 
    559         _mem4(des++) = _avgu4(a, b);
    560     }
    561 }
    562 
    563 /****************************************************************************
    564  *
    565  *  ROUTINE       : last_vertical_band_1_2_scale_c64
    566  *
    567  *  INPUTS        : unsigned char *dest    : Pointer to destination data.
    568  *                  unsigned int dest_pitch : Stride of destination data.
    569  *                  unsigned int dest_width : Width of destination data.
    570  *
    571  *  OUTPUTS       : None.
    572  *
    573  *  RETURNS       : void
    574  *
    575  *  FUNCTION      : Scales last vertical band of pixels by scale 1 to 2. The
    576  *                  height of the band scaled is 1-pixel.
    577  *
    578  *  SPECIAL NOTES : The routine does not have available the first line of
    579  *                  the band below the current band, since this is the
    580  *                  last band.  Again, width must be a multiple of 4.
    581  *
    582  ****************************************************************************/
    583 static
    584 void last_vertical_band_1_2_scale_c64(unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width)
    585 {
    586     unsigned int i;
    587     unsigned int *restrict src = (unsigned int *)dest;
    588     unsigned int *restrict des = (unsigned int *)(dest + dest_pitch);
    589 
    590     for (i = 0; i < dest_width / 4; ++i)
    591     {
    592         _mem4(des++) = _mem4(src++);
    593     }
    594 }
    595 
    596 void
    597 register_generic_scalers(void)
    598 {
    599     vp8_horizontal_line_1_2_scale        = horizontal_line_1_2_scale_c64;
    600     vp8_vertical_band_1_2_scale          = vertical_band_1_2_scale_c64;
    601     vp8_last_vertical_band_1_2_scale      = last_vertical_band_1_2_scale_c64;
    602     vp8_horizontal_line_3_5_scale        = horizontal_line_3_5_scale_c64;
    603     vp8_vertical_band_3_5_scale          = vertical_band_3_5_scale_c64;
    604     vp8_last_vertical_band_3_5_scale      = last_vertical_band_3_5_scale_c64;
    605     vp8_horizontal_line_4_5_scale        = horizontal_line_4_5_scale_c64;
    606     vp8_vertical_band_4_5_scale          = vertical_band_4_5_scale_c64;
    607     vp8_last_vertical_band_4_5_scale      = last_vertical_band_4_5_scale_c64;
    608 }
    609