Home | History | Annotate | Download | only in common
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <math.h>
     12 
     13 #include "./vp9_rtcd.h"
     14 #include "./vpx_dsp_rtcd.h"
     15 #include "vp9/common/vp9_blockd.h"
     16 #include "vp9/common/vp9_idct.h"
     17 #include "vpx_dsp/inv_txfm.h"
     18 #include "vpx_ports/mem.h"
     19 
     20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     21                          int tx_type) {
     22   const transform_2d IHT_4[] = {
     23     { idct4_c, idct4_c  },  // DCT_DCT  = 0
     24     { iadst4_c, idct4_c  },   // ADST_DCT = 1
     25     { idct4_c, iadst4_c },    // DCT_ADST = 2
     26     { iadst4_c, iadst4_c }      // ADST_ADST = 3
     27   };
     28 
     29   int i, j;
     30   tran_low_t out[4 * 4];
     31   tran_low_t *outptr = out;
     32   tran_low_t temp_in[4], temp_out[4];
     33 
     34   // inverse transform row vectors
     35   for (i = 0; i < 4; ++i) {
     36     IHT_4[tx_type].rows(input, outptr);
     37     input  += 4;
     38     outptr += 4;
     39   }
     40 
     41   // inverse transform column vectors
     42   for (i = 0; i < 4; ++i) {
     43     for (j = 0; j < 4; ++j)
     44       temp_in[j] = out[j * 4 + i];
     45     IHT_4[tx_type].cols(temp_in, temp_out);
     46     for (j = 0; j < 4; ++j) {
     47       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
     48                                             ROUND_POWER_OF_TWO(temp_out[j], 4));
     49     }
     50   }
     51 }
     52 
     53 static const transform_2d IHT_8[] = {
     54   { idct8_c,  idct8_c  },  // DCT_DCT  = 0
     55   { iadst8_c, idct8_c  },  // ADST_DCT = 1
     56   { idct8_c,  iadst8_c },  // DCT_ADST = 2
     57   { iadst8_c, iadst8_c }   // ADST_ADST = 3
     58 };
     59 
     60 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     61                          int tx_type) {
     62   int i, j;
     63   tran_low_t out[8 * 8];
     64   tran_low_t *outptr = out;
     65   tran_low_t temp_in[8], temp_out[8];
     66   const transform_2d ht = IHT_8[tx_type];
     67 
     68   // inverse transform row vectors
     69   for (i = 0; i < 8; ++i) {
     70     ht.rows(input, outptr);
     71     input += 8;
     72     outptr += 8;
     73   }
     74 
     75   // inverse transform column vectors
     76   for (i = 0; i < 8; ++i) {
     77     for (j = 0; j < 8; ++j)
     78       temp_in[j] = out[j * 8 + i];
     79     ht.cols(temp_in, temp_out);
     80     for (j = 0; j < 8; ++j) {
     81       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
     82                                             ROUND_POWER_OF_TWO(temp_out[j], 5));
     83     }
     84   }
     85 }
     86 
     87 static const transform_2d IHT_16[] = {
     88   { idct16_c,  idct16_c  },  // DCT_DCT  = 0
     89   { iadst16_c, idct16_c  },  // ADST_DCT = 1
     90   { idct16_c,  iadst16_c },  // DCT_ADST = 2
     91   { iadst16_c, iadst16_c }   // ADST_ADST = 3
     92 };
     93 
     94 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     95                             int tx_type) {
     96   int i, j;
     97   tran_low_t out[16 * 16];
     98   tran_low_t *outptr = out;
     99   tran_low_t temp_in[16], temp_out[16];
    100   const transform_2d ht = IHT_16[tx_type];
    101 
    102   // Rows
    103   for (i = 0; i < 16; ++i) {
    104     ht.rows(input, outptr);
    105     input += 16;
    106     outptr += 16;
    107   }
    108 
    109   // Columns
    110   for (i = 0; i < 16; ++i) {
    111     for (j = 0; j < 16; ++j)
    112       temp_in[j] = out[j * 16 + i];
    113     ht.cols(temp_in, temp_out);
    114     for (j = 0; j < 16; ++j) {
    115       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
    116                                             ROUND_POWER_OF_TWO(temp_out[j], 6));
    117     }
    118   }
    119 }
    120 
    121 // idct
    122 void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    123                      int eob) {
    124   if (eob > 1)
    125     vpx_idct4x4_16_add(input, dest, stride);
    126   else
    127     vpx_idct4x4_1_add(input, dest, stride);
    128 }
    129 
    130 
    131 void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    132                      int eob) {
    133   if (eob > 1)
    134     vpx_iwht4x4_16_add(input, dest, stride);
    135   else
    136     vpx_iwht4x4_1_add(input, dest, stride);
    137 }
    138 
    139 void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
    140                      int eob) {
    141   // If dc is 1, then input[0] is the reconstructed value, do not need
    142   // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
    143 
    144   // The calculation can be simplified if there are not many non-zero dct
    145   // coefficients. Use eobs to decide what to do.
    146   // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
    147   // Combine that with code here.
    148   if (eob == 1)
    149     // DC only DCT coefficient
    150     vpx_idct8x8_1_add(input, dest, stride);
    151   else if (eob <= 12)
    152     vpx_idct8x8_12_add(input, dest, stride);
    153   else
    154     vpx_idct8x8_64_add(input, dest, stride);
    155 }
    156 
    157 void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
    158                        int eob) {
    159   /* The calculation can be simplified if there are not many non-zero dct
    160    * coefficients. Use eobs to separate different cases. */
    161   if (eob == 1)
    162     /* DC only DCT coefficient. */
    163     vpx_idct16x16_1_add(input, dest, stride);
    164   else if (eob <= 10)
    165     vpx_idct16x16_10_add(input, dest, stride);
    166   else
    167     vpx_idct16x16_256_add(input, dest, stride);
    168 }
    169 
    170 void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
    171                        int eob) {
    172   if (eob == 1)
    173     vpx_idct32x32_1_add(input, dest, stride);
    174   else if (eob <= 34)
    175     // non-zero coeff only in upper-left 8x8
    176     vpx_idct32x32_34_add(input, dest, stride);
    177   else
    178     vpx_idct32x32_1024_add(input, dest, stride);
    179 }
    180 
    181 // iht
    182 void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
    183                     int stride, int eob) {
    184   if (tx_type == DCT_DCT)
    185     vp9_idct4x4_add(input, dest, stride, eob);
    186   else
    187     vp9_iht4x4_16_add(input, dest, stride, tx_type);
    188 }
    189 
    190 void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
    191                     int stride, int eob) {
    192   if (tx_type == DCT_DCT) {
    193     vp9_idct8x8_add(input, dest, stride, eob);
    194   } else {
    195     vp9_iht8x8_64_add(input, dest, stride, tx_type);
    196   }
    197 }
    198 
    199 void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
    200                       int stride, int eob) {
    201   if (tx_type == DCT_DCT) {
    202     vp9_idct16x16_add(input, dest, stride, eob);
    203   } else {
    204     vp9_iht16x16_256_add(input, dest, stride, tx_type);
    205   }
    206 }
    207 
    208 #if CONFIG_VP9_HIGHBITDEPTH
    209 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8,
    210                                 int stride, int tx_type, int bd) {
    211   const highbd_transform_2d IHT_4[] = {
    212     { vpx_highbd_idct4_c, vpx_highbd_idct4_c  },    // DCT_DCT  = 0
    213     { vpx_highbd_iadst4_c, vpx_highbd_idct4_c },    // ADST_DCT = 1
    214     { vpx_highbd_idct4_c, vpx_highbd_iadst4_c },    // DCT_ADST = 2
    215     { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }    // ADST_ADST = 3
    216   };
    217   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
    218 
    219   int i, j;
    220   tran_low_t out[4 * 4];
    221   tran_low_t *outptr = out;
    222   tran_low_t temp_in[4], temp_out[4];
    223 
    224   // Inverse transform row vectors.
    225   for (i = 0; i < 4; ++i) {
    226     IHT_4[tx_type].rows(input, outptr, bd);
    227     input  += 4;
    228     outptr += 4;
    229   }
    230 
    231   // Inverse transform column vectors.
    232   for (i = 0; i < 4; ++i) {
    233     for (j = 0; j < 4; ++j)
    234       temp_in[j] = out[j * 4 + i];
    235     IHT_4[tx_type].cols(temp_in, temp_out, bd);
    236     for (j = 0; j < 4; ++j) {
    237       dest[j * stride + i] = highbd_clip_pixel_add(
    238           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
    239     }
    240   }
    241 }
    242 
    243 static const highbd_transform_2d HIGH_IHT_8[] = {
    244   { vpx_highbd_idct8_c,  vpx_highbd_idct8_c  },  // DCT_DCT  = 0
    245   { vpx_highbd_iadst8_c, vpx_highbd_idct8_c  },  // ADST_DCT = 1
    246   { vpx_highbd_idct8_c,  vpx_highbd_iadst8_c },  // DCT_ADST = 2
    247   { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }   // ADST_ADST = 3
    248 };
    249 
    250 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8,
    251                                 int stride, int tx_type, int bd) {
    252   int i, j;
    253   tran_low_t out[8 * 8];
    254   tran_low_t *outptr = out;
    255   tran_low_t temp_in[8], temp_out[8];
    256   const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
    257   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
    258 
    259   // Inverse transform row vectors.
    260   for (i = 0; i < 8; ++i) {
    261     ht.rows(input, outptr, bd);
    262     input += 8;
    263     outptr += 8;
    264   }
    265 
    266   // Inverse transform column vectors.
    267   for (i = 0; i < 8; ++i) {
    268     for (j = 0; j < 8; ++j)
    269       temp_in[j] = out[j * 8 + i];
    270     ht.cols(temp_in, temp_out, bd);
    271     for (j = 0; j < 8; ++j) {
    272       dest[j * stride + i] = highbd_clip_pixel_add(
    273           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
    274     }
    275   }
    276 }
    277 
    278 static const highbd_transform_2d HIGH_IHT_16[] = {
    279   { vpx_highbd_idct16_c,  vpx_highbd_idct16_c  },  // DCT_DCT  = 0
    280   { vpx_highbd_iadst16_c, vpx_highbd_idct16_c  },  // ADST_DCT = 1
    281   { vpx_highbd_idct16_c,  vpx_highbd_iadst16_c },  // DCT_ADST = 2
    282   { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }   // ADST_ADST = 3
    283 };
    284 
    285 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8,
    286                                    int stride, int tx_type, int bd) {
    287   int i, j;
    288   tran_low_t out[16 * 16];
    289   tran_low_t *outptr = out;
    290   tran_low_t temp_in[16], temp_out[16];
    291   const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
    292   uint16_t *dest = CONVERT_TO_SHORTPTR(dest8);
    293 
    294   // Rows
    295   for (i = 0; i < 16; ++i) {
    296     ht.rows(input, outptr, bd);
    297     input += 16;
    298     outptr += 16;
    299   }
    300 
    301   // Columns
    302   for (i = 0; i < 16; ++i) {
    303     for (j = 0; j < 16; ++j)
    304       temp_in[j] = out[j * 16 + i];
    305     ht.cols(temp_in, temp_out, bd);
    306     for (j = 0; j < 16; ++j) {
    307       dest[j * stride + i] = highbd_clip_pixel_add(
    308           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
    309     }
    310   }
    311 }
    312 
    313 // idct
    314 void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    315                             int eob, int bd) {
    316   if (eob > 1)
    317     vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
    318   else
    319     vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
    320 }
    321 
    322 
    323 void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    324                             int eob, int bd) {
    325   if (eob > 1)
    326     vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
    327   else
    328     vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
    329 }
    330 
    331 void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
    332                             int eob, int bd) {
    333   // If dc is 1, then input[0] is the reconstructed value, do not need
    334   // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
    335 
    336   // The calculation can be simplified if there are not many non-zero dct
    337   // coefficients. Use eobs to decide what to do.
    338   // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c.
    339   // Combine that with code here.
    340   // DC only DCT coefficient
    341   if (eob == 1) {
    342     vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
    343   } else if (eob <= 10) {
    344     vpx_highbd_idct8x8_10_add(input, dest, stride, bd);
    345   } else {
    346     vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
    347   }
    348 }
    349 
    350 void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest,
    351                               int stride, int eob, int bd) {
    352   // The calculation can be simplified if there are not many non-zero dct
    353   // coefficients. Use eobs to separate different cases.
    354   // DC only DCT coefficient.
    355   if (eob == 1) {
    356     vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
    357   } else if (eob <= 10) {
    358     vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
    359   } else {
    360     vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
    361   }
    362 }
    363 
    364 void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest,
    365                               int stride, int eob, int bd) {
    366   // Non-zero coeff only in upper-left 8x8
    367   if (eob == 1) {
    368     vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
    369   } else if (eob <= 34) {
    370     vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
    371   } else {
    372     vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
    373   }
    374 }
    375 
    376 // iht
    377 void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
    378                            uint8_t *dest, int stride, int eob, int bd) {
    379   if (tx_type == DCT_DCT)
    380     vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
    381   else
    382     vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
    383 }
    384 
    385 void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
    386                            uint8_t *dest, int stride, int eob, int bd) {
    387   if (tx_type == DCT_DCT) {
    388     vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
    389   } else {
    390     vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
    391   }
    392 }
    393 
    394 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
    395                            uint8_t *dest, int stride, int eob, int bd) {
    396   if (tx_type == DCT_DCT) {
    397     vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
    398   } else {
    399     vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
    400   }
    401 }
    402 #endif  // CONFIG_VP9_HIGHBITDEPTH
    403