Home | History | Annotate | Download | only in common
      1 /*
      2  *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 #include <math.h>
     12 
     13 #include "./vp9_rtcd.h"
     14 #include "./vpx_dsp_rtcd.h"
     15 #include "vp9/common/vp9_blockd.h"
     16 #include "vp9/common/vp9_idct.h"
     17 #include "vpx_dsp/inv_txfm.h"
     18 #include "vpx_ports/mem.h"
     19 
     20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     21                          int tx_type) {
     22   const transform_2d IHT_4[] = {
     23     { idct4_c, idct4_c },   // DCT_DCT  = 0
     24     { iadst4_c, idct4_c },  // ADST_DCT = 1
     25     { idct4_c, iadst4_c },  // DCT_ADST = 2
     26     { iadst4_c, iadst4_c }  // ADST_ADST = 3
     27   };
     28 
     29   int i, j;
     30   tran_low_t out[4 * 4];
     31   tran_low_t *outptr = out;
     32   tran_low_t temp_in[4], temp_out[4];
     33 
     34   // inverse transform row vectors
     35   for (i = 0; i < 4; ++i) {
     36     IHT_4[tx_type].rows(input, outptr);
     37     input += 4;
     38     outptr += 4;
     39   }
     40 
     41   // inverse transform column vectors
     42   for (i = 0; i < 4; ++i) {
     43     for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
     44     IHT_4[tx_type].cols(temp_in, temp_out);
     45     for (j = 0; j < 4; ++j) {
     46       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
     47                                             ROUND_POWER_OF_TWO(temp_out[j], 4));
     48     }
     49   }
     50 }
     51 
     52 static const transform_2d IHT_8[] = {
     53   { idct8_c, idct8_c },   // DCT_DCT  = 0
     54   { iadst8_c, idct8_c },  // ADST_DCT = 1
     55   { idct8_c, iadst8_c },  // DCT_ADST = 2
     56   { iadst8_c, iadst8_c }  // ADST_ADST = 3
     57 };
     58 
     59 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     60                          int tx_type) {
     61   int i, j;
     62   tran_low_t out[8 * 8];
     63   tran_low_t *outptr = out;
     64   tran_low_t temp_in[8], temp_out[8];
     65   const transform_2d ht = IHT_8[tx_type];
     66 
     67   // inverse transform row vectors
     68   for (i = 0; i < 8; ++i) {
     69     ht.rows(input, outptr);
     70     input += 8;
     71     outptr += 8;
     72   }
     73 
     74   // inverse transform column vectors
     75   for (i = 0; i < 8; ++i) {
     76     for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
     77     ht.cols(temp_in, temp_out);
     78     for (j = 0; j < 8; ++j) {
     79       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
     80                                             ROUND_POWER_OF_TWO(temp_out[j], 5));
     81     }
     82   }
     83 }
     84 
     85 static const transform_2d IHT_16[] = {
     86   { idct16_c, idct16_c },   // DCT_DCT  = 0
     87   { iadst16_c, idct16_c },  // ADST_DCT = 1
     88   { idct16_c, iadst16_c },  // DCT_ADST = 2
     89   { iadst16_c, iadst16_c }  // ADST_ADST = 3
     90 };
     91 
     92 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride,
     93                             int tx_type) {
     94   int i, j;
     95   tran_low_t out[16 * 16];
     96   tran_low_t *outptr = out;
     97   tran_low_t temp_in[16], temp_out[16];
     98   const transform_2d ht = IHT_16[tx_type];
     99 
    100   // Rows
    101   for (i = 0; i < 16; ++i) {
    102     ht.rows(input, outptr);
    103     input += 16;
    104     outptr += 16;
    105   }
    106 
    107   // Columns
    108   for (i = 0; i < 16; ++i) {
    109     for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
    110     ht.cols(temp_in, temp_out);
    111     for (j = 0; j < 16; ++j) {
    112       dest[j * stride + i] = clip_pixel_add(dest[j * stride + i],
    113                                             ROUND_POWER_OF_TWO(temp_out[j], 6));
    114     }
    115   }
    116 }
    117 
    118 // idct
    119 void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    120                      int eob) {
    121   if (eob > 1)
    122     vpx_idct4x4_16_add(input, dest, stride);
    123   else
    124     vpx_idct4x4_1_add(input, dest, stride);
    125 }
    126 
    127 void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride,
    128                      int eob) {
    129   if (eob > 1)
    130     vpx_iwht4x4_16_add(input, dest, stride);
    131   else
    132     vpx_iwht4x4_1_add(input, dest, stride);
    133 }
    134 
    135 void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride,
    136                      int eob) {
    137   // If dc is 1, then input[0] is the reconstructed value, do not need
    138   // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
    139 
    140   // The calculation can be simplified if there are not many non-zero dct
    141   // coefficients. Use eobs to decide what to do.
    142   if (eob == 1)
    143     // DC only DCT coefficient
    144     vpx_idct8x8_1_add(input, dest, stride);
    145   else if (eob <= 12)
    146     vpx_idct8x8_12_add(input, dest, stride);
    147   else
    148     vpx_idct8x8_64_add(input, dest, stride);
    149 }
    150 
    151 void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride,
    152                        int eob) {
    153   /* The calculation can be simplified if there are not many non-zero dct
    154    * coefficients. Use eobs to separate different cases. */
    155   if (eob == 1) /* DC only DCT coefficient. */
    156     vpx_idct16x16_1_add(input, dest, stride);
    157   else if (eob <= 10)
    158     vpx_idct16x16_10_add(input, dest, stride);
    159   else if (eob <= 38)
    160     vpx_idct16x16_38_add(input, dest, stride);
    161   else
    162     vpx_idct16x16_256_add(input, dest, stride);
    163 }
    164 
    165 void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride,
    166                        int eob) {
    167   if (eob == 1)
    168     vpx_idct32x32_1_add(input, dest, stride);
    169   else if (eob <= 34)
    170     // non-zero coeff only in upper-left 8x8
    171     vpx_idct32x32_34_add(input, dest, stride);
    172   else if (eob <= 135)
    173     // non-zero coeff only in upper-left 16x16
    174     vpx_idct32x32_135_add(input, dest, stride);
    175   else
    176     vpx_idct32x32_1024_add(input, dest, stride);
    177 }
    178 
    179 // iht
    180 void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
    181                     int stride, int eob) {
    182   if (tx_type == DCT_DCT)
    183     vp9_idct4x4_add(input, dest, stride, eob);
    184   else
    185     vp9_iht4x4_16_add(input, dest, stride, tx_type);
    186 }
    187 
    188 void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
    189                     int stride, int eob) {
    190   if (tx_type == DCT_DCT) {
    191     vp9_idct8x8_add(input, dest, stride, eob);
    192   } else {
    193     vp9_iht8x8_64_add(input, dest, stride, tx_type);
    194   }
    195 }
    196 
    197 void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest,
    198                       int stride, int eob) {
    199   if (tx_type == DCT_DCT) {
    200     vp9_idct16x16_add(input, dest, stride, eob);
    201   } else {
    202     vp9_iht16x16_256_add(input, dest, stride, tx_type);
    203   }
    204 }
    205 
    206 #if CONFIG_VP9_HIGHBITDEPTH
    207 
    208 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest,
    209                                 int stride, int tx_type, int bd) {
    210   const highbd_transform_2d IHT_4[] = {
    211     { vpx_highbd_idct4_c, vpx_highbd_idct4_c },   // DCT_DCT  = 0
    212     { vpx_highbd_iadst4_c, vpx_highbd_idct4_c },  // ADST_DCT = 1
    213     { vpx_highbd_idct4_c, vpx_highbd_iadst4_c },  // DCT_ADST = 2
    214     { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c }  // ADST_ADST = 3
    215   };
    216 
    217   int i, j;
    218   tran_low_t out[4 * 4];
    219   tran_low_t *outptr = out;
    220   tran_low_t temp_in[4], temp_out[4];
    221 
    222   // Inverse transform row vectors.
    223   for (i = 0; i < 4; ++i) {
    224     IHT_4[tx_type].rows(input, outptr, bd);
    225     input += 4;
    226     outptr += 4;
    227   }
    228 
    229   // Inverse transform column vectors.
    230   for (i = 0; i < 4; ++i) {
    231     for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i];
    232     IHT_4[tx_type].cols(temp_in, temp_out, bd);
    233     for (j = 0; j < 4; ++j) {
    234       dest[j * stride + i] = highbd_clip_pixel_add(
    235           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd);
    236     }
    237   }
    238 }
    239 
    240 static const highbd_transform_2d HIGH_IHT_8[] = {
    241   { vpx_highbd_idct8_c, vpx_highbd_idct8_c },   // DCT_DCT  = 0
    242   { vpx_highbd_iadst8_c, vpx_highbd_idct8_c },  // ADST_DCT = 1
    243   { vpx_highbd_idct8_c, vpx_highbd_iadst8_c },  // DCT_ADST = 2
    244   { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c }  // ADST_ADST = 3
    245 };
    246 
    247 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest,
    248                                 int stride, int tx_type, int bd) {
    249   int i, j;
    250   tran_low_t out[8 * 8];
    251   tran_low_t *outptr = out;
    252   tran_low_t temp_in[8], temp_out[8];
    253   const highbd_transform_2d ht = HIGH_IHT_8[tx_type];
    254 
    255   // Inverse transform row vectors.
    256   for (i = 0; i < 8; ++i) {
    257     ht.rows(input, outptr, bd);
    258     input += 8;
    259     outptr += 8;
    260   }
    261 
    262   // Inverse transform column vectors.
    263   for (i = 0; i < 8; ++i) {
    264     for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i];
    265     ht.cols(temp_in, temp_out, bd);
    266     for (j = 0; j < 8; ++j) {
    267       dest[j * stride + i] = highbd_clip_pixel_add(
    268           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd);
    269     }
    270   }
    271 }
    272 
    273 static const highbd_transform_2d HIGH_IHT_16[] = {
    274   { vpx_highbd_idct16_c, vpx_highbd_idct16_c },   // DCT_DCT  = 0
    275   { vpx_highbd_iadst16_c, vpx_highbd_idct16_c },  // ADST_DCT = 1
    276   { vpx_highbd_idct16_c, vpx_highbd_iadst16_c },  // DCT_ADST = 2
    277   { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c }  // ADST_ADST = 3
    278 };
    279 
    280 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest,
    281                                    int stride, int tx_type, int bd) {
    282   int i, j;
    283   tran_low_t out[16 * 16];
    284   tran_low_t *outptr = out;
    285   tran_low_t temp_in[16], temp_out[16];
    286   const highbd_transform_2d ht = HIGH_IHT_16[tx_type];
    287 
    288   // Rows
    289   for (i = 0; i < 16; ++i) {
    290     ht.rows(input, outptr, bd);
    291     input += 16;
    292     outptr += 16;
    293   }
    294 
    295   // Columns
    296   for (i = 0; i < 16; ++i) {
    297     for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i];
    298     ht.cols(temp_in, temp_out, bd);
    299     for (j = 0; j < 16; ++j) {
    300       dest[j * stride + i] = highbd_clip_pixel_add(
    301           dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
    302     }
    303   }
    304 }
    305 
    306 // idct
    307 void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
    308                             int eob, int bd) {
    309   if (eob > 1)
    310     vpx_highbd_idct4x4_16_add(input, dest, stride, bd);
    311   else
    312     vpx_highbd_idct4x4_1_add(input, dest, stride, bd);
    313 }
    314 
    315 void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride,
    316                             int eob, int bd) {
    317   if (eob > 1)
    318     vpx_highbd_iwht4x4_16_add(input, dest, stride, bd);
    319   else
    320     vpx_highbd_iwht4x4_1_add(input, dest, stride, bd);
    321 }
    322 
    323 void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride,
    324                             int eob, int bd) {
    325   // If dc is 1, then input[0] is the reconstructed value, do not need
    326   // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1.
    327 
    328   // The calculation can be simplified if there are not many non-zero dct
    329   // coefficients. Use eobs to decide what to do.
    330   // DC only DCT coefficient
    331   if (eob == 1) {
    332     vpx_highbd_idct8x8_1_add(input, dest, stride, bd);
    333   } else if (eob <= 12) {
    334     vpx_highbd_idct8x8_12_add(input, dest, stride, bd);
    335   } else {
    336     vpx_highbd_idct8x8_64_add(input, dest, stride, bd);
    337   }
    338 }
    339 
    340 void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest,
    341                               int stride, int eob, int bd) {
    342   // The calculation can be simplified if there are not many non-zero dct
    343   // coefficients. Use eobs to separate different cases.
    344   // DC only DCT coefficient.
    345   if (eob == 1) {
    346     vpx_highbd_idct16x16_1_add(input, dest, stride, bd);
    347   } else if (eob <= 10) {
    348     vpx_highbd_idct16x16_10_add(input, dest, stride, bd);
    349   } else if (eob <= 38) {
    350     vpx_highbd_idct16x16_38_add(input, dest, stride, bd);
    351   } else {
    352     vpx_highbd_idct16x16_256_add(input, dest, stride, bd);
    353   }
    354 }
    355 
    356 void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest,
    357                               int stride, int eob, int bd) {
    358   // Non-zero coeff only in upper-left 8x8
    359   if (eob == 1) {
    360     vpx_highbd_idct32x32_1_add(input, dest, stride, bd);
    361   } else if (eob <= 34) {
    362     vpx_highbd_idct32x32_34_add(input, dest, stride, bd);
    363   } else if (eob <= 135) {
    364     vpx_highbd_idct32x32_135_add(input, dest, stride, bd);
    365   } else {
    366     vpx_highbd_idct32x32_1024_add(input, dest, stride, bd);
    367   }
    368 }
    369 
    370 // iht
    371 void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input,
    372                            uint16_t *dest, int stride, int eob, int bd) {
    373   if (tx_type == DCT_DCT)
    374     vp9_highbd_idct4x4_add(input, dest, stride, eob, bd);
    375   else
    376     vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd);
    377 }
    378 
    379 void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input,
    380                            uint16_t *dest, int stride, int eob, int bd) {
    381   if (tx_type == DCT_DCT) {
    382     vp9_highbd_idct8x8_add(input, dest, stride, eob, bd);
    383   } else {
    384     vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd);
    385   }
    386 }
    387 
    388 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input,
    389                              uint16_t *dest, int stride, int eob, int bd) {
    390   if (tx_type == DCT_DCT) {
    391     vp9_highbd_idct16x16_add(input, dest, stride, eob, bd);
    392   } else {
    393     vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd);
    394   }
    395 }
    396 #endif  // CONFIG_VP9_HIGHBITDEPTH
    397