1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <math.h> 12 13 #include "./vp9_rtcd.h" 14 #include "./vpx_dsp_rtcd.h" 15 #include "vp9/common/vp9_blockd.h" 16 #include "vp9/common/vp9_idct.h" 17 #include "vpx_dsp/inv_txfm.h" 18 #include "vpx_ports/mem.h" 19 20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, 21 int tx_type) { 22 const transform_2d IHT_4[] = { 23 { idct4_c, idct4_c }, // DCT_DCT = 0 24 { iadst4_c, idct4_c }, // ADST_DCT = 1 25 { idct4_c, iadst4_c }, // DCT_ADST = 2 26 { iadst4_c, iadst4_c } // ADST_ADST = 3 27 }; 28 29 int i, j; 30 tran_low_t out[4 * 4]; 31 tran_low_t *outptr = out; 32 tran_low_t temp_in[4], temp_out[4]; 33 34 // inverse transform row vectors 35 for (i = 0; i < 4; ++i) { 36 IHT_4[tx_type].rows(input, outptr); 37 input += 4; 38 outptr += 4; 39 } 40 41 // inverse transform column vectors 42 for (i = 0; i < 4; ++i) { 43 for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; 44 IHT_4[tx_type].cols(temp_in, temp_out); 45 for (j = 0; j < 4; ++j) { 46 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 47 ROUND_POWER_OF_TWO(temp_out[j], 4)); 48 } 49 } 50 } 51 52 static const transform_2d IHT_8[] = { 53 { idct8_c, idct8_c }, // DCT_DCT = 0 54 { iadst8_c, idct8_c }, // ADST_DCT = 1 55 { idct8_c, iadst8_c }, // DCT_ADST = 2 56 { iadst8_c, iadst8_c } // ADST_ADST = 3 57 }; 58 59 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, 60 int tx_type) { 61 int i, j; 62 tran_low_t out[8 * 8]; 63 tran_low_t *outptr = out; 64 tran_low_t temp_in[8], temp_out[8]; 65 const transform_2d ht = IHT_8[tx_type]; 66 67 // inverse transform row vectors 68 for (i = 0; i < 8; ++i) { 69 ht.rows(input, outptr); 70 input += 8; 71 outptr += 8; 72 } 73 74 // inverse transform column vectors 75 for (i = 0; i < 8; ++i) { 76 for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; 77 ht.cols(temp_in, temp_out); 78 for (j = 0; j < 8; ++j) { 79 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 80 ROUND_POWER_OF_TWO(temp_out[j], 5)); 81 } 82 } 83 } 84 85 static const transform_2d IHT_16[] = { 86 { idct16_c, idct16_c }, // DCT_DCT = 0 87 { iadst16_c, idct16_c }, // ADST_DCT = 1 88 { idct16_c, iadst16_c }, // DCT_ADST = 2 89 { iadst16_c, iadst16_c } // ADST_ADST = 3 90 }; 91 92 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, 93 int tx_type) { 94 int i, j; 95 tran_low_t out[16 * 16]; 96 tran_low_t *outptr = out; 97 tran_low_t temp_in[16], temp_out[16]; 98 const transform_2d ht = IHT_16[tx_type]; 99 100 // Rows 101 for (i = 0; i < 16; ++i) { 102 ht.rows(input, outptr); 103 input += 16; 104 outptr += 16; 105 } 106 107 // Columns 108 for (i = 0; i < 16; ++i) { 109 for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; 110 ht.cols(temp_in, temp_out); 111 for (j = 0; j < 16; ++j) { 112 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 113 ROUND_POWER_OF_TWO(temp_out[j], 6)); 114 } 115 } 116 } 117 118 // idct 119 void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, 120 int eob) { 121 if (eob > 1) 122 vpx_idct4x4_16_add(input, dest, stride); 123 else 124 vpx_idct4x4_1_add(input, dest, stride); 125 } 126 127 void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, 128 int eob) { 129 if (eob > 1) 130 vpx_iwht4x4_16_add(input, dest, stride); 131 else 132 vpx_iwht4x4_1_add(input, dest, stride); 133 } 134 135 void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, 136 int eob) { 137 // If dc is 1, then input[0] is the reconstructed value, do not need 138 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. 139 140 // The calculation can be simplified if there are not many non-zero dct 141 // coefficients. Use eobs to decide what to do. 142 if (eob == 1) 143 // DC only DCT coefficient 144 vpx_idct8x8_1_add(input, dest, stride); 145 else if (eob <= 12) 146 vpx_idct8x8_12_add(input, dest, stride); 147 else 148 vpx_idct8x8_64_add(input, dest, stride); 149 } 150 151 void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, 152 int eob) { 153 /* The calculation can be simplified if there are not many non-zero dct 154 * coefficients. Use eobs to separate different cases. */ 155 if (eob == 1) /* DC only DCT coefficient. */ 156 vpx_idct16x16_1_add(input, dest, stride); 157 else if (eob <= 10) 158 vpx_idct16x16_10_add(input, dest, stride); 159 else if (eob <= 38) 160 vpx_idct16x16_38_add(input, dest, stride); 161 else 162 vpx_idct16x16_256_add(input, dest, stride); 163 } 164 165 void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, 166 int eob) { 167 if (eob == 1) 168 vpx_idct32x32_1_add(input, dest, stride); 169 else if (eob <= 34) 170 // non-zero coeff only in upper-left 8x8 171 vpx_idct32x32_34_add(input, dest, stride); 172 else if (eob <= 135) 173 // non-zero coeff only in upper-left 16x16 174 vpx_idct32x32_135_add(input, dest, stride); 175 else 176 vpx_idct32x32_1024_add(input, dest, stride); 177 } 178 179 // iht 180 void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, 181 int stride, int eob) { 182 if (tx_type == DCT_DCT) 183 vp9_idct4x4_add(input, dest, stride, eob); 184 else 185 vp9_iht4x4_16_add(input, dest, stride, tx_type); 186 } 187 188 void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, 189 int stride, int eob) { 190 if (tx_type == DCT_DCT) { 191 vp9_idct8x8_add(input, dest, stride, eob); 192 } else { 193 vp9_iht8x8_64_add(input, dest, stride, tx_type); 194 } 195 } 196 197 void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, 198 int stride, int eob) { 199 if (tx_type == DCT_DCT) { 200 vp9_idct16x16_add(input, dest, stride, eob); 201 } else { 202 vp9_iht16x16_256_add(input, dest, stride, tx_type); 203 } 204 } 205 206 #if CONFIG_VP9_HIGHBITDEPTH 207 208 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest, 209 int stride, int tx_type, int bd) { 210 const highbd_transform_2d IHT_4[] = { 211 { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 212 { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 213 { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 214 { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 215 }; 216 217 int i, j; 218 tran_low_t out[4 * 4]; 219 tran_low_t *outptr = out; 220 tran_low_t temp_in[4], temp_out[4]; 221 222 // Inverse transform row vectors. 223 for (i = 0; i < 4; ++i) { 224 IHT_4[tx_type].rows(input, outptr, bd); 225 input += 4; 226 outptr += 4; 227 } 228 229 // Inverse transform column vectors. 230 for (i = 0; i < 4; ++i) { 231 for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; 232 IHT_4[tx_type].cols(temp_in, temp_out, bd); 233 for (j = 0; j < 4; ++j) { 234 dest[j * stride + i] = highbd_clip_pixel_add( 235 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); 236 } 237 } 238 } 239 240 static const highbd_transform_2d HIGH_IHT_8[] = { 241 { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 242 { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 243 { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 244 { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 245 }; 246 247 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest, 248 int stride, int tx_type, int bd) { 249 int i, j; 250 tran_low_t out[8 * 8]; 251 tran_low_t *outptr = out; 252 tran_low_t temp_in[8], temp_out[8]; 253 const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; 254 255 // Inverse transform row vectors. 256 for (i = 0; i < 8; ++i) { 257 ht.rows(input, outptr, bd); 258 input += 8; 259 outptr += 8; 260 } 261 262 // Inverse transform column vectors. 263 for (i = 0; i < 8; ++i) { 264 for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; 265 ht.cols(temp_in, temp_out, bd); 266 for (j = 0; j < 8; ++j) { 267 dest[j * stride + i] = highbd_clip_pixel_add( 268 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); 269 } 270 } 271 } 272 273 static const highbd_transform_2d HIGH_IHT_16[] = { 274 { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 275 { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 276 { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 277 { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 278 }; 279 280 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest, 281 int stride, int tx_type, int bd) { 282 int i, j; 283 tran_low_t out[16 * 16]; 284 tran_low_t *outptr = out; 285 tran_low_t temp_in[16], temp_out[16]; 286 const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; 287 288 // Rows 289 for (i = 0; i < 16; ++i) { 290 ht.rows(input, outptr, bd); 291 input += 16; 292 outptr += 16; 293 } 294 295 // Columns 296 for (i = 0; i < 16; ++i) { 297 for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; 298 ht.cols(temp_in, temp_out, bd); 299 for (j = 0; j < 16; ++j) { 300 dest[j * stride + i] = highbd_clip_pixel_add( 301 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); 302 } 303 } 304 } 305 306 // idct 307 void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride, 308 int eob, int bd) { 309 if (eob > 1) 310 vpx_highbd_idct4x4_16_add(input, dest, stride, bd); 311 else 312 vpx_highbd_idct4x4_1_add(input, dest, stride, bd); 313 } 314 315 void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride, 316 int eob, int bd) { 317 if (eob > 1) 318 vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); 319 else 320 vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); 321 } 322 323 void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride, 324 int eob, int bd) { 325 // If dc is 1, then input[0] is the reconstructed value, do not need 326 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. 327 328 // The calculation can be simplified if there are not many non-zero dct 329 // coefficients. Use eobs to decide what to do. 330 // DC only DCT coefficient 331 if (eob == 1) { 332 vpx_highbd_idct8x8_1_add(input, dest, stride, bd); 333 } else if (eob <= 12) { 334 vpx_highbd_idct8x8_12_add(input, dest, stride, bd); 335 } else { 336 vpx_highbd_idct8x8_64_add(input, dest, stride, bd); 337 } 338 } 339 340 void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest, 341 int stride, int eob, int bd) { 342 // The calculation can be simplified if there are not many non-zero dct 343 // coefficients. Use eobs to separate different cases. 344 // DC only DCT coefficient. 345 if (eob == 1) { 346 vpx_highbd_idct16x16_1_add(input, dest, stride, bd); 347 } else if (eob <= 10) { 348 vpx_highbd_idct16x16_10_add(input, dest, stride, bd); 349 } else if (eob <= 38) { 350 vpx_highbd_idct16x16_38_add(input, dest, stride, bd); 351 } else { 352 vpx_highbd_idct16x16_256_add(input, dest, stride, bd); 353 } 354 } 355 356 void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest, 357 int stride, int eob, int bd) { 358 // Non-zero coeff only in upper-left 8x8 359 if (eob == 1) { 360 vpx_highbd_idct32x32_1_add(input, dest, stride, bd); 361 } else if (eob <= 34) { 362 vpx_highbd_idct32x32_34_add(input, dest, stride, bd); 363 } else if (eob <= 135) { 364 vpx_highbd_idct32x32_135_add(input, dest, stride, bd); 365 } else { 366 vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); 367 } 368 } 369 370 // iht 371 void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, 372 uint16_t *dest, int stride, int eob, int bd) { 373 if (tx_type == DCT_DCT) 374 vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); 375 else 376 vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); 377 } 378 379 void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, 380 uint16_t *dest, int stride, int eob, int bd) { 381 if (tx_type == DCT_DCT) { 382 vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); 383 } else { 384 vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); 385 } 386 } 387 388 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, 389 uint16_t *dest, int stride, int eob, int bd) { 390 if (tx_type == DCT_DCT) { 391 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); 392 } else { 393 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); 394 } 395 } 396 #endif // CONFIG_VP9_HIGHBITDEPTH 397