1 /* 2 * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 #include <math.h> 12 13 #include "./vp9_rtcd.h" 14 #include "./vpx_dsp_rtcd.h" 15 #include "vp9/common/vp9_blockd.h" 16 #include "vp9/common/vp9_idct.h" 17 #include "vpx_dsp/inv_txfm.h" 18 #include "vpx_ports/mem.h" 19 20 void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, 21 int tx_type) { 22 const transform_2d IHT_4[] = { 23 { idct4_c, idct4_c }, // DCT_DCT = 0 24 { iadst4_c, idct4_c }, // ADST_DCT = 1 25 { idct4_c, iadst4_c }, // DCT_ADST = 2 26 { iadst4_c, iadst4_c } // ADST_ADST = 3 27 }; 28 29 int i, j; 30 tran_low_t out[4 * 4]; 31 tran_low_t *outptr = out; 32 tran_low_t temp_in[4], temp_out[4]; 33 34 // inverse transform row vectors 35 for (i = 0; i < 4; ++i) { 36 IHT_4[tx_type].rows(input, outptr); 37 input += 4; 38 outptr += 4; 39 } 40 41 // inverse transform column vectors 42 for (i = 0; i < 4; ++i) { 43 for (j = 0; j < 4; ++j) 44 temp_in[j] = out[j * 4 + i]; 45 IHT_4[tx_type].cols(temp_in, temp_out); 46 for (j = 0; j < 4; ++j) { 47 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 48 ROUND_POWER_OF_TWO(temp_out[j], 4)); 49 } 50 } 51 } 52 53 static const transform_2d IHT_8[] = { 54 { idct8_c, idct8_c }, // DCT_DCT = 0 55 { iadst8_c, idct8_c }, // ADST_DCT = 1 56 { idct8_c, iadst8_c }, // DCT_ADST = 2 57 { iadst8_c, iadst8_c } // ADST_ADST = 3 58 }; 59 60 void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, 61 int tx_type) { 62 int i, j; 63 tran_low_t out[8 * 8]; 64 tran_low_t *outptr = out; 65 tran_low_t temp_in[8], temp_out[8]; 66 const transform_2d ht = IHT_8[tx_type]; 67 68 // inverse transform row vectors 69 for (i = 0; i < 8; ++i) { 70 ht.rows(input, outptr); 71 input += 8; 72 outptr += 8; 73 } 74 75 // inverse transform column vectors 76 for (i = 0; i < 8; ++i) { 77 for (j = 0; j < 8; ++j) 78 temp_in[j] = out[j * 8 + i]; 79 ht.cols(temp_in, temp_out); 80 for (j = 0; j < 8; ++j) { 81 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 82 ROUND_POWER_OF_TWO(temp_out[j], 5)); 83 } 84 } 85 } 86 87 static const transform_2d IHT_16[] = { 88 { idct16_c, idct16_c }, // DCT_DCT = 0 89 { iadst16_c, idct16_c }, // ADST_DCT = 1 90 { idct16_c, iadst16_c }, // DCT_ADST = 2 91 { iadst16_c, iadst16_c } // ADST_ADST = 3 92 }; 93 94 void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, 95 int tx_type) { 96 int i, j; 97 tran_low_t out[16 * 16]; 98 tran_low_t *outptr = out; 99 tran_low_t temp_in[16], temp_out[16]; 100 const transform_2d ht = IHT_16[tx_type]; 101 102 // Rows 103 for (i = 0; i < 16; ++i) { 104 ht.rows(input, outptr); 105 input += 16; 106 outptr += 16; 107 } 108 109 // Columns 110 for (i = 0; i < 16; ++i) { 111 for (j = 0; j < 16; ++j) 112 temp_in[j] = out[j * 16 + i]; 113 ht.cols(temp_in, temp_out); 114 for (j = 0; j < 16; ++j) { 115 dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], 116 ROUND_POWER_OF_TWO(temp_out[j], 6)); 117 } 118 } 119 } 120 121 // idct 122 void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, 123 int eob) { 124 if (eob > 1) 125 vpx_idct4x4_16_add(input, dest, stride); 126 else 127 vpx_idct4x4_1_add(input, dest, stride); 128 } 129 130 131 void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, 132 int eob) { 133 if (eob > 1) 134 vpx_iwht4x4_16_add(input, dest, stride); 135 else 136 vpx_iwht4x4_1_add(input, dest, stride); 137 } 138 139 void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, 140 int eob) { 141 // If dc is 1, then input[0] is the reconstructed value, do not need 142 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. 143 144 // The calculation can be simplified if there are not many non-zero dct 145 // coefficients. Use eobs to decide what to do. 146 // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. 147 // Combine that with code here. 148 if (eob == 1) 149 // DC only DCT coefficient 150 vpx_idct8x8_1_add(input, dest, stride); 151 else if (eob <= 12) 152 vpx_idct8x8_12_add(input, dest, stride); 153 else 154 vpx_idct8x8_64_add(input, dest, stride); 155 } 156 157 void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, 158 int eob) { 159 /* The calculation can be simplified if there are not many non-zero dct 160 * coefficients. Use eobs to separate different cases. */ 161 if (eob == 1) 162 /* DC only DCT coefficient. */ 163 vpx_idct16x16_1_add(input, dest, stride); 164 else if (eob <= 10) 165 vpx_idct16x16_10_add(input, dest, stride); 166 else 167 vpx_idct16x16_256_add(input, dest, stride); 168 } 169 170 void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, 171 int eob) { 172 if (eob == 1) 173 vpx_idct32x32_1_add(input, dest, stride); 174 else if (eob <= 34) 175 // non-zero coeff only in upper-left 8x8 176 vpx_idct32x32_34_add(input, dest, stride); 177 else 178 vpx_idct32x32_1024_add(input, dest, stride); 179 } 180 181 // iht 182 void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, 183 int stride, int eob) { 184 if (tx_type == DCT_DCT) 185 vp9_idct4x4_add(input, dest, stride, eob); 186 else 187 vp9_iht4x4_16_add(input, dest, stride, tx_type); 188 } 189 190 void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, 191 int stride, int eob) { 192 if (tx_type == DCT_DCT) { 193 vp9_idct8x8_add(input, dest, stride, eob); 194 } else { 195 vp9_iht8x8_64_add(input, dest, stride, tx_type); 196 } 197 } 198 199 void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, 200 int stride, int eob) { 201 if (tx_type == DCT_DCT) { 202 vp9_idct16x16_add(input, dest, stride, eob); 203 } else { 204 vp9_iht16x16_256_add(input, dest, stride, tx_type); 205 } 206 } 207 208 #if CONFIG_VP9_HIGHBITDEPTH 209 void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, 210 int stride, int tx_type, int bd) { 211 const highbd_transform_2d IHT_4[] = { 212 { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 213 { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 214 { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 215 { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 216 }; 217 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 218 219 int i, j; 220 tran_low_t out[4 * 4]; 221 tran_low_t *outptr = out; 222 tran_low_t temp_in[4], temp_out[4]; 223 224 // Inverse transform row vectors. 225 for (i = 0; i < 4; ++i) { 226 IHT_4[tx_type].rows(input, outptr, bd); 227 input += 4; 228 outptr += 4; 229 } 230 231 // Inverse transform column vectors. 232 for (i = 0; i < 4; ++i) { 233 for (j = 0; j < 4; ++j) 234 temp_in[j] = out[j * 4 + i]; 235 IHT_4[tx_type].cols(temp_in, temp_out, bd); 236 for (j = 0; j < 4; ++j) { 237 dest[j * stride + i] = highbd_clip_pixel_add( 238 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); 239 } 240 } 241 } 242 243 static const highbd_transform_2d HIGH_IHT_8[] = { 244 { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 245 { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 246 { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 247 { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 248 }; 249 250 void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, 251 int stride, int tx_type, int bd) { 252 int i, j; 253 tran_low_t out[8 * 8]; 254 tran_low_t *outptr = out; 255 tran_low_t temp_in[8], temp_out[8]; 256 const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; 257 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 258 259 // Inverse transform row vectors. 260 for (i = 0; i < 8; ++i) { 261 ht.rows(input, outptr, bd); 262 input += 8; 263 outptr += 8; 264 } 265 266 // Inverse transform column vectors. 267 for (i = 0; i < 8; ++i) { 268 for (j = 0; j < 8; ++j) 269 temp_in[j] = out[j * 8 + i]; 270 ht.cols(temp_in, temp_out, bd); 271 for (j = 0; j < 8; ++j) { 272 dest[j * stride + i] = highbd_clip_pixel_add( 273 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); 274 } 275 } 276 } 277 278 static const highbd_transform_2d HIGH_IHT_16[] = { 279 { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 280 { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 281 { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 282 { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 283 }; 284 285 void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, 286 int stride, int tx_type, int bd) { 287 int i, j; 288 tran_low_t out[16 * 16]; 289 tran_low_t *outptr = out; 290 tran_low_t temp_in[16], temp_out[16]; 291 const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; 292 uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); 293 294 // Rows 295 for (i = 0; i < 16; ++i) { 296 ht.rows(input, outptr, bd); 297 input += 16; 298 outptr += 16; 299 } 300 301 // Columns 302 for (i = 0; i < 16; ++i) { 303 for (j = 0; j < 16; ++j) 304 temp_in[j] = out[j * 16 + i]; 305 ht.cols(temp_in, temp_out, bd); 306 for (j = 0; j < 16; ++j) { 307 dest[j * stride + i] = highbd_clip_pixel_add( 308 dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); 309 } 310 } 311 } 312 313 // idct 314 void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, 315 int eob, int bd) { 316 if (eob > 1) 317 vpx_highbd_idct4x4_16_add(input, dest, stride, bd); 318 else 319 vpx_highbd_idct4x4_1_add(input, dest, stride, bd); 320 } 321 322 323 void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, 324 int eob, int bd) { 325 if (eob > 1) 326 vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); 327 else 328 vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); 329 } 330 331 void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, 332 int eob, int bd) { 333 // If dc is 1, then input[0] is the reconstructed value, do not need 334 // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. 335 336 // The calculation can be simplified if there are not many non-zero dct 337 // coefficients. Use eobs to decide what to do. 338 // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. 339 // Combine that with code here. 340 // DC only DCT coefficient 341 if (eob == 1) { 342 vpx_highbd_idct8x8_1_add(input, dest, stride, bd); 343 } else if (eob <= 10) { 344 vpx_highbd_idct8x8_10_add(input, dest, stride, bd); 345 } else { 346 vpx_highbd_idct8x8_64_add(input, dest, stride, bd); 347 } 348 } 349 350 void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, 351 int stride, int eob, int bd) { 352 // The calculation can be simplified if there are not many non-zero dct 353 // coefficients. Use eobs to separate different cases. 354 // DC only DCT coefficient. 355 if (eob == 1) { 356 vpx_highbd_idct16x16_1_add(input, dest, stride, bd); 357 } else if (eob <= 10) { 358 vpx_highbd_idct16x16_10_add(input, dest, stride, bd); 359 } else { 360 vpx_highbd_idct16x16_256_add(input, dest, stride, bd); 361 } 362 } 363 364 void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, 365 int stride, int eob, int bd) { 366 // Non-zero coeff only in upper-left 8x8 367 if (eob == 1) { 368 vpx_highbd_idct32x32_1_add(input, dest, stride, bd); 369 } else if (eob <= 34) { 370 vpx_highbd_idct32x32_34_add(input, dest, stride, bd); 371 } else { 372 vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); 373 } 374 } 375 376 // iht 377 void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, 378 uint8_t *dest, int stride, int eob, int bd) { 379 if (tx_type == DCT_DCT) 380 vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); 381 else 382 vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); 383 } 384 385 void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, 386 uint8_t *dest, int stride, int eob, int bd) { 387 if (tx_type == DCT_DCT) { 388 vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); 389 } else { 390 vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); 391 } 392 } 393 394 void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, 395 uint8_t *dest, int stride, int eob, int bd) { 396 if (tx_type == DCT_DCT) { 397 vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); 398 } else { 399 vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); 400 } 401 } 402 #endif // CONFIG_VP9_HIGHBITDEPTH 403