1 /* 2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved 3 * 4 * This source code is subject to the terms of the BSD 2 Clause License and 5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License 6 * was not distributed with this source code in the LICENSE file, you can 7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open 8 * Media Patent License 1.0 was not distributed with this source code in the 9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent. 10 */ 11 12 #include <math.h> 13 14 #include "config/aom_dsp_rtcd.h" 15 #include "config/av1_rtcd.h" 16 17 #include "aom_ports/mem.h" 18 #include "av1/common/av1_inv_txfm1d_cfg.h" 19 #include "av1/common/av1_txfm.h" 20 #include "av1/common/blockd.h" 21 #include "av1/common/enums.h" 22 #include "av1/common/idct.h" 23 24 int av1_get_tx_scale(const TX_SIZE tx_size) { 25 const int pels = tx_size_2d[tx_size]; 26 // Largest possible pels is 4096 (64x64). 27 return (pels > 256) + (pels > 1024); 28 } 29 30 // NOTE: The implementation of all inverses need to be aware of the fact 31 // that input and output could be the same buffer. 32 33 // idct 34 void av1_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, 35 int eob, int bd) { 36 if (eob > 1) 37 av1_highbd_iwht4x4_16_add(input, dest, stride, bd); 38 else 39 av1_highbd_iwht4x4_1_add(input, dest, stride, bd); 40 } 41 42 void av1_highbd_inv_txfm_add_4x4_c(const tran_low_t *input, uint8_t *dest, 43 int stride, const TxfmParam *txfm_param) { 44 assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); 45 int eob = txfm_param->eob; 46 int bd = txfm_param->bd; 47 int lossless = txfm_param->lossless; 48 const int32_t *src = cast_to_int32(input); 49 const TX_TYPE tx_type = txfm_param->tx_type; 50 if (lossless) { 51 assert(tx_type == DCT_DCT); 52 av1_highbd_iwht4x4_add(input, dest, stride, eob, bd); 53 return; 54 } 55 56 av1_inv_txfm2d_add_4x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd); 57 } 58 59 void av1_highbd_inv_txfm_add_4x8_c(const tran_low_t *input, uint8_t *dest, 60 int stride, const TxfmParam *txfm_param) { 61 assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); 62 const int32_t *src = cast_to_int32(input); 63 av1_inv_txfm2d_add_4x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, 64 txfm_param->tx_type, txfm_param->bd); 65 } 66 67 void av1_highbd_inv_txfm_add_8x4_c(const tran_low_t *input, uint8_t *dest, 68 int stride, const TxfmParam *txfm_param) { 69 assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); 70 const int32_t *src = cast_to_int32(input); 71 av1_inv_txfm2d_add_8x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, 72 txfm_param->tx_type, txfm_param->bd); 73 } 74 75 void av1_highbd_inv_txfm_add_16x32_c(const tran_low_t *input, uint8_t *dest, 76 int stride, const TxfmParam *txfm_param) { 77 const int32_t *src = cast_to_int32(input); 78 av1_inv_txfm2d_add_16x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, 79 txfm_param->tx_type, txfm_param->bd); 80 } 81 82 void av1_highbd_inv_txfm_add_32x16_c(const tran_low_t *input, uint8_t *dest, 83 int stride, const TxfmParam *txfm_param) { 84 const int32_t *src = cast_to_int32(input); 85 av1_inv_txfm2d_add_32x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, 86 txfm_param->tx_type, txfm_param->bd); 87 } 88 89 void av1_highbd_inv_txfm_add_16x4_c(const tran_low_t *input, uint8_t *dest, 90 int stride, const TxfmParam *txfm_param) { 91 const int32_t *src = cast_to_int32(input); 92 av1_inv_txfm2d_add_16x4_c(src, CONVERT_TO_SHORTPTR(dest), stride, 93 txfm_param->tx_type, txfm_param->bd); 94 } 95 96 void av1_highbd_inv_txfm_add_4x16_c(const tran_low_t *input, uint8_t *dest, 97 int stride, const TxfmParam *txfm_param) { 98 const int32_t *src = cast_to_int32(input); 99 av1_inv_txfm2d_add_4x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, 100 txfm_param->tx_type, txfm_param->bd); 101 } 102 103 void av1_highbd_inv_txfm_add_32x8_c(const tran_low_t *input, uint8_t *dest, 104 int stride, const TxfmParam *txfm_param) { 105 const int32_t *src = cast_to_int32(input); 106 av1_inv_txfm2d_add_32x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, 107 txfm_param->tx_type, txfm_param->bd); 108 } 109 110 void av1_highbd_inv_txfm_add_8x32_c(const tran_low_t *input, uint8_t *dest, 111 int stride, const TxfmParam *txfm_param) { 112 const int32_t *src = cast_to_int32(input); 113 av1_inv_txfm2d_add_8x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, 114 txfm_param->tx_type, txfm_param->bd); 115 } 116 117 void av1_highbd_inv_txfm_add_32x64_c(const tran_low_t *input, uint8_t *dest, 118 int stride, const TxfmParam *txfm_param) { 119 const int32_t *src = cast_to_int32(input); 120 av1_inv_txfm2d_add_32x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, 121 txfm_param->tx_type, txfm_param->bd); 122 } 123 124 void av1_highbd_inv_txfm_add_64x32_c(const tran_low_t *input, uint8_t *dest, 125 int stride, const TxfmParam *txfm_param) { 126 const int32_t *src = cast_to_int32(input); 127 av1_inv_txfm2d_add_64x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, 128 txfm_param->tx_type, txfm_param->bd); 129 } 130 131 void av1_highbd_inv_txfm_add_16x64_c(const tran_low_t *input, uint8_t *dest, 132 int stride, const TxfmParam *txfm_param) { 133 const int32_t *src = cast_to_int32(input); 134 av1_inv_txfm2d_add_16x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, 135 txfm_param->tx_type, txfm_param->bd); 136 } 137 138 void av1_highbd_inv_txfm_add_64x16_c(const tran_low_t *input, uint8_t *dest, 139 int stride, const TxfmParam *txfm_param) { 140 const int32_t *src = cast_to_int32(input); 141 av1_inv_txfm2d_add_64x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, 142 txfm_param->tx_type, txfm_param->bd); 143 } 144 145 void av1_highbd_inv_txfm_add_8x8_c(const tran_low_t *input, uint8_t *dest, 146 int stride, const TxfmParam *txfm_param) { 147 int bd = txfm_param->bd; 148 const TX_TYPE tx_type = txfm_param->tx_type; 149 const int32_t *src = cast_to_int32(input); 150 151 av1_inv_txfm2d_add_8x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, bd); 152 } 153 154 void av1_highbd_inv_txfm_add_16x16_c(const tran_low_t *input, uint8_t *dest, 155 int stride, const TxfmParam *txfm_param) { 156 int bd = txfm_param->bd; 157 const TX_TYPE tx_type = txfm_param->tx_type; 158 const int32_t *src = cast_to_int32(input); 159 160 av1_inv_txfm2d_add_16x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, 161 bd); 162 } 163 164 void av1_highbd_inv_txfm_add_8x16_c(const tran_low_t *input, uint8_t *dest, 165 int stride, const TxfmParam *txfm_param) { 166 const int32_t *src = cast_to_int32(input); 167 av1_inv_txfm2d_add_8x16_c(src, CONVERT_TO_SHORTPTR(dest), stride, 168 txfm_param->tx_type, txfm_param->bd); 169 } 170 171 void av1_highbd_inv_txfm_add_16x8_c(const tran_low_t *input, uint8_t *dest, 172 int stride, const TxfmParam *txfm_param) { 173 const int32_t *src = cast_to_int32(input); 174 av1_inv_txfm2d_add_16x8_c(src, CONVERT_TO_SHORTPTR(dest), stride, 175 txfm_param->tx_type, txfm_param->bd); 176 } 177 178 void av1_highbd_inv_txfm_add_32x32_c(const tran_low_t *input, uint8_t *dest, 179 int stride, const TxfmParam *txfm_param) { 180 const int bd = txfm_param->bd; 181 const TX_TYPE tx_type = txfm_param->tx_type; 182 const int32_t *src = cast_to_int32(input); 183 184 av1_inv_txfm2d_add_32x32_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, 185 bd); 186 } 187 188 void av1_highbd_inv_txfm_add_64x64_c(const tran_low_t *input, uint8_t *dest, 189 int stride, const TxfmParam *txfm_param) { 190 const int bd = txfm_param->bd; 191 const TX_TYPE tx_type = txfm_param->tx_type; 192 const int32_t *src = cast_to_int32(input); 193 assert(tx_type == DCT_DCT); 194 av1_inv_txfm2d_add_64x64_c(src, CONVERT_TO_SHORTPTR(dest), stride, tx_type, 195 bd); 196 } 197 198 static void init_txfm_param(const MACROBLOCKD *xd, int plane, TX_SIZE tx_size, 199 TX_TYPE tx_type, int eob, int reduced_tx_set, 200 TxfmParam *txfm_param) { 201 (void)plane; 202 txfm_param->tx_type = tx_type; 203 txfm_param->tx_size = tx_size; 204 txfm_param->eob = eob; 205 txfm_param->lossless = xd->lossless[xd->mi[0]->segment_id]; 206 txfm_param->bd = xd->bd; 207 txfm_param->is_hbd = is_cur_buf_hbd(xd); 208 txfm_param->tx_set_type = av1_get_ext_tx_set_type( 209 txfm_param->tx_size, is_inter_block(xd->mi[0]), reduced_tx_set); 210 } 211 212 void av1_highbd_inv_txfm_add_c(const tran_low_t *input, uint8_t *dest, 213 int stride, const TxfmParam *txfm_param) { 214 assert(av1_ext_tx_used[txfm_param->tx_set_type][txfm_param->tx_type]); 215 const TX_SIZE tx_size = txfm_param->tx_size; 216 switch (tx_size) { 217 case TX_32X32: 218 av1_highbd_inv_txfm_add_32x32_c(input, dest, stride, txfm_param); 219 break; 220 case TX_16X16: 221 av1_highbd_inv_txfm_add_16x16_c(input, dest, stride, txfm_param); 222 break; 223 case TX_8X8: 224 av1_highbd_inv_txfm_add_8x8_c(input, dest, stride, txfm_param); 225 break; 226 case TX_4X8: 227 av1_highbd_inv_txfm_add_4x8_c(input, dest, stride, txfm_param); 228 break; 229 case TX_8X4: 230 av1_highbd_inv_txfm_add_8x4_c(input, dest, stride, txfm_param); 231 break; 232 case TX_8X16: 233 av1_highbd_inv_txfm_add_8x16_c(input, dest, stride, txfm_param); 234 break; 235 case TX_16X8: 236 av1_highbd_inv_txfm_add_16x8_c(input, dest, stride, txfm_param); 237 break; 238 case TX_16X32: 239 av1_highbd_inv_txfm_add_16x32_c(input, dest, stride, txfm_param); 240 break; 241 case TX_32X16: 242 av1_highbd_inv_txfm_add_32x16_c(input, dest, stride, txfm_param); 243 break; 244 case TX_64X64: 245 av1_highbd_inv_txfm_add_64x64_c(input, dest, stride, txfm_param); 246 break; 247 case TX_32X64: 248 av1_highbd_inv_txfm_add_32x64_c(input, dest, stride, txfm_param); 249 break; 250 case TX_64X32: 251 av1_highbd_inv_txfm_add_64x32_c(input, dest, stride, txfm_param); 252 break; 253 case TX_16X64: 254 av1_highbd_inv_txfm_add_16x64_c(input, dest, stride, txfm_param); 255 break; 256 case TX_64X16: 257 av1_highbd_inv_txfm_add_64x16_c(input, dest, stride, txfm_param); 258 break; 259 case TX_4X4: 260 // this is like av1_short_idct4x4 but has a special case around eob<=1 261 // which is significant (not just an optimization) for the lossless 262 // case. 263 av1_highbd_inv_txfm_add_4x4_c(input, dest, stride, txfm_param); 264 break; 265 case TX_16X4: 266 av1_highbd_inv_txfm_add_16x4_c(input, dest, stride, txfm_param); 267 break; 268 case TX_4X16: 269 av1_highbd_inv_txfm_add_4x16_c(input, dest, stride, txfm_param); 270 break; 271 case TX_8X32: 272 av1_highbd_inv_txfm_add_8x32_c(input, dest, stride, txfm_param); 273 break; 274 case TX_32X8: 275 av1_highbd_inv_txfm_add_32x8_c(input, dest, stride, txfm_param); 276 break; 277 default: assert(0 && "Invalid transform size"); break; 278 } 279 } 280 281 void av1_inv_txfm_add_c(const tran_low_t *dqcoeff, uint8_t *dst, int stride, 282 const TxfmParam *txfm_param) { 283 const TX_SIZE tx_size = txfm_param->tx_size; 284 DECLARE_ALIGNED(32, uint16_t, tmp[MAX_TX_SQUARE]); 285 int tmp_stride = MAX_TX_SIZE; 286 int w = tx_size_wide[tx_size]; 287 int h = tx_size_high[tx_size]; 288 for (int r = 0; r < h; ++r) { 289 for (int c = 0; c < w; ++c) { 290 tmp[r * tmp_stride + c] = dst[r * stride + c]; 291 } 292 } 293 294 av1_highbd_inv_txfm_add(dqcoeff, CONVERT_TO_BYTEPTR(tmp), tmp_stride, 295 txfm_param); 296 297 for (int r = 0; r < h; ++r) { 298 for (int c = 0; c < w; ++c) { 299 dst[r * stride + c] = (uint8_t)tmp[r * tmp_stride + c]; 300 } 301 } 302 } 303 304 void av1_inverse_transform_block(const MACROBLOCKD *xd, 305 const tran_low_t *dqcoeff, int plane, 306 TX_TYPE tx_type, TX_SIZE tx_size, uint8_t *dst, 307 int stride, int eob, int reduced_tx_set) { 308 if (!eob) return; 309 310 assert(eob <= av1_get_max_eob(tx_size)); 311 312 TxfmParam txfm_param; 313 init_txfm_param(xd, plane, tx_size, tx_type, eob, reduced_tx_set, 314 &txfm_param); 315 assert(av1_ext_tx_used[txfm_param.tx_set_type][txfm_param.tx_type]); 316 317 if (txfm_param.is_hbd) { 318 av1_highbd_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); 319 } else { 320 av1_inv_txfm_add(dqcoeff, dst, stride, &txfm_param); 321 } 322 } 323