Home | History | Annotate | Download | only in common
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19 *******************************************************************************
     20 * @file
     21 *  ihevc_trans_macros.h
     22 *
     23 * @brief
     24 *  Macros used in the forward transform and inverse transform functions
     25 *
     26 * @author
     27 *  Ittiam
     28 *
     29 * @remarks
     30 *  None
     31 *
     32 *******************************************************************************
     33 */
     34 #ifndef IHEVC_TRANS_MACROS_H_
     35 #define IHEVC_TRANS_MACROS_H_
     36 
     37 #define QUANT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
     38 {                                                                                                                                                                \
     39     LWORD64 tmp;                                                                                                                                                  \
     40     WORD32 sign;                                                                                                                                                 \
     41     WORD32 bit_depth,transform_shift;                                                                                                                            \
     42     WORD32  q_bits, quant_multiplier;                                                                                                                            \
     43                                                                                                                                                                  \
     44     /* q_bits and q_add calculation*/                                                                                                                            \
     45     /* To be moved outside in neon. To be computer once per transform call */                                                                                    \
     46     bit_depth = 8;                                                                                                                                               \
     47     transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size;                                                                                        \
     48     quant_multiplier = 4 ; /* because quant_coeff are multiplied by 16. Instead of multiplying, we can reduce the division factor q_bits by 4 */                 \
     49     q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier ;                                                                       \
     50                                                                                                                                                                  \
     51     sign = (inp)<0 ? -1:1;                                                                                                                                       \
     52                                                                                                                                                                  \
     53     tmp = (LWORD64)(abs(inp));                                                                                                                                    \
     54     tmp = tmp * (quant_coeff);                                                                                                                                   \
     55     tmp = tmp + (((LWORD64)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q));                                                                                            \
     56     tmp = tmp >> q_bits;                                                                                                                                         \
     57                                                                                                                                                                  \
     58     tmp = tmp * sign;                                                                                                                                            \
     59     out = (WORD16) CLIP_S16(tmp);                                                                                                                                \
     60 }                                                                                                                                                                \
     61 
     62 #define QUANT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
     63 {                                                                                                                                                                \
     64     LWORD64 tmp;                                                                                                                                                  \
     65     WORD32 sign;                                                                                                                                                 \
     66     WORD32 transform_shift;                                                                                                                                      \
     67     WORD32  q_bits, quant_multiplier;                                                                                                                            \
     68                                                                                                                                                                  \
     69     /* q_bits and q_add calculation*/                                                                                                                            \
     70     /* To be moved outside in neon. To be computer once per transform call */                                                                                    \
     71                                                                                                                                                                  \
     72     transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size;                                                                                        \
     73     quant_multiplier = 4 ; /* because quant_coeff are multiplied by 16. Instead of multiplying, we can reduce the division factor q_bits by 4 */                 \
     74     q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier ;                                                                       \
     75                                                                                                                                                                  \
     76     sign = (inp)<0 ? -1:1;                                                                                                                                       \
     77                                                                                                                                                                  \
     78     tmp = (LWORD64)(abs(inp));                                                                                                                                    \
     79     tmp = tmp * (quant_coeff);                                                                                                                                   \
     80     tmp = tmp + (((LWORD64)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q));                                                                                            \
     81     tmp = tmp >> q_bits;                                                                                                                                         \
     82                                                                                                                                                                  \
     83     tmp = tmp * sign;                                                                                                                                            \
     84     out = (WORD16) CLIP_S16(tmp);                                                                                                                                \
     85 }
     86 /* added by 100028 */
     87 #define QUANT_NO_WEIGHTMAT(out, inp, quant_coeff, qp_div, log2_trans_size, q_add) \
     88 {                                                                                                                                                                \
     89     WORD32 tmp;                                                                                                                                                  \
     90     WORD32 sign;                                                                                                                                                 \
     91     WORD32 bit_depth,transform_shift;                                                                                                                            \
     92     WORD32  q_bits, quant_multiplier;                                                                                                                            \
     93                                                                                                                                                                  \
     94     /* q_bits and q_add calculation*/                                                                                                                            \
     95     /* To be moved outside in neon. To be computer once per transform call */                                                                                    \
     96     bit_depth = 8;                                                                                                                                               \
     97     transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size;                                                                                        \
     98     quant_multiplier = 4 ; /* because quant_coeff are multiplied by 16. Instead of multiplying, we can reduce the division factor q_bits by 4 */                 \
     99     q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */;                                                                       \
    100                                                                                                                                                                  \
    101     sign = (inp)<0 ? -1:1;                                                                                                                                       \
    102                                                                                                                                                                  \
    103     tmp = (WORD32)(abs(inp));                                                                                                                                    \
    104     tmp = tmp * (quant_coeff);                                                                                                                                   \
    105     tmp = tmp + (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q));                                                                                            \
    106     tmp = tmp >> q_bits;                                                                                                                                         \
    107                                                                                                                                                                  \
    108     tmp = tmp * sign;                                                                                                                                            \
    109     out = (WORD16) CLIP_S16(tmp);                                                                                                                                \
    110 }
    111 
    112 #define QUANT_NO_WEIGHTMAT_HBD(out, inp, quant_coeff, qp_div, log2_trans_size, q_add, bit_depth) \
    113 {                                                                                                                                                                \
    114     WORD32 tmp;                                                                                                                                                  \
    115     WORD32 sign;                                                                                                                                                 \
    116     WORD32 transform_shift;                                                                                                                                      \
    117     WORD32  q_bits, quant_multiplier;                                                                                                                            \
    118                                                                                                                                                                  \
    119     /* q_bits and q_add calculation*/                                                                                                                            \
    120     /* To be moved outside in neon. To be computer once per transform call */                                                                                    \
    121                                                                                                                                                                  \
    122     transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size;                                                                                        \
    123     quant_multiplier = 4 ; /* because quant_coeff are multiplied by 16. Instead of multiplying, we can reduce the division factor q_bits by 4 */                 \
    124     q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */;                                                                       \
    125                                                                                                                                                                  \
    126     sign = (inp)<0 ? -1:1;                                                                                                                                       \
    127                                                                                                                                                                  \
    128     tmp = (WORD32)(abs(inp));                                                                                                                                    \
    129     tmp = tmp * (quant_coeff);                                                                                                                                   \
    130     tmp = tmp + (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q));                                                                                            \
    131     tmp = tmp >> q_bits;                                                                                                                                         \
    132                                                                                                                                                                  \
    133     tmp = tmp * sign;                                                                                                                                            \
    134     out = (WORD16) CLIP_S16(tmp);                                                                                                                                \
    135 }
    136 /* Reference Inverse Quantization: "pi2_src"(Coefficients) will be clipped to 15 or 14 bits when (qp_div > shift_iq). Spec doesn't have any clip mentioned  */
    137 
    138 /* Inverse quantization other than 4x4 */
    139 /* No clipping is needed for "pi2_src"(coefficients) */
    140 #define IQUANT(res, coeff /*pi2_src[index*src_strd]*/, dequant_coeff /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */, shift_iq, qp_div)       \
    141 {                                                                                                                                              \
    142     WORD32 tmp, add_iq;                                                                                                                        \
    143                                                                                                                                                \
    144     add_iq = SHL_NEG(1 , (shift_iq - qp_div - 1));  /* To be moved outside in neon. To be computed once per transform call */                  \
    145                                                                                                                                                \
    146     tmp = coeff * dequant_coeff ;                                                                                                              \
    147     tmp = tmp + add_iq;                                                                                                                        \
    148     tmp = SHR_NEG(tmp,(shift_iq - qp_div));                                                                                                    \
    149                                                                                                                                                \
    150     res = CLIP_S16(tmp);                                                                                                                       \
    151 }
    152 
    153 /* 4x4 inverse quantization */
    154 /* Options : */
/* 1. Clip "pi2_src"(coefficients) to 10 bits if "(qp_div > shift_iq)" or 16 bits if "(qp_div <= shift_iq)" */
    156 /* 2. Increasing precision of "pi2_src"(coefficients) to 64 bits */
    157 
    158 #define IQUANT_4x4(res, coeff /*pi2_src[index*src_strd]*/, dequant_coeff /*pi2_dequant_coeff[index*trans_size] * g_ihevc_iquant_scales[qp_rem] */, shift_iq, qp_div)   \
    159 {                                                                                                                                              \
    160     WORD32 clip_coeff, tmp;                                                                                                                    \
    161     WORD32 coeff_min,coeff_max;                                                                                                                \
    162     WORD32 coeff_bit_range;                                                                                                                    \
    163     WORD32 add_iq;                                                                                                                             \
    164     add_iq = SHL_NEG(1 , (shift_iq - qp_div - 1));  /* To be moved outside in neon. To be computed once per transform call */                  \
    165                                                                                                                                                \
    166     coeff_bit_range = 16;                                                                                                                      \
    167     if(qp_div > shift_iq)                                                                                                                      \
    168         coeff_bit_range = 10;                                                                                                                  \
    169                                                                                                                                                \
    170     coeff_min = -(1<<(coeff_bit_range-1));                                                                                                     \
    171     coeff_max = (1<<(coeff_bit_range-1)) - 1;                                                                                                  \
    172                                                                                                                                                \
    173     clip_coeff = CLIP3(coeff,coeff_min,coeff_max);                                                                                             \
    174                                                                                                                                                \
    175     tmp = clip_coeff * dequant_coeff ;                                                                                                         \
    176     tmp = tmp + add_iq;                                                                                                                        \
    177     tmp = SHR_NEG(tmp,(shift_iq - qp_div));                                                                                                    \
    178                                                                                                                                                \
    179     res = CLIP_S16(tmp);                                                                                                                       \
    180 }
    181 
    182 #endif /* IHEVC_TRANS_MACROS_H_ */
    183