Home | History | Annotate | Download | only in common
      1 /******************************************************************************
      2  *
      3  * Copyright (C) 2015 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 /**
     21  *******************************************************************************
     22  * @file
     23  *  ih264_iquant_itrans_recon.c
     24  *
     25  * @brief
     26  *  Contains definition of functions for h264 inverse quantization inverse transformation and recon
     27  *
     28  * @author
     29  *  Ittiam
     30  *
     31  *  @par List of Functions:
     32  *  - ih264_iquant_itrans_recon_4x4()
     33  *  - ih264_iquant_itrans_recon_8x8()
     34  *  - ih264_iquant_itrans_recon_4x4_dc()
     35  *  - ih264_iquant_itrans_recon_8x8_dc()
     36  *  - ih264_iquant_itrans_recon_chroma_4x4()
     37  *  - ih264_iquant_itrans_recon_chroma_4x4_dc()
     38  *
     39  * @remarks
     40  *
     41  *******************************************************************************
     42  */
     43 
     44 /*****************************************************************************/
     45 /* File Includes                                                             */
     46 /*****************************************************************************/
     47 
     48 /* User include files */
     49 #include "ih264_typedefs.h"
     50 #include "ih264_defs.h"
     51 #include "ih264_trans_macros.h"
     52 #include "ih264_macros.h"
     53 #include "ih264_platform_macros.h"
     54 #include "ih264_trans_data.h"
     55 #include "ih264_size_defs.h"
     56 #include "ih264_structs.h"
     57 #include "ih264_trans_quant_itrans_iquant.h"
     58 
     59 /*
     60  ********************************************************************************
     61  *
     62  * @brief This function reconstructs a 4x4 sub block from quantized resiude and
     63  * prediction buffer
     64  *
     65  * @par Description:
     66  *  The quantized residue is first inverse quantized, then inverse transformed.
     67  *  This inverse transformed content is added to the prediction buffer to recon-
     68  *  struct the end output
     69  *
     70  * @param[in] pi2_src
     71  *  quantized 4x4 block
     72  *
     73  * @param[in] pu1_pred
     74  *  prediction 4x4 block
     75  *
     76  * @param[out] pu1_out
     77  *  reconstructed 4x4 block
     78  *
     79  * @param[in] src_strd
     80  *  quantization buffer stride
     81  *
     82  * @param[in] pred_strd,
     83  *  Prediction buffer stride
     84  *
     85  * @param[in] out_strd
     86  *  recon buffer Stride
     87  *
     88  * @param[in] pu2_scaling_list
     89  *  pointer to scaling list
     90  *
     91  * @param[in] pu2_norm_adjust
     92  *  pointer to inverse scale matrix
     93  *
     94  * @param[in] u4_qp_div_6
     95  *  Floor (qp/6)
     96  *
     97  * @param[in] pi4_tmp
     98  * temporary buffer of size 1*16
     99  *
    100  * @returns none
    101  *
    102  * @remarks none
    103  *
    104  *******************************************************************************
    105  */
    106 void ih264_iquant_itrans_recon_4x4(WORD16 *pi2_src,
    107                                    UWORD8 *pu1_pred,
    108                                    UWORD8 *pu1_out,
    109                                    WORD32 pred_strd,
    110                                    WORD32 out_strd,
    111                                    const UWORD16 *pu2_iscal_mat,
    112                                    const UWORD16 *pu2_weigh_mat,
    113                                    UWORD32 u4_qp_div_6,
    114                                    WORD16 *pi2_tmp,
    115                                    WORD32 iq_start_idx,
    116                                    WORD16 *pi2_dc_ld_addr
    117 )
    118 {
    119     WORD16 *pi2_src_ptr = pi2_src;
    120     WORD16 *pi2_tmp_ptr = pi2_tmp;
    121     UWORD8 *pu1_pred_ptr = pu1_pred;
    122     UWORD8 *pu1_out_ptr = pu1_out;
    123     WORD16 x0, x1, x2, x3, i;
    124     WORD32 q0, q1, q2, q3;
    125     WORD16 i_macro;
    126     WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
    127 
    128     /* inverse quant */
    129     /*horizontal inverse transform */
    130     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    131     {
    132         q0 = pi2_src_ptr[0];
    133         INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact,
    134                   4);
    135         if (i==0 && iq_start_idx == 1)
    136             q0 = pi2_dc_ld_addr[0];     // Restoring dc value for intra case
    137 
    138         q2 = pi2_src_ptr[2];
    139         INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact,
    140                   4);
    141 
    142         x0 = q0 + q2;
    143         x1 = q0 - q2;
    144 
    145         q1 = pi2_src_ptr[1];
    146         INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact,
    147                   4);
    148 
    149         q3 = pi2_src_ptr[3];
    150         INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact,
    151                   4);
    152 
    153         x2 = (q1 >> 1) - q3;
    154         x3 = q1 + (q3 >> 1);
    155 
    156         pi2_tmp_ptr[0] = x0 + x3;
    157         pi2_tmp_ptr[1] = x1 + x2;
    158         pi2_tmp_ptr[2] = x1 - x2;
    159         pi2_tmp_ptr[3] = x0 - x3;
    160 
    161         pi2_src_ptr += SUB_BLK_WIDTH_4x4;
    162         pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
    163         pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
    164         pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
    165     }
    166 
    167     /* vertical inverse transform */
    168     pi2_tmp_ptr = pi2_tmp;
    169     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    170     {
    171         pu1_pred_ptr = pu1_pred;
    172         pu1_out = pu1_out_ptr;
    173 
    174         x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
    175         x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
    176         x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
    177         x3 = pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
    178 
    179         /* inverse prediction */
    180         i_macro = x0 + x3;
    181         i_macro = ((i_macro + 32) >> 6);
    182         i_macro += *pu1_pred_ptr;
    183         *pu1_out = CLIP_U8(i_macro);
    184         pu1_pred_ptr += pred_strd;
    185         pu1_out += out_strd;
    186 
    187         i_macro = x1 + x2;
    188         i_macro = ((i_macro + 32) >> 6);
    189         i_macro += *pu1_pred_ptr;
    190         *pu1_out = CLIP_U8(i_macro);
    191         pu1_pred_ptr += pred_strd;
    192         pu1_out += out_strd;
    193 
    194         i_macro = x1 - x2;
    195         i_macro = ((i_macro + 32) >> 6);
    196         i_macro += *pu1_pred_ptr;
    197         *pu1_out = CLIP_U8(i_macro);
    198         pu1_pred_ptr += pred_strd;
    199         pu1_out += out_strd;
    200 
    201         i_macro = x0 - x3;
    202         i_macro = ((i_macro + 32) >> 6);
    203         i_macro += *pu1_pred_ptr;
    204         *pu1_out = CLIP_U8(i_macro);
    205 
    206         pi2_tmp_ptr++;
    207         pu1_out_ptr++;
    208         pu1_pred++;
    209     }
    210 
    211 }
    212 
    213 void ih264_iquant_itrans_recon_4x4_dc(WORD16 *pi2_src,
    214                                       UWORD8 *pu1_pred,
    215                                       UWORD8 *pu1_out,
    216                                       WORD32 pred_strd,
    217                                       WORD32 out_strd,
    218                                       const UWORD16 *pu2_iscal_mat,
    219                                       const UWORD16 *pu2_weigh_mat,
    220                                       UWORD32 u4_qp_div_6,
    221                                       WORD16 *pi2_tmp,
    222                                       WORD32 iq_start_idx,
    223                                       WORD16 *pi2_dc_ld_addr)
    224 {
    225     UWORD8 *pu1_pred_ptr = pu1_pred;
    226     UWORD8 *pu1_out_ptr = pu1_out;
    227     WORD32 q0;
    228     WORD16 x, i_macro, i;
    229     WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
    230     UNUSED(pi2_tmp);
    231 
    232     if (iq_start_idx == 0)
    233     {
    234       q0 = pi2_src[0];
    235       INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
    236     }
    237     else
    238     {
    239       q0 = pi2_dc_ld_addr[0];    // Restoring dc value for intra case3
    240     }
    241     i_macro = ((q0 + 32) >> 6);
    242     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    243     {
    244         pu1_pred_ptr = pu1_pred;
    245         pu1_out = pu1_out_ptr;
    246 
    247         /* inverse prediction */
    248 
    249         x = i_macro + *pu1_pred_ptr;
    250         *pu1_out = CLIP_U8(x);
    251         pu1_pred_ptr += pred_strd;
    252         pu1_out += out_strd;
    253 
    254         x = i_macro + *pu1_pred_ptr;
    255         *pu1_out = CLIP_U8(x);
    256         pu1_pred_ptr += pred_strd;
    257         pu1_out += out_strd;
    258 
    259         x = i_macro + *pu1_pred_ptr;
    260         *pu1_out = CLIP_U8(x);
    261         pu1_pred_ptr += pred_strd;
    262         pu1_out += out_strd;
    263 
    264         x = i_macro + *pu1_pred_ptr;
    265         *pu1_out = CLIP_U8(x);
    266 
    267         pu1_out_ptr++;
    268         pu1_pred++;
    269     }
    270 }
    271 
    272 /**
    273  *******************************************************************************
    274  *
    275  * @brief
    276  *  This function performs inverse quant and Inverse transform type Ci4 for 8x8 block
    277  *
    278  * @par Description:
    279  *  Performs inverse transform Ci8 and adds the residue to get the
    280  *  reconstructed block
    281  *
    282  * @param[in] pi2_src
    283  *  Input 8x8coefficients
    284  *
    285  * @param[in] pu1_pred
    286  *  Prediction 8x8 block
    287  *
    288  * @param[out] pu1_recon
    289  *  Output 8x8 block
    290  *
    291  * @param[in] q_div
    292  *  QP/6
    293  *
    294  * @param[in] q_rem
    295  *  QP%6
    296  *
    297  * @param[in] q_lev
    298  *  Quantizer level
    299  *
    300  * @param[in] src_strd
    301  *  Input stride
    302  *
    303  * @param[in] pred_strd,
    304  *  Prediction stride
    305  *
    306  * @param[in] out_strd
    307  *  Output Stride
    308  *
    309  * @param[in] pi4_tmp
    310  *  temporary buffer of size 1*16 we dont need a bigger blcok since we reuse
    311  *  the tmp for each block
    312  *
    313  * @param[in] pu4_iquant_mat
    314  *  Pointer to the inverse quantization matrix
    315  *
    316  * @returns  Void
    317  *
    318  * @remarks
    319  *  None
    320  *
    321  *******************************************************************************
    322  */
    323 void ih264_iquant_itrans_recon_8x8(WORD16 *pi2_src,
    324                                    UWORD8 *pu1_pred,
    325                                    UWORD8 *pu1_out,
    326                                    WORD32 pred_strd,
    327                                    WORD32 out_strd,
    328                                    const UWORD16 *pu2_iscale_mat,
    329                                    const UWORD16 *pu2_weigh_mat,
    330                                    UWORD32 qp_div,
    331                                    WORD16 *pi2_tmp,
    332                                    WORD32 iq_start_idx,
    333                                    WORD16 *pi2_dc_ld_addr
    334 )
    335 {
    336     WORD32 i;
    337     WORD16 *pi2_tmp_ptr = pi2_tmp;
    338     UWORD8 *pu1_pred_ptr = pu1_pred;
    339     UWORD8 *pu1_out_ptr = pu1_out;
    340     WORD16 i_z0, i_z1, i_z2, i_z3, i_z4, i_z5, i_z6, i_z7;
    341     WORD16 i_y0, i_y1, i_y2, i_y3, i_y4, i_y5, i_y6, i_y7;
    342     WORD16 i_macro;
    343     WORD32 q;
    344     WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
    345     UNUSED(iq_start_idx);
    346     UNUSED(pi2_dc_ld_addr);
    347     /*************************************************************/
    348     /* De quantization of coefficients. Will be replaced by SIMD */
    349     /* operations on platform. Note : DC coeff is not scaled     */
    350     /*************************************************************/
    351     for(i = 0; i < (SUB_BLK_WIDTH_8x8 * SUB_BLK_WIDTH_8x8); i++)
    352     {
    353         q = pi2_src[i];
    354         INV_QUANT(q, pu2_iscale_mat[i], pu2_weigh_mat[i], qp_div, rnd_fact, 6);
    355         pi2_tmp_ptr[i] = q;
    356     }
    357     /* Perform Inverse transform */
    358     /*--------------------------------------------------------------------*/
    359     /* IDCT [ Horizontal transformation ]                                 */
    360     /*--------------------------------------------------------------------*/
    361     for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
    362     {
    363         /*------------------------------------------------------------------*/
    364         /* y0 = w0 + w4                                                     */
    365         /* y1 = -w3 + w5 - w7 - (w7 >> 1)                                   */
    366         /* y2 = w0 - w4                                                     */
    367         /* y3 = w1 + w7 - w3 - (w3 >> 1)                                    */
    368         /* y4 = (w2 >> 1) - w6                                              */
    369         /* y5 = -w1 + w7 + w5 + (w5 >> 1)                                   */
    370         /* y6 = w2 + (w6 >> 1)                                              */
    371         /* y7 = w3 + w5 + w1 + (w1 >> 1)                                    */
    372         /*------------------------------------------------------------------*/
    373         i_y0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[4] );
    374 
    375         i_y1 = ((WORD32)(-pi2_tmp_ptr[3]) + pi2_tmp_ptr[5] - pi2_tmp_ptr[7]
    376                         - (pi2_tmp_ptr[7] >> 1));
    377 
    378         i_y2 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[4] );
    379 
    380         i_y3 = ((WORD32)pi2_tmp_ptr[1] + pi2_tmp_ptr[7] - pi2_tmp_ptr[3]
    381                         - (pi2_tmp_ptr[3] >> 1));
    382 
    383         i_y4 = ((pi2_tmp_ptr[2] >> 1) - pi2_tmp_ptr[6] );
    384 
    385         i_y5 = ((WORD32)(-pi2_tmp_ptr[1]) + pi2_tmp_ptr[7] + pi2_tmp_ptr[5]
    386                         + (pi2_tmp_ptr[5] >> 1));
    387 
    388         i_y6 = (pi2_tmp_ptr[2] + (pi2_tmp_ptr[6] >> 1));
    389 
    390         i_y7 = ((WORD32)pi2_tmp_ptr[3] + pi2_tmp_ptr[5] + pi2_tmp_ptr[1]
    391                         + (pi2_tmp_ptr[1] >> 1));
    392 
    393         /*------------------------------------------------------------------*/
    394         /* z0 = y0 + y6                                                     */
    395         /* z1 = y1 + (y7 >> 2)                                              */
    396         /* z2 = y2 + y4                                                     */
    397         /* z3 = y3 + (y5 >> 2)                                              */
    398         /* z4 = y2 - y4                                                     */
    399         /* z5 = (y3 >> 2) - y5                                              */
    400         /* z6 = y0 - y6                                                     */
    401         /* z7 = y7 - (y1 >> 2)                                              */
    402         /*------------------------------------------------------------------*/
    403         i_z0 = i_y0 + i_y6;
    404         i_z1 = i_y1 + (i_y7 >> 2);
    405         i_z2 = i_y2 + i_y4;
    406         i_z3 = i_y3 + (i_y5 >> 2);
    407         i_z4 = i_y2 - i_y4;
    408         i_z5 = (i_y3 >> 2) - i_y5;
    409         i_z6 = i_y0 - i_y6;
    410         i_z7 = i_y7 - (i_y1 >> 2);
    411 
    412         /*------------------------------------------------------------------*/
    413         /* x0 = z0 + z7                                                     */
    414         /* x1 = z2 + z5                                                     */
    415         /* x2 = z4 + z3                                                     */
    416         /* x3 = z6 + z1                                                     */
    417         /* x4 = z6 - z1                                                     */
    418         /* x5 = z4 - z3                                                     */
    419         /* x6 = z2 - z5                                                     */
    420         /* x7 = z0 - z7                                                     */
    421         /*------------------------------------------------------------------*/
    422         pi2_tmp_ptr[0] = i_z0 + i_z7;
    423         pi2_tmp_ptr[1] = i_z2 + i_z5;
    424         pi2_tmp_ptr[2] = i_z4 + i_z3;
    425         pi2_tmp_ptr[3] = i_z6 + i_z1;
    426         pi2_tmp_ptr[4] = i_z6 - i_z1;
    427         pi2_tmp_ptr[5] = i_z4 - i_z3;
    428         pi2_tmp_ptr[6] = i_z2 - i_z5;
    429         pi2_tmp_ptr[7] = i_z0 - i_z7;
    430 
    431         /* move to the next row */
    432         //pi2_src_ptr += SUB_BLK_WIDTH_8x8;
    433         pi2_tmp_ptr += SUB_BLK_WIDTH_8x8;
    434     }
    435     /*--------------------------------------------------------------------*/
    436     /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
    437     /*                                                                    */
    438     /* Add the prediction and store it back to reconstructed frame buffer */
    439     /* [Prediction buffer itself in this case]                            */
    440     /*--------------------------------------------------------------------*/
    441 
    442     pi2_tmp_ptr = pi2_tmp;
    443     for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
    444     {
    445         pu1_pred_ptr = pu1_pred;
    446         pu1_out = pu1_out_ptr;
    447         /*------------------------------------------------------------------*/
    448         /* y0j = w0j + w4j                                                  */
    449         /* y1j = -w3j + w5j -w7j -(w7j >> 1)                                */
    450         /* y2j = w0j -w4j                                                   */
    451         /* y3j = w1j + w7j -w3j -(w3j >> 1)                                 */
    452         /* y4j = ( w2j >> 1 ) -w6j                                          */
    453         /* y5j = -w1j + w7j + w5j + (w5j >> 1)                              */
    454         /* y6j = w2j + ( w6j >> 1 )                                         */
    455         /* y7j = w3j + w5j + w1j + (w1j >> 1)                               */
    456         /*------------------------------------------------------------------*/
    457         i_y0 = pi2_tmp_ptr[0] + pi2_tmp_ptr[32];
    458 
    459         i_y1 = (WORD32)(-pi2_tmp_ptr[24]) + pi2_tmp_ptr[40] - pi2_tmp_ptr[56]
    460                         - (pi2_tmp_ptr[56] >> 1);
    461 
    462         i_y2 = pi2_tmp_ptr[0] - pi2_tmp_ptr[32];
    463 
    464         i_y3 = (WORD32)pi2_tmp_ptr[8] + pi2_tmp_ptr[56] - pi2_tmp_ptr[24]
    465                         - (pi2_tmp_ptr[24] >> 1);
    466 
    467         i_y4 = (pi2_tmp_ptr[16] >> 1) - pi2_tmp_ptr[48];
    468 
    469         i_y5 = (WORD32)(-pi2_tmp_ptr[8]) + pi2_tmp_ptr[56] + pi2_tmp_ptr[40]
    470                         + (pi2_tmp_ptr[40] >> 1);
    471 
    472         i_y6 = pi2_tmp_ptr[16] + (pi2_tmp_ptr[48] >> 1);
    473 
    474         i_y7 = (WORD32)pi2_tmp_ptr[24] + pi2_tmp_ptr[40] + pi2_tmp_ptr[8]
    475                         + (pi2_tmp_ptr[8] >> 1);
    476 
    477         /*------------------------------------------------------------------*/
    478         /* z0j = y0j + y6j                                                  */
    479         /* z1j = y1j + (y7j >> 2)                                           */
    480         /* z2j = y2j + y4j                                                  */
    481         /* z3j = y3j + (y5j >> 2)                                           */
    482         /* z4j = y2j -y4j                                                   */
    483         /* z5j = (y3j >> 2) -y5j                                            */
    484         /* z6j = y0j -y6j                                                   */
    485         /* z7j = y7j -(y1j >> 2)                                            */
    486         /*------------------------------------------------------------------*/
    487         i_z0 = i_y0 + i_y6;
    488         i_z1 = i_y1 + (i_y7 >> 2);
    489         i_z2 = i_y2 + i_y4;
    490         i_z3 = i_y3 + (i_y5 >> 2);
    491         i_z4 = i_y2 - i_y4;
    492         i_z5 = (i_y3 >> 2) - i_y5;
    493         i_z6 = i_y0 - i_y6;
    494         i_z7 = i_y7 - (i_y1 >> 2);
    495 
    496         /*------------------------------------------------------------------*/
    497         /* x0j = z0j + z7j                                                  */
    498         /* x1j = z2j + z5j                                                  */
    499         /* x2j = z4j + z3j                                                  */
    500         /* x3j = z6j + z1j                                                  */
    501         /* x4j = z6j -z1j                                                   */
    502         /* x5j = z4j -z3j                                                   */
    503         /* x6j = z2j -z5j                                                   */
    504         /* x7j = z0j -z7j                                                   */
    505         /*------------------------------------------------------------------*/
    506         i_macro = ((i_z0 + i_z7 + 32) >> 6) + *pu1_pred_ptr;
    507         *pu1_out = CLIP_U8(i_macro);
    508         /* Change uc_recBuffer to Point to next element in the same column*/
    509         pu1_pred_ptr += pred_strd;
    510         pu1_out += out_strd;
    511 
    512         i_macro = ((i_z2 + i_z5 + 32) >> 6) + *pu1_pred_ptr;
    513         *pu1_out = CLIP_U8(i_macro);
    514         pu1_pred_ptr += pred_strd;
    515         pu1_out += out_strd;
    516 
    517         i_macro = ((i_z4 + i_z3 + 32) >> 6) + *pu1_pred_ptr;
    518         *pu1_out = CLIP_U8(i_macro);
    519         pu1_pred_ptr += pred_strd;
    520         pu1_out += out_strd;
    521 
    522         i_macro = ((i_z6 + i_z1 + 32) >> 6) + *pu1_pred_ptr;
    523         *pu1_out = CLIP_U8(i_macro);
    524         pu1_pred_ptr += pred_strd;
    525         pu1_out += out_strd;
    526 
    527         i_macro = ((i_z6 - i_z1 + 32) >> 6) + *pu1_pred_ptr;
    528         *pu1_out = CLIP_U8(i_macro);
    529         pu1_pred_ptr += pred_strd;
    530         pu1_out += out_strd;
    531 
    532         i_macro = ((i_z4 - i_z3 + 32) >> 6) + *pu1_pred_ptr;
    533         *pu1_out = CLIP_U8(i_macro);
    534         pu1_pred_ptr += pred_strd;
    535         pu1_out += out_strd;
    536 
    537         i_macro = ((i_z2 - i_z5 + 32) >> 6) + *pu1_pred_ptr;
    538         *pu1_out = CLIP_U8(i_macro);
    539         pu1_pred_ptr += pred_strd;
    540         pu1_out += out_strd;
    541 
    542         i_macro = ((i_z0 - i_z7 + 32) >> 6) + *pu1_pred_ptr;
    543         *pu1_out = CLIP_U8(i_macro);
    544 
    545         pi2_tmp_ptr++;
    546         pu1_out_ptr++;
    547         pu1_pred++;
    548     }
    549 }
    550 
    551 void ih264_iquant_itrans_recon_8x8_dc(WORD16 *pi2_src,
    552                                       UWORD8 *pu1_pred,
    553                                       UWORD8 *pu1_out,
    554                                       WORD32 pred_strd,
    555                                       WORD32 out_strd,
    556                                       const UWORD16 *pu2_iscale_mat,
    557                                       const UWORD16 *pu2_weigh_mat,
    558                                       UWORD32 qp_div,
    559                                       WORD16 *pi2_tmp,
    560                                       WORD32 iq_start_idx,
    561                                       WORD16 *pi2_dc_ld_addr)
    562 {
    563     UWORD8 *pu1_pred_ptr = pu1_pred;
    564     UWORD8 *pu1_out_ptr = pu1_out;
    565     WORD16 x, i, i_macro;
    566     WORD32 q;
    567     WORD32 rnd_fact = (qp_div < 6) ? (1 << (5 - qp_div)) : 0;
    568     UNUSED(pi2_tmp);
    569     UNUSED(iq_start_idx);
    570     UNUSED(pi2_dc_ld_addr);
    571     /*************************************************************/
    572     /* Dequantization of coefficients. Will be replaced by SIMD  */
    573     /* operations on platform. Note : DC coeff is not scaled     */
    574     /*************************************************************/
    575     q = pi2_src[0];
    576     INV_QUANT(q, pu2_iscale_mat[0], pu2_weigh_mat[0], qp_div, rnd_fact, 6);
    577     i_macro = (q + 32) >> 6;
    578     /* Perform Inverse transform */
    579     /*--------------------------------------------------------------------*/
    580     /* IDCT [ Horizontal transformation ]                                 */
    581     /*--------------------------------------------------------------------*/
    582     /*--------------------------------------------------------------------*/
    583     /* IDCT [ Vertical transformation] and Xij = (xij + 32)>>6            */
    584     /*                                                                    */
    585     /* Add the prediction and store it back to reconstructed frame buffer */
    586     /* [Prediction buffer itself in this case]                            */
    587     /*--------------------------------------------------------------------*/
    588     for(i = 0; i < SUB_BLK_WIDTH_8x8; i++)
    589     {
    590         pu1_pred_ptr = pu1_pred;
    591         pu1_out = pu1_out_ptr;
    592 
    593         x = i_macro + *pu1_pred_ptr;
    594         *pu1_out = CLIP_U8(x);
    595         /* Change uc_recBuffer to Point to next element in the same column*/
    596         pu1_pred_ptr += pred_strd;
    597         pu1_out += out_strd;
    598 
    599         x = i_macro + *pu1_pred_ptr;
    600         *pu1_out = CLIP_U8(x);
    601         pu1_pred_ptr += pred_strd;
    602         pu1_out += out_strd;
    603 
    604         x = i_macro + *pu1_pred_ptr;
    605         *pu1_out = CLIP_U8(x);
    606         pu1_pred_ptr += pred_strd;
    607         pu1_out += out_strd;
    608 
    609         x = i_macro + *pu1_pred_ptr;
    610         *pu1_out = CLIP_U8(x);
    611         pu1_pred_ptr += pred_strd;
    612         pu1_out += out_strd;
    613 
    614         x = i_macro + *pu1_pred_ptr;
    615         *pu1_out = CLIP_U8(x);
    616         pu1_pred_ptr += pred_strd;
    617         pu1_out += out_strd;
    618 
    619         x = i_macro + *pu1_pred_ptr;
    620         *pu1_out = CLIP_U8(x);
    621         pu1_pred_ptr += pred_strd;
    622         pu1_out += out_strd;
    623 
    624         x = i_macro + *pu1_pred_ptr;
    625         *pu1_out = CLIP_U8(x);
    626         pu1_pred_ptr += pred_strd;
    627         pu1_out += out_strd;
    628 
    629         x = i_macro + *pu1_pred_ptr;
    630         *pu1_out = CLIP_U8(x);
    631 
    632         pu1_out_ptr++;
    633         pu1_pred++;
    634     }
    635 }
    636 
    637 /*
    638  ********************************************************************************
    639  *
    640  * @brief This function reconstructs a 4x4 sub block from quantized residue and
    641  * prediction buffer
    642  *
    643  * @par Description:
    644  *  The quantized residue is first inverse quantized, then inverse transformed.
    645  *  This inverse transformed content is added to the prediction buffer to recon-
    646  *  struct the end output
    647  *
    648  * @param[in] pi2_src
    649  *  quantized 4x4 block
    650  *
    651  * @param[in] pu1_pred
    652  *  prediction 4x4 block
    653  *
    654  * @param[out] pu1_out
    655  *  reconstructed 4x4 block
    656  *
    657  * @param[in] src_strd
    658  *  quantization buffer stride
    659  *
    660  * @param[in] pred_strd,
    661  *  Prediction buffer stride
    662  *
    663  * @param[in] out_strd
    664  *  recon buffer Stride
    665  *
    666  * @param[in] pu2_scaling_list
    667  *  pointer to scaling list
    668  *
    669  * @param[in] pu2_norm_adjust
    670  *  pointer to inverse scale matrix
    671  *
    672  * @param[in] u4_qp_div_6
    673  *  Floor (qp/6)
    674  *
    675  * @param[in] pi2_tmp
    676  * temporary buffer of size 1*16
    677  *
    678  * @returns none
    679  *
    680  * @remarks none
    681  *
    682  *******************************************************************************
    683  */
    684 void ih264_iquant_itrans_recon_chroma_4x4(WORD16 *pi2_src,
    685                                           UWORD8 *pu1_pred,
    686                                           UWORD8 *pu1_out,
    687                                           WORD32 pred_strd,
    688                                           WORD32 out_strd,
    689                                           const UWORD16 *pu2_iscal_mat,
    690                                           const UWORD16 *pu2_weigh_mat,
    691                                           UWORD32 u4_qp_div_6,
    692                                           WORD16 *pi2_tmp,
    693                                           WORD16 *pi2_dc_src)
    694 {
    695     WORD16 *pi2_src_ptr = pi2_src;
    696     WORD16 *pi2_tmp_ptr = pi2_tmp;
    697     UWORD8 *pu1_pred_ptr = pu1_pred;
    698     UWORD8 *pu1_out_ptr = pu1_out;
    699     WORD16 x0, x1, x2, x3, i;
    700     WORD32 q0, q1, q2, q3;
    701     WORD16 i_macro;
    702     WORD16 rnd_fact = (u4_qp_div_6 < 4) ? 1 << (3 - u4_qp_div_6) : 0;
    703 
    704     /* inverse quant */
    705     /*horizontal inverse transform */
    706     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    707     {
    708       if(i==0)
    709       {
    710         q0 = pi2_dc_src[0];
    711       }
    712       else
    713       {
    714         q0 = pi2_src_ptr[0];
    715         INV_QUANT(q0, pu2_iscal_mat[0], pu2_weigh_mat[0], u4_qp_div_6, rnd_fact, 4);
    716       }
    717 
    718       q2 = pi2_src_ptr[2];
    719       INV_QUANT(q2, pu2_iscal_mat[2], pu2_weigh_mat[2], u4_qp_div_6, rnd_fact,
    720                 4);
    721 
    722       x0 = q0 + q2;
    723       x1 = q0 - q2;
    724 
    725       q1 = pi2_src_ptr[1];
    726       INV_QUANT(q1, pu2_iscal_mat[1], pu2_weigh_mat[1], u4_qp_div_6, rnd_fact,
    727                 4);
    728 
    729       q3 = pi2_src_ptr[3];
    730       INV_QUANT(q3, pu2_iscal_mat[3], pu2_weigh_mat[3], u4_qp_div_6, rnd_fact,
    731                 4);
    732 
    733       x2 = (q1 >> 1) - q3;
    734       x3 = q1 + (q3 >> 1);
    735 
    736       pi2_tmp_ptr[0] = x0 + x3;
    737       pi2_tmp_ptr[1] = x1 + x2;
    738       pi2_tmp_ptr[2] = x1 - x2;
    739       pi2_tmp_ptr[3] = x0 - x3;
    740 
    741       pi2_src_ptr += SUB_BLK_WIDTH_4x4;
    742       pi2_tmp_ptr += SUB_BLK_WIDTH_4x4;
    743       pu2_iscal_mat += SUB_BLK_WIDTH_4x4;
    744       pu2_weigh_mat += SUB_BLK_WIDTH_4x4;
    745     }
    746 
    747     /* vertical inverse transform */
    748     pi2_tmp_ptr = pi2_tmp;
    749     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    750     {
    751         pu1_pred_ptr = pu1_pred;
    752         pu1_out = pu1_out_ptr;
    753 
    754         x0 = (pi2_tmp_ptr[0] + pi2_tmp_ptr[8]);
    755         x1 = (pi2_tmp_ptr[0] - pi2_tmp_ptr[8]);
    756         x2 = (pi2_tmp_ptr[4] >> 1) - pi2_tmp_ptr[12];
    757         x3 =  pi2_tmp_ptr[4] + (pi2_tmp_ptr[12] >> 1);
    758 
    759         /* inverse prediction */
    760         i_macro = x0 + x3;
    761         i_macro = ((i_macro + 32) >> 6);
    762         i_macro += *pu1_pred_ptr;
    763         *pu1_out = CLIP_U8(i_macro);
    764         pu1_pred_ptr += pred_strd;
    765         pu1_out += out_strd;
    766 
    767         i_macro = x1 + x2;
    768         i_macro = ((i_macro + 32) >> 6);
    769         i_macro += *pu1_pred_ptr;
    770         *pu1_out = CLIP_U8(i_macro);
    771         pu1_pred_ptr += pred_strd;
    772         pu1_out += out_strd;
    773 
    774         i_macro = x1 - x2;
    775         i_macro = ((i_macro + 32) >> 6);
    776         i_macro += *pu1_pred_ptr;
    777         *pu1_out = CLIP_U8(i_macro);
    778         pu1_pred_ptr += pred_strd;
    779         pu1_out += out_strd;
    780 
    781         i_macro = x0 - x3;
    782         i_macro = ((i_macro + 32) >> 6);
    783         i_macro += *pu1_pred_ptr;
    784         *pu1_out = CLIP_U8(i_macro);
    785 
    786         pi2_tmp_ptr++;
    787         pu1_out_ptr+= 2;    //Interleaved store for output
    788         pu1_pred+= 2;       //Interleaved load for pred buffer
    789     }
    790 }
    791 
    792 /*
    793  ********************************************************************************
    794  *
    795  * @brief This function reconstructs a 4x4 sub block from quantized resiude and
    796  * prediction buffer if only dc value is present for residue
    797  *
    798  * @par Description:
    799  *  The quantized residue is first inverse quantized,
    800  *  This inverse quantized content is added to the prediction buffer to recon-
    801  *  struct the end output
    802  *
    803  * @param[in] pi2_src
    804  *  quantized dc coefficient
    805  *
    806  * @param[in] pu1_pred
    807  *  prediction 4x4 block in interleaved format
    808  *
    809  * @param[in] pred_strd,
    810  *  Prediction buffer stride in interleaved format
    811  *
    812  * @param[in] out_strd
    813  *  recon buffer Stride
    814  *
    815  * @returns none
    816  *
    817  * @remarks none
    818  *
    819  *******************************************************************************
    820  */
    821 
    822 void ih264_iquant_itrans_recon_chroma_4x4_dc(WORD16 *pi2_src,
    823                                              UWORD8 *pu1_pred,
    824                                              UWORD8 *pu1_out,
    825                                              WORD32 pred_strd,
    826                                              WORD32 out_strd,
    827                                              const UWORD16 *pu2_iscal_mat,
    828                                              const UWORD16 *pu2_weigh_mat,
    829                                              UWORD32 u4_qp_div_6,
    830                                              WORD16 *pi2_tmp,
    831                                              WORD16 *pi2_dc_src)
    832 {
    833     UWORD8 *pu1_pred_ptr = pu1_pred;
    834     UWORD8 *pu1_out_ptr = pu1_out;
    835     WORD32 q0;
    836     WORD16 x, i_macro, i;
    837     UNUSED(pi2_src);
    838     UNUSED(pu2_iscal_mat);
    839     UNUSED(pu2_weigh_mat);
    840     UNUSED(u4_qp_div_6);
    841     UNUSED(pi2_tmp);
    842 
    843     q0 = pi2_dc_src[0];    // Restoring dc value for intra case3
    844     i_macro = ((q0 + 32) >> 6);
    845 
    846     for(i = 0; i < SUB_BLK_WIDTH_4x4; i++)
    847     {
    848         pu1_pred_ptr = pu1_pred;
    849         pu1_out = pu1_out_ptr;
    850 
    851         /* inverse prediction */
    852         x = i_macro + *pu1_pred_ptr;
    853         *pu1_out =  CLIP_U8(x);
    854         pu1_pred_ptr += pred_strd;
    855         pu1_out += out_strd;
    856 
    857         x = i_macro + *pu1_pred_ptr;
    858         *pu1_out = CLIP_U8(x);
    859         pu1_pred_ptr += pred_strd;
    860         pu1_out += out_strd;
    861 
    862         x = i_macro + *pu1_pred_ptr;
    863         *pu1_out = CLIP_U8(x);
    864         pu1_pred_ptr += pred_strd;
    865         pu1_out += out_strd;
    866 
    867         x = i_macro + *pu1_pred_ptr;
    868         *pu1_out = CLIP_U8(x);
    869 
    870         pu1_out_ptr+=2;
    871         pu1_pred+=2;
    872     }
    873 }
    874