Home | History | Annotate | Download | only in common
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19  *******************************************************************************
     20  * @file
     21  *  ihevc_itrans_recon_8x8.c
     22  *
     23  * @brief
     24  *  Contains function definitions for inverse transform  and reconstruction 8x8
     25  *
     26  *
     27  * @author
     28  *  100470
     29  *
     30  * @par List of Functions:
     31  *  - ihevc_itrans_recon_8x8()
     32  *
     33  * @remarks
     34  *  None
     35  *
     36  *******************************************************************************
     37  */
     38 #include <stdio.h>
     39 #include <string.h>
     40 #include "ihevc_typedefs.h"
     41 #include "ihevc_macros.h"
     42 #include "ihevc_platform_macros.h"
     43 #include "ihevc_defs.h"
     44 #include "ihevc_trans_tables.h"
     45 #include "ihevc_itrans_recon.h"
     46 #include "ihevc_func_selector.h"
     47 #include "ihevc_trans_macros.h"
     48 
     49 /**
     50  *******************************************************************************
     51  *
     52  * @brief
     53  *  This function performs Inverse transform  and reconstruction for 8x8
     54  * input block
     55  *
     56  * @par Description:
     57  *  Performs inverse transform and adds the prediction  data and clips output
     58  * to 8 bit
     59  *
     60  * @param[in] pi2_src
     61  *  Input 8x8 coefficients
     62  *
     63  * @param[in] pi2_tmp
     64  *  Temporary 8x8 buffer for storing inverse
     65  *
     66  *  transform
     67  *  1st stage output
     68  *
     69  * @param[in] pu1_pred
     70  *  Prediction 8x8 block
     71  *
     72  * @param[out] pu1_dst
     73  *  Output 8x8 block
     74  *
     75  * @param[in] src_strd
     76  *  Input stride
     77  *
     78  * @param[in] pred_strd
     79  *  Prediction stride
     80  *
     81  * @param[in] dst_strd
     82  *  Output Stride
     83  *
     84  * @param[in] shift
     85  *  Output shift
     86  *
     87  * @param[in] zero_cols
     88  *  Zero columns in pi2_src
     89  *
     90  * @returns  Void
     91  *
     92  * @remarks
     93  *  None
     94  *
     95  *******************************************************************************
     96  */
     97 
     98 void ihevc_itrans_recon_8x8(WORD16 *pi2_src,
     99                             WORD16 *pi2_tmp,
    100                             UWORD8 *pu1_pred,
    101                             UWORD8 *pu1_dst,
    102                             WORD32 src_strd,
    103                             WORD32 pred_strd,
    104                             WORD32 dst_strd,
    105                             WORD32 zero_cols,
    106                             WORD32 zero_rows)
    107 {
    108     WORD32 j, k;
    109     WORD32 e[4], o[4];
    110     WORD32 ee[2], eo[2];
    111     WORD32 add;
    112     WORD32 shift;
    113     WORD16 *pi2_tmp_orig;
    114     WORD32 trans_size;
    115     WORD32 zero_rows_2nd_stage = zero_cols;
    116     WORD32 row_limit_2nd_stage;
    117 
    118     trans_size = TRANS_SIZE_8;
    119 
    120     pi2_tmp_orig = pi2_tmp;
    121 
    122     if((zero_cols & 0xF0) == 0xF0)
    123         row_limit_2nd_stage = 4;
    124     else
    125         row_limit_2nd_stage = TRANS_SIZE_8;
    126 
    127 
    128     if((zero_rows & 0xF0) == 0xF0) /* First 4 rows of input are non-zero */
    129     {
    130         /************************************************************************************************/
    131         /**********************************START - IT_RECON_8x8******************************************/
    132         /************************************************************************************************/
    133 
    134         /* Inverse Transform 1st stage */
    135         shift = IT_SHIFT_STAGE_1;
    136         add = 1 << (shift - 1);
    137 
    138         for(j = 0; j < row_limit_2nd_stage; j++)
    139         {
    140             /* Checking for Zero Cols */
    141             if((zero_cols & 1) == 1)
    142             {
    143                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
    144             }
    145             else
    146             {
    147                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    148                 for(k = 0; k < 4; k++)
    149                 {
    150                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
    151                                     + g_ai2_ihevc_trans_8[3][k]
    152                                                     * pi2_src[3 * src_strd];
    153                 }
    154                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd];
    155                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd];
    156                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0];
    157                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0];
    158 
    159                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    160                 e[0] = ee[0] + eo[0];
    161                 e[3] = ee[0] - eo[0];
    162                 e[1] = ee[1] + eo[1];
    163                 e[2] = ee[1] - eo[1];
    164                 for(k = 0; k < 4; k++)
    165                 {
    166                     pi2_tmp[k] =
    167                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    168                     pi2_tmp[k + 4] =
    169                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
    170                 }
    171             }
    172             pi2_src++;
    173             pi2_tmp += trans_size;
    174             zero_cols = zero_cols >> 1;
    175         }
    176 
    177         pi2_tmp = pi2_tmp_orig;
    178 
    179         /* Inverse Transform 2nd stage */
    180         shift = IT_SHIFT_STAGE_2;
    181         add = 1 << (shift - 1);
    182         if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
    183         {
    184             for(j = 0; j < trans_size; j++)
    185             {
    186                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    187                 for(k = 0; k < 4; k++)
    188                 {
    189                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
    190                                     + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
    191                 }
    192                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
    193                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
    194                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
    195                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
    196 
    197                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    198                 e[0] = ee[0] + eo[0];
    199                 e[3] = ee[0] - eo[0];
    200                 e[1] = ee[1] + eo[1];
    201                 e[2] = ee[1] - eo[1];
    202                 for(k = 0; k < 4; k++)
    203                 {
    204                     WORD32 itrans_out;
    205                     itrans_out =
    206                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    207                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    208                     itrans_out =
    209                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
    210                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
    211                 }
    212                 pi2_tmp++;
    213                 pu1_pred += pred_strd;
    214                 pu1_dst += dst_strd;
    215             }
    216         }
    217         else /* All rows of output of 1st stage are non-zero */
    218         {
    219             for(j = 0; j < trans_size; j++)
    220             {
    221                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    222                 for(k = 0; k < 4; k++)
    223                 {
    224                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
    225                                     + g_ai2_ihevc_trans_8[3][k]
    226                                                     * pi2_tmp[3 * trans_size]
    227                                     + g_ai2_ihevc_trans_8[5][k]
    228                                                     * pi2_tmp[5 * trans_size]
    229                                     + g_ai2_ihevc_trans_8[7][k]
    230                                                     * pi2_tmp[7 * trans_size];
    231                 }
    232 
    233                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
    234                                 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
    235                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
    236                                 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
    237                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
    238                                 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
    239                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
    240                                 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
    241 
    242                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    243                 e[0] = ee[0] + eo[0];
    244                 e[3] = ee[0] - eo[0];
    245                 e[1] = ee[1] + eo[1];
    246                 e[2] = ee[1] - eo[1];
    247                 for(k = 0; k < 4; k++)
    248                 {
    249                     WORD32 itrans_out;
    250                     itrans_out =
    251                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    252                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    253                     itrans_out =
    254                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
    255                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
    256                 }
    257                 pi2_tmp++;
    258                 pu1_pred += pred_strd;
    259                 pu1_dst += dst_strd;
    260             }
    261         }
    262         /************************************************************************************************/
    263         /************************************END - IT_RECON_8x8******************************************/
    264         /************************************************************************************************/
    265     }
    266     else /* All rows of input are non-zero */
    267     {
    268         /************************************************************************************************/
    269         /**********************************START - IT_RECON_8x8******************************************/
    270         /************************************************************************************************/
    271 
    272         /* Inverse Transform 1st stage */
    273         shift = IT_SHIFT_STAGE_1;
    274         add = 1 << (shift - 1);
    275 
    276         for(j = 0; j < row_limit_2nd_stage; j++)
    277         {
    278             /* Checking for Zero Cols */
    279             if((zero_cols & 1) == 1)
    280             {
    281                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
    282             }
    283             else
    284             {
    285                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    286                 for(k = 0; k < 4; k++)
    287                 {
    288                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_src[src_strd]
    289                                     + g_ai2_ihevc_trans_8[3][k]
    290                                                     * pi2_src[3 * src_strd]
    291                                     + g_ai2_ihevc_trans_8[5][k]
    292                                                     * pi2_src[5 * src_strd]
    293                                     + g_ai2_ihevc_trans_8[7][k]
    294                                                     * pi2_src[7 * src_strd];
    295                 }
    296 
    297                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_src[2 * src_strd]
    298                                 + g_ai2_ihevc_trans_8[6][0] * pi2_src[6 * src_strd];
    299                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_src[2 * src_strd]
    300                                 + g_ai2_ihevc_trans_8[6][1] * pi2_src[6 * src_strd];
    301                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_src[0]
    302                                 + g_ai2_ihevc_trans_8[4][0] * pi2_src[4 * src_strd];
    303                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_src[0]
    304                                 + g_ai2_ihevc_trans_8[4][1] * pi2_src[4 * src_strd];
    305 
    306                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    307                 e[0] = ee[0] + eo[0];
    308                 e[3] = ee[0] - eo[0];
    309                 e[1] = ee[1] + eo[1];
    310                 e[2] = ee[1] - eo[1];
    311                 for(k = 0; k < 4; k++)
    312                 {
    313                     pi2_tmp[k] =
    314                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    315                     pi2_tmp[k + 4] =
    316                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
    317                 }
    318             }
    319             pi2_src++;
    320             pi2_tmp += trans_size;
    321             zero_cols = zero_cols >> 1;
    322         }
    323 
    324         pi2_tmp = pi2_tmp_orig;
    325 
    326         /* Inverse Transform 2nd stage */
    327         shift = IT_SHIFT_STAGE_2;
    328         add = 1 << (shift - 1);
    329         if((zero_rows_2nd_stage & 0xF0) == 0xF0) /* First 4 rows of output of 1st stage are non-zero */
    330         {
    331             for(j = 0; j < trans_size; j++)
    332             {
    333                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    334                 for(k = 0; k < 4; k++)
    335                 {
    336                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
    337                                     + g_ai2_ihevc_trans_8[3][k] * pi2_tmp[3 * trans_size];
    338                 }
    339                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size];
    340                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size];
    341                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0];
    342                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0];
    343 
    344                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    345                 e[0] = ee[0] + eo[0];
    346                 e[3] = ee[0] - eo[0];
    347                 e[1] = ee[1] + eo[1];
    348                 e[2] = ee[1] - eo[1];
    349                 for(k = 0; k < 4; k++)
    350                 {
    351                     WORD32 itrans_out;
    352                     itrans_out =
    353                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    354                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    355                     itrans_out =
    356                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
    357                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
    358                 }
    359                 pi2_tmp++;
    360                 pu1_pred += pred_strd;
    361                 pu1_dst += dst_strd;
    362             }
    363         }
    364         else /* All rows of output of 1st stage are non-zero */
    365         {
    366             for(j = 0; j < trans_size; j++)
    367             {
    368                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    369                 for(k = 0; k < 4; k++)
    370                 {
    371                     o[k] = g_ai2_ihevc_trans_8[1][k] * pi2_tmp[trans_size]
    372                                     + g_ai2_ihevc_trans_8[3][k]
    373                                                     * pi2_tmp[3 * trans_size]
    374                                     + g_ai2_ihevc_trans_8[5][k]
    375                                                     * pi2_tmp[5 * trans_size]
    376                                     + g_ai2_ihevc_trans_8[7][k]
    377                                                     * pi2_tmp[7 * trans_size];
    378                 }
    379 
    380                 eo[0] = g_ai2_ihevc_trans_8[2][0] * pi2_tmp[2 * trans_size]
    381                                 + g_ai2_ihevc_trans_8[6][0] * pi2_tmp[6 * trans_size];
    382                 eo[1] = g_ai2_ihevc_trans_8[2][1] * pi2_tmp[2 * trans_size]
    383                                 + g_ai2_ihevc_trans_8[6][1] * pi2_tmp[6 * trans_size];
    384                 ee[0] = g_ai2_ihevc_trans_8[0][0] * pi2_tmp[0]
    385                                 + g_ai2_ihevc_trans_8[4][0] * pi2_tmp[4 * trans_size];
    386                 ee[1] = g_ai2_ihevc_trans_8[0][1] * pi2_tmp[0]
    387                                 + g_ai2_ihevc_trans_8[4][1] * pi2_tmp[4 * trans_size];
    388 
    389                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    390                 e[0] = ee[0] + eo[0];
    391                 e[3] = ee[0] - eo[0];
    392                 e[1] = ee[1] + eo[1];
    393                 e[2] = ee[1] - eo[1];
    394                 for(k = 0; k < 4; k++)
    395                 {
    396                     WORD32 itrans_out;
    397                     itrans_out =
    398                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    399                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    400                     itrans_out =
    401                                     CLIP_S16(((e[3 - k] - o[3 - k] + add) >> shift));
    402                     pu1_dst[k + 4] = CLIP_U8((itrans_out + pu1_pred[k + 4]));
    403                 }
    404                 pi2_tmp++;
    405                 pu1_pred += pred_strd;
    406                 pu1_dst += dst_strd;
    407             }
    408         }
    409         /************************************************************************************************/
    410         /************************************END - IT_RECON_8x8******************************************/
    411         /************************************************************************************************/
    412     }
    413 }
    414 
    415