Home | History | Annotate | Download | only in common
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19  *******************************************************************************
     20  * @file
     21  *  ihevc_itrans_recon_32x32.c
     22  *
     23  * @brief
     24  *  Contains function definitions for inverse transform  and reconstruction 32x32
     25  *
     26  *
     27  * @author
     28  *  100470
     29  *
     30  * @par List of Functions:
     31  *  - ihevc_itrans_recon_32x32()
     32  *
     33  * @remarks
     34  *  None
     35  *
     36  *******************************************************************************
     37  */
     38 #include <stdio.h>
     39 #include <string.h>
     40 #include "ihevc_typedefs.h"
     41 #include "ihevc_macros.h"
     42 #include "ihevc_platform_macros.h"
     43 #include "ihevc_defs.h"
     44 #include "ihevc_trans_tables.h"
     45 #include "ihevc_itrans_recon.h"
     46 #include "ihevc_func_selector.h"
     47 #include "ihevc_trans_macros.h"
     48 
     49 
     50 /**
     51  *******************************************************************************
     52  *
     53  * @brief
     54  *  This function performs Inverse transform  and reconstruction for 32x32
     55  * input block
     56  *
     57  * @par Description:
     58  *  Performs the inverse transform, adds the prediction data and clips the
     59  * output to 8 bits
     60  *
     61  * @param[in] pi2_src
     62  *  Input 32x32 coefficients
     63  *
     64  * @param[in] pi2_tmp
     65  *  Temporary 32x32 buffer for storing the inverse
     66  *  transform 1st stage output; it is written by the
     67  *  1st stage and read back, one column at a time, by
     68  *  the 2nd stage
     69  *
     70  * @param[in] pu1_pred
     71  *  Prediction 32x32 block
     72  *
     73  * @param[out] pu1_dst
     74  *  Output 32x32 block
     75  *
     76  * @param[in] src_strd
     77  *  Input stride
     78  *
     79  * @param[in] pred_strd
     80  *  Prediction stride
     81  *
     82  * @param[in] dst_strd
     83  *  Output Stride
     84  *
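     85  * @param[in] zero_cols
     86  *  Bit mask of zero columns in pi2_src (bit j set => column j is all zero)
     87  *
     88  * @param[in] zero_rows
     89  *  Bit mask of zero rows in pi2_src (bit i set => row i is all zero)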
     90  *
     91  * @returns  Void
     92  *
     93  * @remarks
     94  *  None
     95  *
     96  *******************************************************************************
     97  */
     98 
     99 void ihevc_itrans_recon_32x32(WORD16 *pi2_src,
    100                               WORD16 *pi2_tmp,
    101                               UWORD8 *pu1_pred,
    102                               UWORD8 *pu1_dst,
    103                               WORD32 src_strd,
    104                               WORD32 pred_strd,
    105                               WORD32 dst_strd,
    106                               WORD32 zero_cols,
    107                               WORD32 zero_rows)
    108 {
    109     WORD32 j, k;
    110     WORD32 e[16], o[16];
    111     WORD32 ee[8], eo[8];
    112     WORD32 eee[4], eeo[4];
    113     WORD32 eeee[2], eeeo[2];
    114     WORD32 add;
    115     WORD32 shift;
    116     WORD16 *pi2_tmp_orig;
    117     WORD32 trans_size;
    118     WORD32 zero_rows_2nd_stage = zero_cols;
    119     WORD32 row_limit_2nd_stage;
    120 
    121     trans_size = TRANS_SIZE_32;
    122     pi2_tmp_orig = pi2_tmp;
    123 
    124     if((zero_cols & 0xFFFFFFF0) == 0xFFFFFFF0)
    125         row_limit_2nd_stage = 4;
    126     else if((zero_cols & 0xFFFFFF00) == 0xFFFFFF00)
    127         row_limit_2nd_stage = 8;
    128     else
    129         row_limit_2nd_stage = TRANS_SIZE_32;
    130 
    131     if((zero_rows & 0xFFFFFFF0) == 0xFFFFFFF0)  /* First 4 rows of input are non-zero */
    132     {
    133         /************************************************************************************************/
    134         /**********************************START - IT_RECON_32x32****************************************/
    135         /************************************************************************************************/
    136         /* Inverse Transform 1st stage */
    137         shift = IT_SHIFT_STAGE_1;
    138         add = 1 << (shift - 1);
    139 
    140         for(j = 0; j < row_limit_2nd_stage; j++)
    141         {
    142             /* Checking for Zero Cols */
    143             if((zero_cols & 1) == 1)
    144             {
    145                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
    146             }
    147             else
    148             {
    149                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    150                 for(k = 0; k < 16; k++)
    151                 {
    152                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
    153                                     + g_ai2_ihevc_trans_32[3][k]
    154                                                     * pi2_src[3 * src_strd];
    155                 }
    156                 for(k = 0; k < 8; k++)
    157                 {
    158                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd];
    159                 }
    160 //                for(k = 0; k < 4; k++)
    161                 {
    162                     eeo[0] = 0;
    163                     eeo[1] = 0;
    164                     eeo[2] = 0;
    165                     eeo[3] = 0;
    166                 }
    167                 eeeo[0] = 0;
    168                 eeeo[1] = 0;
    169                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0];
    170                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0];
    171 
    172                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    173                 eee[0] = eeee[0] + eeeo[0];
    174                 eee[3] = eeee[0] - eeeo[0];
    175                 eee[1] = eeee[1] + eeeo[1];
    176                 eee[2] = eeee[1] - eeeo[1];
    177                 for(k = 0; k < 4; k++)
    178                 {
    179                     ee[k] = eee[k] + eeo[k];
    180                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    181                 }
    182                 for(k = 0; k < 8; k++)
    183                 {
    184                     e[k] = ee[k] + eo[k];
    185                     e[k + 8] = ee[7 - k] - eo[7 - k];
    186                 }
    187                 for(k = 0; k < 16; k++)
    188                 {
    189                     pi2_tmp[k] =
    190                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    191                     pi2_tmp[k + 16] =
    192                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    193                 }
    194             }
    195             pi2_src++;
    196             pi2_tmp += trans_size;
    197             zero_cols = zero_cols >> 1;
    198         }
    199 
    200         pi2_tmp = pi2_tmp_orig;
    201 
    202         /* Inverse Transform 2nd stage */
    203         shift = IT_SHIFT_STAGE_2;
    204         add = 1 << (shift - 1);
    205         if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */
    206         {
    207             for(j = 0; j < trans_size; j++)
    208             {
    209                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    210                 for(k = 0; k < 16; k++)
    211                 {
    212                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
    213                                     + g_ai2_ihevc_trans_32[3][k]
    214                                                     * pi2_tmp[3 * trans_size];
    215                 }
    216                 for(k = 0; k < 8; k++)
    217                 {
    218                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size];
    219                 }
    220 //                for(k = 0; k < 4; k++)
    221                 {
    222                     eeo[0] = 0;
    223                     eeo[1] = 0;
    224                     eeo[2] = 0;
    225                     eeo[3] = 0;
    226                 }
    227                 eeeo[0] = 0;
    228                 eeeo[1] = 0;
    229                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
    230                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
    231 
    232                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    233                 eee[0] = eeee[0] + eeeo[0];
    234                 eee[3] = eeee[0] - eeeo[0];
    235                 eee[1] = eeee[1] + eeeo[1];
    236                 eee[2] = eeee[1] - eeeo[1];
    237                 for(k = 0; k < 4; k++)
    238                 {
    239                     ee[k] = eee[k] + eeo[k];
    240                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    241                 }
    242                 for(k = 0; k < 8; k++)
    243                 {
    244                     e[k] = ee[k] + eo[k];
    245                     e[k + 8] = ee[7 - k] - eo[7 - k];
    246                 }
    247                 for(k = 0; k < 16; k++)
    248                 {
    249                     WORD32 itrans_out;
    250                     itrans_out =
    251                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    252                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    253                     itrans_out =
    254                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    255                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
    256                 }
    257                 pi2_tmp++;
    258                 pu1_pred += pred_strd;
    259                 pu1_dst += dst_strd;
    260             }
    261         }
    262         else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */
    263         {
    264             for(j = 0; j < trans_size; j++)
    265             {
    266                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    267                 for(k = 0; k < 16; k++)
    268                 {
    269                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
    270                                     + g_ai2_ihevc_trans_32[3][k]
    271                                                     * pi2_tmp[3 * trans_size]
    272                                     + g_ai2_ihevc_trans_32[5][k]
    273                                                     * pi2_tmp[5 * trans_size]
    274                                     + g_ai2_ihevc_trans_32[7][k]
    275                                                     * pi2_tmp[7 * trans_size];
    276                 }
    277                 for(k = 0; k < 8; k++)
    278                 {
    279                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
    280                                     + g_ai2_ihevc_trans_32[6][k]
    281                                                     * pi2_tmp[6 * trans_size];
    282                 }
    283                 for(k = 0; k < 4; k++)
    284                 {
    285                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size];
    286                 }
    287                 eeeo[0] = 0;
    288                 eeeo[1] = 0;
    289                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
    290                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
    291 
    292                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    293                 eee[0] = eeee[0] + eeeo[0];
    294                 eee[3] = eeee[0] - eeeo[0];
    295                 eee[1] = eeee[1] + eeeo[1];
    296                 eee[2] = eeee[1] - eeeo[1];
    297                 for(k = 0; k < 4; k++)
    298                 {
    299                     ee[k] = eee[k] + eeo[k];
    300                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    301                 }
    302                 for(k = 0; k < 8; k++)
    303                 {
    304                     e[k] = ee[k] + eo[k];
    305                     e[k + 8] = ee[7 - k] - eo[7 - k];
    306                 }
    307                 for(k = 0; k < 16; k++)
    308                 {
    309                     WORD32 itrans_out;
    310                     itrans_out =
    311                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    312                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    313                     itrans_out =
    314                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    315                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
    316                 }
    317                 pi2_tmp++;
    318                 pu1_pred += pred_strd;
    319                 pu1_dst += dst_strd;
    320             }
    321         }
    322         else /* All rows of output of 1st stage are non-zero */
    323         {
    324             for(j = 0; j < trans_size; j++)
    325             {
    326                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    327                 for(k = 0; k < 16; k++)
    328                 {
    329                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
    330                                     + g_ai2_ihevc_trans_32[3][k]
    331                                                     * pi2_tmp[3 * trans_size]
    332                                     + g_ai2_ihevc_trans_32[5][k]
    333                                                     * pi2_tmp[5 * trans_size]
    334                                     + g_ai2_ihevc_trans_32[7][k]
    335                                                     * pi2_tmp[7 * trans_size]
    336                                     + g_ai2_ihevc_trans_32[9][k]
    337                                                     * pi2_tmp[9 * trans_size]
    338                                     + g_ai2_ihevc_trans_32[11][k]
    339                                                     * pi2_tmp[11 * trans_size]
    340                                     + g_ai2_ihevc_trans_32[13][k]
    341                                                     * pi2_tmp[13 * trans_size]
    342                                     + g_ai2_ihevc_trans_32[15][k]
    343                                                     * pi2_tmp[15 * trans_size]
    344                                     + g_ai2_ihevc_trans_32[17][k]
    345                                                     * pi2_tmp[17 * trans_size]
    346                                     + g_ai2_ihevc_trans_32[19][k]
    347                                                     * pi2_tmp[19 * trans_size]
    348                                     + g_ai2_ihevc_trans_32[21][k]
    349                                                     * pi2_tmp[21 * trans_size]
    350                                     + g_ai2_ihevc_trans_32[23][k]
    351                                                     * pi2_tmp[23 * trans_size]
    352                                     + g_ai2_ihevc_trans_32[25][k]
    353                                                     * pi2_tmp[25 * trans_size]
    354                                     + g_ai2_ihevc_trans_32[27][k]
    355                                                     * pi2_tmp[27 * trans_size]
    356                                     + g_ai2_ihevc_trans_32[29][k]
    357                                                     * pi2_tmp[29 * trans_size]
    358                                     + g_ai2_ihevc_trans_32[31][k]
    359                                                     * pi2_tmp[31 * trans_size];
    360                 }
    361                 for(k = 0; k < 8; k++)
    362                 {
    363                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
    364                                     + g_ai2_ihevc_trans_32[6][k]
    365                                                     * pi2_tmp[6 * trans_size]
    366                                     + g_ai2_ihevc_trans_32[10][k]
    367                                                     * pi2_tmp[10 * trans_size]
    368                                     + g_ai2_ihevc_trans_32[14][k]
    369                                                     * pi2_tmp[14 * trans_size]
    370                                     + g_ai2_ihevc_trans_32[18][k]
    371                                                     * pi2_tmp[18 * trans_size]
    372                                     + g_ai2_ihevc_trans_32[22][k]
    373                                                     * pi2_tmp[22 * trans_size]
    374                                     + g_ai2_ihevc_trans_32[26][k]
    375                                                     * pi2_tmp[26 * trans_size]
    376                                     + g_ai2_ihevc_trans_32[30][k]
    377                                                     * pi2_tmp[30 * trans_size];
    378                 }
    379                 for(k = 0; k < 4; k++)
    380                 {
    381                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]
    382                                     + g_ai2_ihevc_trans_32[12][k]
    383                                                     * pi2_tmp[12 * trans_size]
    384                                     + g_ai2_ihevc_trans_32[20][k]
    385                                                     * pi2_tmp[20 * trans_size]
    386                                     + g_ai2_ihevc_trans_32[28][k]
    387                                                     * pi2_tmp[28 * trans_size];
    388                 }
    389                 eeeo[0] =
    390                                 g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size]
    391                                                 + g_ai2_ihevc_trans_32[24][0]
    392                                                                 * pi2_tmp[24
    393                                                                                 * trans_size];
    394                 eeeo[1] =
    395                                 g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size]
    396                                                 + g_ai2_ihevc_trans_32[24][1]
    397                                                                 * pi2_tmp[24
    398                                                                                 * trans_size];
    399                 eeee[0] =
    400                                 g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]
    401                                                 + g_ai2_ihevc_trans_32[16][0]
    402                                                                 * pi2_tmp[16
    403                                                                                 * trans_size];
    404                 eeee[1] =
    405                                 g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]
    406                                                 + g_ai2_ihevc_trans_32[16][1]
    407                                                                 * pi2_tmp[16
    408                                                                                 * trans_size];
    409 
    410                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    411                 eee[0] = eeee[0] + eeeo[0];
    412                 eee[3] = eeee[0] - eeeo[0];
    413                 eee[1] = eeee[1] + eeeo[1];
    414                 eee[2] = eeee[1] - eeeo[1];
    415                 for(k = 0; k < 4; k++)
    416                 {
    417                     ee[k] = eee[k] + eeo[k];
    418                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    419                 }
    420                 for(k = 0; k < 8; k++)
    421                 {
    422                     e[k] = ee[k] + eo[k];
    423                     e[k + 8] = ee[7 - k] - eo[7 - k];
    424                 }
    425                 for(k = 0; k < 16; k++)
    426                 {
    427                     WORD32 itrans_out;
    428                     itrans_out =
    429                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    430                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    431                     itrans_out =
    432                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    433                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
    434                 }
    435                 pi2_tmp++;
    436                 pu1_pred += pred_strd;
    437                 pu1_dst += dst_strd;
    438             }
    439         }
    440         /************************************************************************************************/
    441         /************************************END - IT_RECON_32x32****************************************/
    442         /************************************************************************************************/
    443     }
    444     else if((zero_rows & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of input are non-zero */
    445     {
    446         /************************************************************************************************/
    447         /**********************************START - IT_RECON_32x32****************************************/
    448         /************************************************************************************************/
    449         /* Inverse Transform 1st stage */
    450         shift = IT_SHIFT_STAGE_1;
    451         add = 1 << (shift - 1);
    452 
    453         for(j = 0; j < row_limit_2nd_stage; j++)
    454         {
    455             /* Checking for Zero Cols */
    456             if((zero_cols & 1) == 1)
    457             {
    458                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
    459             }
    460             else
    461             {
    462                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    463                 for(k = 0; k < 16; k++)
    464                 {
    465                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
    466                                     + g_ai2_ihevc_trans_32[3][k]
    467                                                     * pi2_src[3 * src_strd]
    468                                     + g_ai2_ihevc_trans_32[5][k]
    469                                                     * pi2_src[5 * src_strd]
    470                                     + g_ai2_ihevc_trans_32[7][k]
    471                                                     * pi2_src[7 * src_strd];
    472                 }
    473                 for(k = 0; k < 8; k++)
    474                 {
    475                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd]
    476                                     + g_ai2_ihevc_trans_32[6][k]
    477                                                     * pi2_src[6 * src_strd];
    478                 }
    479                 for(k = 0; k < 4; k++)
    480                 {
    481                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd];
    482                 }
    483                 eeeo[0] = 0;
    484                 eeeo[1] = 0;
    485                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0];
    486                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0];
    487 
    488                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    489                 eee[0] = eeee[0] + eeeo[0];
    490                 eee[3] = eeee[0] - eeeo[0];
    491                 eee[1] = eeee[1] + eeeo[1];
    492                 eee[2] = eeee[1] - eeeo[1];
    493                 for(k = 0; k < 4; k++)
    494                 {
    495                     ee[k] = eee[k] + eeo[k];
    496                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    497                 }
    498                 for(k = 0; k < 8; k++)
    499                 {
    500                     e[k] = ee[k] + eo[k];
    501                     e[k + 8] = ee[7 - k] - eo[7 - k];
    502                 }
    503                 for(k = 0; k < 16; k++)
    504                 {
    505                     pi2_tmp[k] =
    506                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    507                     pi2_tmp[k + 16] =
    508                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    509                 }
    510             }
    511             pi2_src++;
    512             pi2_tmp += trans_size;
    513             zero_cols = zero_cols >> 1;
    514         }
    515 
    516         pi2_tmp = pi2_tmp_orig;
    517 
    518         /* Inverse Transform 2nd stage */
    519         shift = IT_SHIFT_STAGE_2;
    520         add = 1 << (shift - 1);
    521         if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */
    522         {
    523             for(j = 0; j < trans_size; j++)
    524             {
    525                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    526                 for(k = 0; k < 16; k++)
    527                 {
    528                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
    529                                     + g_ai2_ihevc_trans_32[3][k]
    530                                                     * pi2_tmp[3 * trans_size];
    531                 }
    532                 for(k = 0; k < 8; k++)
    533                 {
    534                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size];
    535                 }
    536 //                for(k = 0; k < 4; k++)
    537                 {
    538                     eeo[0] = 0;
    539                     eeo[1] = 0;
    540                     eeo[2] = 0;
    541                     eeo[3] = 0;
    542                 }
    543                 eeeo[0] = 0;
    544                 eeeo[1] = 0;
    545                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
    546                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
    547 
    548                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    549                 eee[0] = eeee[0] + eeeo[0];
    550                 eee[3] = eeee[0] - eeeo[0];
    551                 eee[1] = eeee[1] + eeeo[1];
    552                 eee[2] = eeee[1] - eeeo[1];
    553                 for(k = 0; k < 4; k++)
    554                 {
    555                     ee[k] = eee[k] + eeo[k];
    556                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    557                 }
    558                 for(k = 0; k < 8; k++)
    559                 {
    560                     e[k] = ee[k] + eo[k];
    561                     e[k + 8] = ee[7 - k] - eo[7 - k];
    562                 }
    563                 for(k = 0; k < 16; k++)
    564                 {
    565                     WORD32 itrans_out;
    566                     itrans_out =
    567                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    568                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    569                     itrans_out =
    570                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    571                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
    572                 }
    573                 pi2_tmp++;
    574                 pu1_pred += pred_strd;
    575                 pu1_dst += dst_strd;
    576             }
    577         }
    578         else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */
    579         {
    580             for(j = 0; j < trans_size; j++)
    581             {
    582                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    583                 for(k = 0; k < 16; k++)
    584                 {
    585                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
    586                                     + g_ai2_ihevc_trans_32[3][k]
    587                                                     * pi2_tmp[3 * trans_size]
    588                                     + g_ai2_ihevc_trans_32[5][k]
    589                                                     * pi2_tmp[5 * trans_size]
    590                                     + g_ai2_ihevc_trans_32[7][k]
    591                                                     * pi2_tmp[7 * trans_size];
    592                 }
    593                 for(k = 0; k < 8; k++)
    594                 {
    595                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
    596                                     + g_ai2_ihevc_trans_32[6][k]
    597                                                     * pi2_tmp[6 * trans_size];
    598                 }
    599                 for(k = 0; k < 4; k++)
    600                 {
    601                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size];
    602                 }
    603                 eeeo[0] = 0;
    604                 eeeo[1] = 0;
    605                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
    606                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
    607 
    608                 /* Combining e and o terms at each hierarchy levels to calculate the final spatial domain vector */
    609                 eee[0] = eeee[0] + eeeo[0];
    610                 eee[3] = eeee[0] - eeeo[0];
    611                 eee[1] = eeee[1] + eeeo[1];
    612                 eee[2] = eeee[1] - eeeo[1];
    613                 for(k = 0; k < 4; k++)
    614                 {
    615                     ee[k] = eee[k] + eeo[k];
    616                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    617                 }
    618                 for(k = 0; k < 8; k++)
    619                 {
    620                     e[k] = ee[k] + eo[k];
    621                     e[k + 8] = ee[7 - k] - eo[7 - k];
    622                 }
    623                 for(k = 0; k < 16; k++)
    624                 {
    625                     WORD32 itrans_out;
    626                     itrans_out =
    627                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    628                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    629                     itrans_out =
    630                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    631                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
    632                 }
    633                 pi2_tmp++;
    634                 pu1_pred += pred_strd;
    635                 pu1_dst += dst_strd;
    636             }
    637         }
    638         else /* All rows of output of 1st stage are non-zero */
    639         {
    640             for(j = 0; j < trans_size; j++)
    641             {
    642                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    643                 for(k = 0; k < 16; k++)
    644                 {
    645                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
    646                                     + g_ai2_ihevc_trans_32[3][k]
    647                                                     * pi2_tmp[3 * trans_size]
    648                                     + g_ai2_ihevc_trans_32[5][k]
    649                                                     * pi2_tmp[5 * trans_size]
    650                                     + g_ai2_ihevc_trans_32[7][k]
    651                                                     * pi2_tmp[7 * trans_size]
    652                                     + g_ai2_ihevc_trans_32[9][k]
    653                                                     * pi2_tmp[9 * trans_size]
    654                                     + g_ai2_ihevc_trans_32[11][k]
    655                                                     * pi2_tmp[11 * trans_size]
    656                                     + g_ai2_ihevc_trans_32[13][k]
    657                                                     * pi2_tmp[13 * trans_size]
    658                                     + g_ai2_ihevc_trans_32[15][k]
    659                                                     * pi2_tmp[15 * trans_size]
    660                                     + g_ai2_ihevc_trans_32[17][k]
    661                                                     * pi2_tmp[17 * trans_size]
    662                                     + g_ai2_ihevc_trans_32[19][k]
    663                                                     * pi2_tmp[19 * trans_size]
    664                                     + g_ai2_ihevc_trans_32[21][k]
    665                                                     * pi2_tmp[21 * trans_size]
    666                                     + g_ai2_ihevc_trans_32[23][k]
    667                                                     * pi2_tmp[23 * trans_size]
    668                                     + g_ai2_ihevc_trans_32[25][k]
    669                                                     * pi2_tmp[25 * trans_size]
    670                                     + g_ai2_ihevc_trans_32[27][k]
    671                                                     * pi2_tmp[27 * trans_size]
    672                                     + g_ai2_ihevc_trans_32[29][k]
    673                                                     * pi2_tmp[29 * trans_size]
    674                                     + g_ai2_ihevc_trans_32[31][k]
    675                                                     * pi2_tmp[31 * trans_size];
    676                 }
    677                 for(k = 0; k < 8; k++)
    678                 {
    679                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
    680                                     + g_ai2_ihevc_trans_32[6][k]
    681                                                     * pi2_tmp[6 * trans_size]
    682                                     + g_ai2_ihevc_trans_32[10][k]
    683                                                     * pi2_tmp[10 * trans_size]
    684                                     + g_ai2_ihevc_trans_32[14][k]
    685                                                     * pi2_tmp[14 * trans_size]
    686                                     + g_ai2_ihevc_trans_32[18][k]
    687                                                     * pi2_tmp[18 * trans_size]
    688                                     + g_ai2_ihevc_trans_32[22][k]
    689                                                     * pi2_tmp[22 * trans_size]
    690                                     + g_ai2_ihevc_trans_32[26][k]
    691                                                     * pi2_tmp[26 * trans_size]
    692                                     + g_ai2_ihevc_trans_32[30][k]
    693                                                     * pi2_tmp[30 * trans_size];
    694                 }
    695                 for(k = 0; k < 4; k++)
    696                 {
    697                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]
    698                                     + g_ai2_ihevc_trans_32[12][k]
    699                                                     * pi2_tmp[12 * trans_size]
    700                                     + g_ai2_ihevc_trans_32[20][k]
    701                                                     * pi2_tmp[20 * trans_size]
    702                                     + g_ai2_ihevc_trans_32[28][k]
    703                                                     * pi2_tmp[28 * trans_size];
    704                 }
    705                 eeeo[0] =
    706                                 g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size]
    707                                                 + g_ai2_ihevc_trans_32[24][0]
    708                                                                 * pi2_tmp[24
    709                                                                                 * trans_size];
    710                 eeeo[1] =
    711                                 g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size]
    712                                                 + g_ai2_ihevc_trans_32[24][1]
    713                                                                 * pi2_tmp[24
    714                                                                                 * trans_size];
    715                 eeee[0] =
    716                                 g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]
    717                                                 + g_ai2_ihevc_trans_32[16][0]
    718                                                                 * pi2_tmp[16
    719                                                                                 * trans_size];
    720                 eeee[1] =
    721                                 g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]
    722                                                 + g_ai2_ihevc_trans_32[16][1]
    723                                                                 * pi2_tmp[16
    724                                                                                 * trans_size];
    725 
    726                 /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
    727                 eee[0] = eeee[0] + eeeo[0];
    728                 eee[3] = eeee[0] - eeeo[0];
    729                 eee[1] = eeee[1] + eeeo[1];
    730                 eee[2] = eeee[1] - eeeo[1];
    731                 for(k = 0; k < 4; k++)
    732                 {
    733                     ee[k] = eee[k] + eeo[k];
    734                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    735                 }
    736                 for(k = 0; k < 8; k++)
    737                 {
    738                     e[k] = ee[k] + eo[k];
    739                     e[k + 8] = ee[7 - k] - eo[7 - k];
    740                 }
    741                 for(k = 0; k < 16; k++)
    742                 {
    743                     WORD32 itrans_out;
    744                     itrans_out =
    745                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    746                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    747                     itrans_out =
    748                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    749                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
    750                 }
    751                 pi2_tmp++;
    752                 pu1_pred += pred_strd;
    753                 pu1_dst += dst_strd;
    754             }
    755         }
    756         /************************************************************************************************/
    757         /************************************END - IT_RECON_32x32****************************************/
    758         /************************************************************************************************/
    759     }
    760     else  /* All rows of input are non-zero */
    761     {
    762         /************************************************************************************************/
    763         /**********************************START - IT_RECON_32x32****************************************/
    764         /************************************************************************************************/
    765         /* Inverse Transform 1st stage */
    766         shift = IT_SHIFT_STAGE_1;
    767         add = 1 << (shift - 1);
    768 
    769         for(j = 0; j < row_limit_2nd_stage; j++)
    770         {
    771             /* Checking for Zero Cols */
    772             if((zero_cols & 1) == 1)
    773             {
    774                 memset(pi2_tmp, 0, trans_size * sizeof(WORD16));
    775             }
    776             else
    777             {
    778                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    779                 for(k = 0; k < 16; k++)
    780                 {
    781                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_src[src_strd]
    782                                     + g_ai2_ihevc_trans_32[3][k]
    783                                                     * pi2_src[3 * src_strd]
    784                                     + g_ai2_ihevc_trans_32[5][k]
    785                                                     * pi2_src[5 * src_strd]
    786                                     + g_ai2_ihevc_trans_32[7][k]
    787                                                     * pi2_src[7 * src_strd]
    788                                     + g_ai2_ihevc_trans_32[9][k]
    789                                                     * pi2_src[9 * src_strd]
    790                                     + g_ai2_ihevc_trans_32[11][k]
    791                                                     * pi2_src[11 * src_strd]
    792                                     + g_ai2_ihevc_trans_32[13][k]
    793                                                     * pi2_src[13 * src_strd]
    794                                     + g_ai2_ihevc_trans_32[15][k]
    795                                                     * pi2_src[15 * src_strd]
    796                                     + g_ai2_ihevc_trans_32[17][k]
    797                                                     * pi2_src[17 * src_strd]
    798                                     + g_ai2_ihevc_trans_32[19][k]
    799                                                     * pi2_src[19 * src_strd]
    800                                     + g_ai2_ihevc_trans_32[21][k]
    801                                                     * pi2_src[21 * src_strd]
    802                                     + g_ai2_ihevc_trans_32[23][k]
    803                                                     * pi2_src[23 * src_strd]
    804                                     + g_ai2_ihevc_trans_32[25][k]
    805                                                     * pi2_src[25 * src_strd]
    806                                     + g_ai2_ihevc_trans_32[27][k]
    807                                                     * pi2_src[27 * src_strd]
    808                                     + g_ai2_ihevc_trans_32[29][k]
    809                                                     * pi2_src[29 * src_strd]
    810                                     + g_ai2_ihevc_trans_32[31][k]
    811                                                     * pi2_src[31 * src_strd];
    812                 }
    813                 for(k = 0; k < 8; k++)
    814                 {
    815                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_src[2 * src_strd]
    816                                     + g_ai2_ihevc_trans_32[6][k]
    817                                                     * pi2_src[6 * src_strd]
    818                                     + g_ai2_ihevc_trans_32[10][k]
    819                                                     * pi2_src[10 * src_strd]
    820                                     + g_ai2_ihevc_trans_32[14][k]
    821                                                     * pi2_src[14 * src_strd]
    822                                     + g_ai2_ihevc_trans_32[18][k]
    823                                                     * pi2_src[18 * src_strd]
    824                                     + g_ai2_ihevc_trans_32[22][k]
    825                                                     * pi2_src[22 * src_strd]
    826                                     + g_ai2_ihevc_trans_32[26][k]
    827                                                     * pi2_src[26 * src_strd]
    828                                     + g_ai2_ihevc_trans_32[30][k]
    829                                                     * pi2_src[30 * src_strd];
    830                 }
    831                 for(k = 0; k < 4; k++)
    832                 {
    833                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_src[4 * src_strd]
    834                                     + g_ai2_ihevc_trans_32[12][k]
    835                                                     * pi2_src[12 * src_strd]
    836                                     + g_ai2_ihevc_trans_32[20][k]
    837                                                     * pi2_src[20 * src_strd]
    838                                     + g_ai2_ihevc_trans_32[28][k]
    839                                                     * pi2_src[28 * src_strd];
    840                 }
    841                 eeeo[0] = g_ai2_ihevc_trans_32[8][0] * pi2_src[8 * src_strd]
    842                                 + g_ai2_ihevc_trans_32[24][0]
    843                                                 * pi2_src[24 * src_strd];
    844                 eeeo[1] = g_ai2_ihevc_trans_32[8][1] * pi2_src[8 * src_strd]
    845                                 + g_ai2_ihevc_trans_32[24][1]
    846                                                 * pi2_src[24 * src_strd];
    847                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_src[0]
    848                                 + g_ai2_ihevc_trans_32[16][0]
    849                                                 * pi2_src[16 * src_strd];
    850                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_src[0]
    851                                 + g_ai2_ihevc_trans_32[16][1]
    852                                                 * pi2_src[16 * src_strd];
    853 
    854                 /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
    855                 eee[0] = eeee[0] + eeeo[0];
    856                 eee[3] = eeee[0] - eeeo[0];
    857                 eee[1] = eeee[1] + eeeo[1];
    858                 eee[2] = eeee[1] - eeeo[1];
    859                 for(k = 0; k < 4; k++)
    860                 {
    861                     ee[k] = eee[k] + eeo[k];
    862                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    863                 }
    864                 for(k = 0; k < 8; k++)
    865                 {
    866                     e[k] = ee[k] + eo[k];
    867                     e[k + 8] = ee[7 - k] - eo[7 - k];
    868                 }
    869                 for(k = 0; k < 16; k++)
    870                 {
    871                     pi2_tmp[k] =
    872                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    873                     pi2_tmp[k + 16] =
    874                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    875                 }
    876             }
    877             pi2_src++;
    878             pi2_tmp += trans_size;
    879             zero_cols = zero_cols >> 1;
    880         }
    881 
    882         pi2_tmp = pi2_tmp_orig;
    883 
    884         /* Inverse Transform 2nd stage */
    885         shift = IT_SHIFT_STAGE_2;
    886         add = 1 << (shift - 1);
    887         if((zero_rows_2nd_stage & 0xFFFFFFF0) == 0xFFFFFFF0) /* First 4 rows of output of 1st stage are non-zero */
    888         {
    889             for(j = 0; j < trans_size; j++)
    890             {
    891                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    892                 for(k = 0; k < 16; k++)
    893                 {
    894                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
    895                                     + g_ai2_ihevc_trans_32[3][k]
    896                                                     * pi2_tmp[3 * trans_size];
    897                 }
    898                 for(k = 0; k < 8; k++)
    899                 {
    900                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size];
    901                 }
    902 //                for(k = 0; k < 4; k++)
    903                 {
    904                     eeo[0] = 0;
    905                     eeo[1] = 0;
    906                     eeo[2] = 0;
    907                     eeo[3] = 0;
    908                 }
    909                 eeeo[0] = 0;
    910                 eeeo[1] = 0;
    911                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
    912                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
    913 
    914                 /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
    915                 eee[0] = eeee[0] + eeeo[0];
    916                 eee[3] = eeee[0] - eeeo[0];
    917                 eee[1] = eeee[1] + eeeo[1];
    918                 eee[2] = eeee[1] - eeeo[1];
    919                 for(k = 0; k < 4; k++)
    920                 {
    921                     ee[k] = eee[k] + eeo[k];
    922                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    923                 }
    924                 for(k = 0; k < 8; k++)
    925                 {
    926                     e[k] = ee[k] + eo[k];
    927                     e[k + 8] = ee[7 - k] - eo[7 - k];
    928                 }
    929                 for(k = 0; k < 16; k++)
    930                 {
    931                     WORD32 itrans_out;
    932                     itrans_out =
    933                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    934                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    935                     itrans_out =
    936                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    937                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
    938                 }
    939                 pi2_tmp++;
    940                 pu1_pred += pred_strd;
    941                 pu1_dst += dst_strd;
    942             }
    943         }
    944         else if((zero_rows_2nd_stage & 0xFFFFFF00) == 0xFFFFFF00) /* First 8 rows of output of 1st stage are non-zero */
    945         {
    946             for(j = 0; j < trans_size; j++)
    947             {
    948                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
    949                 for(k = 0; k < 16; k++)
    950                 {
    951                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
    952                                     + g_ai2_ihevc_trans_32[3][k]
    953                                                     * pi2_tmp[3 * trans_size]
    954                                     + g_ai2_ihevc_trans_32[5][k]
    955                                                     * pi2_tmp[5 * trans_size]
    956                                     + g_ai2_ihevc_trans_32[7][k]
    957                                                     * pi2_tmp[7 * trans_size];
    958                 }
    959                 for(k = 0; k < 8; k++)
    960                 {
    961                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
    962                                     + g_ai2_ihevc_trans_32[6][k]
    963                                                     * pi2_tmp[6 * trans_size];
    964                 }
    965                 for(k = 0; k < 4; k++)
    966                 {
    967                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size];
    968                 }
    969                 eeeo[0] = 0;
    970                 eeeo[1] = 0;
    971                 eeee[0] = g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0];
    972                 eeee[1] = g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0];
    973 
    974                 /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
    975                 eee[0] = eeee[0] + eeeo[0];
    976                 eee[3] = eeee[0] - eeeo[0];
    977                 eee[1] = eeee[1] + eeeo[1];
    978                 eee[2] = eeee[1] - eeeo[1];
    979                 for(k = 0; k < 4; k++)
    980                 {
    981                     ee[k] = eee[k] + eeo[k];
    982                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
    983                 }
    984                 for(k = 0; k < 8; k++)
    985                 {
    986                     e[k] = ee[k] + eo[k];
    987                     e[k + 8] = ee[7 - k] - eo[7 - k];
    988                 }
    989                 for(k = 0; k < 16; k++)
    990                 {
    991                     WORD32 itrans_out;
    992                     itrans_out =
    993                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
    994                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
    995                     itrans_out =
    996                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
    997                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
    998                 }
    999                 pi2_tmp++;
   1000                 pu1_pred += pred_strd;
   1001                 pu1_dst += dst_strd;
   1002             }
   1003         }
   1004         else /* All rows of output of 1st stage are non-zero */
   1005         {
   1006             for(j = 0; j < trans_size; j++)
   1007             {
   1008                 /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
   1009                 for(k = 0; k < 16; k++)
   1010                 {
   1011                     o[k] = g_ai2_ihevc_trans_32[1][k] * pi2_tmp[trans_size]
   1012                                     + g_ai2_ihevc_trans_32[3][k]
   1013                                                     * pi2_tmp[3 * trans_size]
   1014                                     + g_ai2_ihevc_trans_32[5][k]
   1015                                                     * pi2_tmp[5 * trans_size]
   1016                                     + g_ai2_ihevc_trans_32[7][k]
   1017                                                     * pi2_tmp[7 * trans_size]
   1018                                     + g_ai2_ihevc_trans_32[9][k]
   1019                                                     * pi2_tmp[9 * trans_size]
   1020                                     + g_ai2_ihevc_trans_32[11][k]
   1021                                                     * pi2_tmp[11 * trans_size]
   1022                                     + g_ai2_ihevc_trans_32[13][k]
   1023                                                     * pi2_tmp[13 * trans_size]
   1024                                     + g_ai2_ihevc_trans_32[15][k]
   1025                                                     * pi2_tmp[15 * trans_size]
   1026                                     + g_ai2_ihevc_trans_32[17][k]
   1027                                                     * pi2_tmp[17 * trans_size]
   1028                                     + g_ai2_ihevc_trans_32[19][k]
   1029                                                     * pi2_tmp[19 * trans_size]
   1030                                     + g_ai2_ihevc_trans_32[21][k]
   1031                                                     * pi2_tmp[21 * trans_size]
   1032                                     + g_ai2_ihevc_trans_32[23][k]
   1033                                                     * pi2_tmp[23 * trans_size]
   1034                                     + g_ai2_ihevc_trans_32[25][k]
   1035                                                     * pi2_tmp[25 * trans_size]
   1036                                     + g_ai2_ihevc_trans_32[27][k]
   1037                                                     * pi2_tmp[27 * trans_size]
   1038                                     + g_ai2_ihevc_trans_32[29][k]
   1039                                                     * pi2_tmp[29 * trans_size]
   1040                                     + g_ai2_ihevc_trans_32[31][k]
   1041                                                     * pi2_tmp[31 * trans_size];
   1042                 }
   1043                 for(k = 0; k < 8; k++)
   1044                 {
   1045                     eo[k] = g_ai2_ihevc_trans_32[2][k] * pi2_tmp[2 * trans_size]
   1046                                     + g_ai2_ihevc_trans_32[6][k]
   1047                                                     * pi2_tmp[6 * trans_size]
   1048                                     + g_ai2_ihevc_trans_32[10][k]
   1049                                                     * pi2_tmp[10 * trans_size]
   1050                                     + g_ai2_ihevc_trans_32[14][k]
   1051                                                     * pi2_tmp[14 * trans_size]
   1052                                     + g_ai2_ihevc_trans_32[18][k]
   1053                                                     * pi2_tmp[18 * trans_size]
   1054                                     + g_ai2_ihevc_trans_32[22][k]
   1055                                                     * pi2_tmp[22 * trans_size]
   1056                                     + g_ai2_ihevc_trans_32[26][k]
   1057                                                     * pi2_tmp[26 * trans_size]
   1058                                     + g_ai2_ihevc_trans_32[30][k]
   1059                                                     * pi2_tmp[30 * trans_size];
   1060                 }
   1061                 for(k = 0; k < 4; k++)
   1062                 {
   1063                     eeo[k] = g_ai2_ihevc_trans_32[4][k] * pi2_tmp[4 * trans_size]
   1064                                     + g_ai2_ihevc_trans_32[12][k]
   1065                                                     * pi2_tmp[12 * trans_size]
   1066                                     + g_ai2_ihevc_trans_32[20][k]
   1067                                                     * pi2_tmp[20 * trans_size]
   1068                                     + g_ai2_ihevc_trans_32[28][k]
   1069                                                     * pi2_tmp[28 * trans_size];
   1070                 }
   1071                 eeeo[0] =
   1072                                 g_ai2_ihevc_trans_32[8][0] * pi2_tmp[8 * trans_size]
   1073                                                 + g_ai2_ihevc_trans_32[24][0]
   1074                                                                 * pi2_tmp[24
   1075                                                                                 * trans_size];
   1076                 eeeo[1] =
   1077                                 g_ai2_ihevc_trans_32[8][1] * pi2_tmp[8 * trans_size]
   1078                                                 + g_ai2_ihevc_trans_32[24][1]
   1079                                                                 * pi2_tmp[24
   1080                                                                                 * trans_size];
   1081                 eeee[0] =
   1082                                 g_ai2_ihevc_trans_32[0][0] * pi2_tmp[0]
   1083                                                 + g_ai2_ihevc_trans_32[16][0]
   1084                                                                 * pi2_tmp[16
   1085                                                                                 * trans_size];
   1086                 eeee[1] =
   1087                                 g_ai2_ihevc_trans_32[0][1] * pi2_tmp[0]
   1088                                                 + g_ai2_ihevc_trans_32[16][1]
   1089                                                                 * pi2_tmp[16
   1090                                                                                 * trans_size];
   1091 
   1092                 /* Combining e and o terms at each hierarchy level to calculate the final spatial domain vector */
   1093                 eee[0] = eeee[0] + eeeo[0];
   1094                 eee[3] = eeee[0] - eeeo[0];
   1095                 eee[1] = eeee[1] + eeeo[1];
   1096                 eee[2] = eeee[1] - eeeo[1];
   1097                 for(k = 0; k < 4; k++)
   1098                 {
   1099                     ee[k] = eee[k] + eeo[k];
   1100                     ee[k + 4] = eee[3 - k] - eeo[3 - k];
   1101                 }
   1102                 for(k = 0; k < 8; k++)
   1103                 {
   1104                     e[k] = ee[k] + eo[k];
   1105                     e[k + 8] = ee[7 - k] - eo[7 - k];
   1106                 }
   1107                 for(k = 0; k < 16; k++)
   1108                 {
   1109                     WORD32 itrans_out;
   1110                     itrans_out =
   1111                                     CLIP_S16(((e[k] + o[k] + add) >> shift));
   1112                     pu1_dst[k] = CLIP_U8((itrans_out + pu1_pred[k]));
   1113                     itrans_out =
   1114                                     CLIP_S16(((e[15 - k] - o[15 - k] + add) >> shift));
   1115                     pu1_dst[k + 16] = CLIP_U8((itrans_out + pu1_pred[k + 16]));
   1116                 }
   1117                 pi2_tmp++;
   1118                 pu1_pred += pred_strd;
   1119                 pu1_dst += dst_strd;
   1120             }
   1121         }
   1122         /************************************************************************************************/
   1123         /************************************END - IT_RECON_32x32****************************************/
   1124         /************************************************************************************************/
   1125     }
   1126 }
   1127 
   1128