Home | History | Annotate | Download | only in common
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19 *******************************************************************************
     20 * @file
     21 *  ihevc_sao.c
     22 *
     23 * @brief
     24 *  Contains leaf level function definitions for sample adaptive offset process
     25 *
     26 * @author
     27 *  Srinivas T
     28 *
     29 * @par List of Functions:
     30 *   - ihevc_sao_band_offset_luma()
     31 *   - ihevc_sao_band_offset_chroma()
     32 *   - ihevc_sao_edge_offset_class0()
     33 *   - ihevc_sao_edge_offset_class0_chroma()
     34 *   - ihevc_sao_edge_offset_class1()
     35 *   - ihevc_sao_edge_offset_class1_chroma()
     36 *   - ihevc_sao_edge_offset_class2()
     37 *   - ihevc_sao_edge_offset_class2_chroma()
     38 *   - ihevc_sao_edge_offset_class3()
     39 *   - ihevc_sao_edge_offset_class3_chroma()
     40 * @remarks
     41 *  None
     42 *
     43 *******************************************************************************
     44 */
     45 #include <stdlib.h>
     46 #include <assert.h>
     47 #include <string.h>
     48 #include "ihevc_typedefs.h"
     49 #include "ihevc_macros.h"
     50 #include "ihevc_platform_macros.h"
     51 #include "ihevc_func_selector.h"
     52 #include "ihevc_defs.h"
     53 #include "ihevc_structs.h"
     54 #include "ihevc_sao.h"
     55 
/* Number of entries in the band lookup tables built by the band offset
 * functions. A sample selects its entry with (sample >> (bit depth - 5)). */
#define NUM_BAND_TABLE  32

/* Maps the raw edge index computed by the edge offset functions
 * (2 + sign of difference to one neighbour + sign of difference to the
 * opposite neighbour, i.e. 0..4) to the index used to read the SAO offset
 * array. A mapped value of 0 means the sample is left unmodified. */
const WORD32 gi4_ihevc_table_edge_idx[5] = { 1, 2, 0, 3, 4 };
/**
 * pu1_avail is an array of flags, one for each neighboring block, specifying whether that block is available
 * pu1_avail[0] - left
 * pu1_avail[1] - right
 * pu1_avail[2] - top
 * pu1_avail[3] - bottom
 * pu1_avail[4] - top-left
 * pu1_avail[5] - top-right
 * pu1_avail[6] - bottom-left
 * pu1_avail[7] - bottom-right
 */
     70 
     71 
     72 void ihevc_sao_band_offset_luma(UWORD8 *pu1_src,
     73                                 WORD32 src_strd,
     74                                 UWORD8 *pu1_src_left,
     75                                 UWORD8 *pu1_src_top,
     76                                 UWORD8 *pu1_src_top_left,
     77                                 WORD32 sao_band_pos,
     78                                 WORD8 *pi1_sao_offset,
     79                                 WORD32 wd,
     80                                 WORD32 ht)
     81 {
     82     WORD32 band_shift;
     83     WORD32 band_table[NUM_BAND_TABLE];
     84     WORD32 i;
     85     WORD32 row, col;
     86 
     87     /* Updating left and top and top-left */
     88     for(row = 0; row < ht; row++)
     89     {
     90         pu1_src_left[row] = pu1_src[row * src_strd + (wd - 1)];
     91     }
     92     pu1_src_top_left[0] = pu1_src_top[wd - 1];
     93     for(col = 0; col < wd; col++)
     94     {
     95         pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
     96     }
     97 
     98     band_shift = BIT_DEPTH_LUMA - 5;
     99     for(i = 0; i < NUM_BAND_TABLE; i++)
    100     {
    101         band_table[i] = 0;
    102     }
    103     for(i = 0; i < 4; i++)
    104     {
    105         band_table[(i + sao_band_pos) & 31] = i + 1;
    106     }
    107 
    108     for(row = 0; row < ht; row++)
    109     {
    110         for(col = 0; col < wd; col++)
    111         {
    112             WORD32 band_idx;
    113 
    114             band_idx = band_table[pu1_src[col] >> band_shift];
    115             pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[band_idx], 0, (1 << (band_shift + 5)) - 1);
    116         }
    117         pu1_src += src_strd;
    118     }
    119 }
    120 
    121 
    122 
    123 /* input 'wd' has to be for the interleaved block and not for each color component */
    124 void ihevc_sao_band_offset_chroma(UWORD8 *pu1_src,
    125                                   WORD32 src_strd,
    126                                   UWORD8 *pu1_src_left,
    127                                   UWORD8 *pu1_src_top,
    128                                   UWORD8 *pu1_src_top_left,
    129                                   WORD32 sao_band_pos_u,
    130                                   WORD32 sao_band_pos_v,
    131                                   WORD8 *pi1_sao_offset_u,
    132                                   WORD8 *pi1_sao_offset_v,
    133                                   WORD32 wd,
    134                                   WORD32 ht)
    135 {
    136     WORD32 band_shift;
    137     WORD32 band_table_u[NUM_BAND_TABLE];
    138     WORD32 band_table_v[NUM_BAND_TABLE];
    139     WORD32 i;
    140     WORD32 row, col;
    141 
    142     /* Updating left and top and top-left */
    143     for(row = 0; row < ht; row++)
    144     {
    145         pu1_src_left[2 * row] = pu1_src[row * src_strd + (wd - 2)];
    146         pu1_src_left[2 * row + 1] = pu1_src[row * src_strd + (wd - 1)];
    147     }
    148     pu1_src_top_left[0] = pu1_src_top[wd - 2];
    149     pu1_src_top_left[1] = pu1_src_top[wd - 1];
    150     for(col = 0; col < wd; col++)
    151     {
    152         pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
    153     }
    154 
    155 
    156     band_shift = BIT_DEPTH_CHROMA - 5;
    157     for(i = 0; i < NUM_BAND_TABLE; i++)
    158     {
    159         band_table_u[i] = 0;
    160         band_table_v[i] = 0;
    161     }
    162     for(i = 0; i < 4; i++)
    163     {
    164         band_table_u[(i + sao_band_pos_u) & 31] = i + 1;
    165         band_table_v[(i + sao_band_pos_v) & 31] = i + 1;
    166     }
    167 
    168     for(row = 0; row < ht; row++)
    169     {
    170         for(col = 0; col < wd; col++)
    171         {
    172             WORD32 band_idx;
    173             WORD8 *pi1_sao_offset;
    174 
    175             pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
    176             band_idx = (0 == col % 2) ? band_table_u[pu1_src[col] >> band_shift] : band_table_v[pu1_src[col] >> band_shift];
    177             pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[band_idx], 0, (1 << (band_shift + 5)) - 1);
    178         }
    179         pu1_src += src_strd;
    180     }
    181 }
    182 
    183 
    184 
    185 /* Horizontal filtering */
    186 void ihevc_sao_edge_offset_class0(UWORD8 *pu1_src,
    187                                   WORD32 src_strd,
    188                                   UWORD8 *pu1_src_left,
    189                                   UWORD8 *pu1_src_top,
    190                                   UWORD8 *pu1_src_top_left,
    191                                   UWORD8 *pu1_src_top_right,
    192                                   UWORD8 *pu1_src_bot_left,
    193                                   UWORD8 *pu1_avail,
    194                                   WORD8 *pi1_sao_offset,
    195                                   WORD32 wd,
    196                                   WORD32 ht)
    197 {
    198     WORD32 row, col;
    199     UWORD8 au1_mask[MAX_CTB_SIZE];
    200     UWORD8 au1_src_left_tmp[MAX_CTB_SIZE];
    201     WORD8 u1_sign_left, u1_sign_right;
    202     WORD32 bit_depth;
    203     UNUSED(pu1_src_top_right);
    204     UNUSED(pu1_src_bot_left);
    205     bit_depth = BIT_DEPTH_LUMA;
    206 
    207     /* Initialize the mask values */
    208     memset(au1_mask, 0xFF, MAX_CTB_SIZE);
    209 
    210     /* Update top and top-left arrays */
    211     *pu1_src_top_left = pu1_src_top[wd - 1];
    212     for(row = 0; row < ht; row++)
    213     {
    214         au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
    215     }
    216     for(col = 0; col < wd; col++)
    217     {
    218         pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
    219     }
    220 
    221     /* Update masks based on the availability flags */
    222     if(0 == pu1_avail[0])
    223     {
    224         au1_mask[0] = 0;
    225     }
    226     if(0 == pu1_avail[1])
    227     {
    228         au1_mask[wd - 1] = 0;
    229     }
    230 
    231     /* Processing is done on the intermediate buffer and the output is written to the source buffer */
    232     {
    233         for(row = 0; row < ht; row++)
    234         {
    235             u1_sign_left = SIGN(pu1_src[0] - pu1_src_left[row]);
    236             for(col = 0; col < wd; col++)
    237             {
    238                 WORD32 edge_idx;
    239 
    240                 u1_sign_right = SIGN(pu1_src[col] - pu1_src[col + 1]);
    241                 edge_idx = 2 + u1_sign_left + u1_sign_right;
    242                 u1_sign_left = -u1_sign_right;
    243 
    244                 edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
    245 
    246                 if(0 != edge_idx)
    247                 {
    248                     pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
    249                 }
    250             }
    251 
    252             pu1_src += src_strd;
    253         }
    254     }
    255 
    256     /* Update left array */
    257     for(row = 0; row < ht; row++)
    258     {
    259         pu1_src_left[row] = au1_src_left_tmp[row];
    260     }
    261 
    262 }
    263 
    264 
    265 
    266 
    267 /* input 'wd' has to be for the interleaved block and not for each color component */
    268 void ihevc_sao_edge_offset_class0_chroma(UWORD8 *pu1_src,
    269                                          WORD32 src_strd,
    270                                          UWORD8 *pu1_src_left,
    271                                          UWORD8 *pu1_src_top,
    272                                          UWORD8 *pu1_src_top_left,
    273                                          UWORD8 *pu1_src_top_right,
    274                                          UWORD8 *pu1_src_bot_left,
    275                                          UWORD8 *pu1_avail,
    276                                          WORD8 *pi1_sao_offset_u,
    277                                          WORD8 *pi1_sao_offset_v,
    278                                          WORD32 wd,
    279                                          WORD32 ht)
    280 {
    281     WORD32 row, col;
    282     UWORD8 au1_mask[MAX_CTB_SIZE];
    283     UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE];
    284     WORD8 u1_sign_left_u, u1_sign_right_u;
    285     WORD8 u1_sign_left_v, u1_sign_right_v;
    286     WORD32 bit_depth;
    287     UNUSED(pu1_src_top_right);
    288     UNUSED(pu1_src_bot_left);
    289     bit_depth = BIT_DEPTH_CHROMA;
    290 
    291     /* Initialize the mask values */
    292     memset(au1_mask, 0xFF, MAX_CTB_SIZE);
    293 
    294     /* Update left, top and top-left arrays */
    295     pu1_src_top_left[0] = pu1_src_top[wd - 2];
    296     pu1_src_top_left[1] = pu1_src_top[wd - 1];
    297     for(row = 0; row < ht; row++)
    298     {
    299         au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
    300         au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
    301     }
    302     for(col = 0; col < wd; col++)
    303     {
    304         pu1_src_top[col] = pu1_src[(ht - 1) * src_strd + col];
    305     }
    306 
    307     /* Update masks based on the availability flags */
    308     if(0 == pu1_avail[0])
    309     {
    310         au1_mask[0] = 0;
    311     }
    312     if(0 == pu1_avail[1])
    313     {
    314         au1_mask[(wd - 1) >> 1] = 0;
    315     }
    316 
    317     /* Processing is done on the intermediate buffer and the output is written to the source buffer */
    318     {
    319         for(row = 0; row < ht; row++)
    320         {
    321             u1_sign_left_u = SIGN(pu1_src[0] - pu1_src_left[2 * row]);
    322             u1_sign_left_v = SIGN(pu1_src[1] - pu1_src_left[2 * row + 1]);
    323             for(col = 0; col < wd; col++)
    324             {
    325                 WORD32 edge_idx;
    326                 WORD8 *pi1_sao_offset;
    327 
    328                 if(0 == col % 2)
    329                 {
    330                     pi1_sao_offset = pi1_sao_offset_u;
    331                     u1_sign_right_u = SIGN(pu1_src[col] - pu1_src[col + 2]);
    332                     edge_idx = 2 + u1_sign_left_u + u1_sign_right_u;
    333                     u1_sign_left_u = -u1_sign_right_u;
    334                 }
    335                 else
    336                 {
    337                     pi1_sao_offset = pi1_sao_offset_v;
    338                     u1_sign_right_v = SIGN(pu1_src[col] - pu1_src[col + 2]);
    339                     edge_idx = 2 + u1_sign_left_v + u1_sign_right_v;
    340                     u1_sign_left_v = -u1_sign_right_v;
    341                 }
    342 
    343                 edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
    344 
    345                 if(0 != edge_idx)
    346                 {
    347                     pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
    348                 }
    349             }
    350 
    351             pu1_src += src_strd;
    352         }
    353     }
    354 
    355     for(row = 0; row < 2 * ht; row++)
    356     {
    357         pu1_src_left[row] = au1_src_left_tmp[row];
    358     }
    359 
    360 }
    361 
    362 
    363 
    364 /* Vertical filtering */
    365 void ihevc_sao_edge_offset_class1(UWORD8 *pu1_src,
    366                                   WORD32 src_strd,
    367                                   UWORD8 *pu1_src_left,
    368                                   UWORD8 *pu1_src_top,
    369                                   UWORD8 *pu1_src_top_left,
    370                                   UWORD8 *pu1_src_top_right,
    371                                   UWORD8 *pu1_src_bot_left,
    372                                   UWORD8 *pu1_avail,
    373                                   WORD8 *pi1_sao_offset,
    374                                   WORD32 wd,
    375                                   WORD32 ht)
    376 {
    377     WORD32 row, col;
    378     UWORD8 au1_mask[MAX_CTB_SIZE];
    379     UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
    380     WORD8 au1_sign_up[MAX_CTB_SIZE];
    381     WORD8 u1_sign_down;
    382     WORD32 bit_depth;
    383     UNUSED(pu1_src_top_right);
    384     UNUSED(pu1_src_bot_left);
    385 
    386     bit_depth = BIT_DEPTH_LUMA;
    387 
    388     /* Initialize the mask values */
    389     memset(au1_mask, 0xFF, MAX_CTB_SIZE);
    390 
    391     /* Update left, top and top-left arrays */
    392     *pu1_src_top_left = pu1_src_top[wd - 1];
    393     for(row = 0; row < ht; row++)
    394     {
    395         pu1_src_left[row] = pu1_src[row * src_strd + wd - 1];
    396     }
    397     for(col = 0; col < wd; col++)
    398     {
    399         au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
    400     }
    401 
    402     /* Update height and source pointers based on the availability flags */
    403     if(0 == pu1_avail[2])
    404     {
    405         pu1_src += src_strd;
    406         ht--;
    407         for(col = 0; col < wd; col++)
    408         {
    409             au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - src_strd]);
    410         }
    411     }
    412     else
    413     {
    414         for(col = 0; col < wd; col++)
    415         {
    416             au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col]);
    417         }
    418     }
    419     if(0 == pu1_avail[3])
    420     {
    421         ht--;
    422     }
    423 
    424     /* Processing is done on the intermediate buffer and the output is written to the source buffer */
    425     {
    426         for(row = 0; row < ht; row++)
    427         {
    428             for(col = 0; col < wd; col++)
    429             {
    430                 WORD32 edge_idx;
    431 
    432                 u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + src_strd]);
    433                 edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
    434                 au1_sign_up[col] = -u1_sign_down;
    435 
    436                 edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
    437 
    438                 if(0 != edge_idx)
    439                 {
    440                     pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
    441                 }
    442             }
    443 
    444             pu1_src += src_strd;
    445         }
    446     }
    447 
    448     for(col = 0; col < wd; col++)
    449     {
    450         pu1_src_top[col] = au1_src_top_tmp[col];
    451     }
    452 
    453 }
    454 
    455 
    456 
    457 /* input 'wd' has to be for the interleaved block and not for each color component */
    458 void ihevc_sao_edge_offset_class1_chroma(UWORD8 *pu1_src,
    459                                          WORD32 src_strd,
    460                                          UWORD8 *pu1_src_left,
    461                                          UWORD8 *pu1_src_top,
    462                                          UWORD8 *pu1_src_top_left,
    463                                          UWORD8 *pu1_src_top_right,
    464                                          UWORD8 *pu1_src_bot_left,
    465                                          UWORD8 *pu1_avail,
    466                                          WORD8 *pi1_sao_offset_u,
    467                                          WORD8 *pi1_sao_offset_v,
    468                                          WORD32 wd,
    469                                          WORD32 ht)
    470 {
    471     WORD32 row, col;
    472     UWORD8 au1_mask[MAX_CTB_SIZE];
    473     UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
    474     WORD8 au1_sign_up[MAX_CTB_SIZE];
    475     WORD8 u1_sign_down;
    476     WORD32 bit_depth;
    477     UNUSED(pu1_src_top_right);
    478     UNUSED(pu1_src_bot_left);
    479 
    480     bit_depth = BIT_DEPTH_CHROMA;
    481 
    482     /* Initialize the mask values */
    483     memset(au1_mask, 0xFF, MAX_CTB_SIZE);
    484 
    485     /* Update left, top and top-left arrays */
    486     pu1_src_top_left[0] = pu1_src_top[wd - 2];
    487     pu1_src_top_left[1] = pu1_src_top[wd - 1];
    488     for(row = 0; row < ht; row++)
    489     {
    490         pu1_src_left[2 * row] = pu1_src[row * src_strd + wd - 2];
    491         pu1_src_left[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
    492     }
    493     for(col = 0; col < wd; col++)
    494     {
    495         au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
    496     }
    497 
    498     /* Update height and source pointers based on the availability flags */
    499     if(0 == pu1_avail[2])
    500     {
    501         pu1_src += src_strd;
    502         ht--;
    503         for(col = 0; col < wd; col++)
    504         {
    505             au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - src_strd]);
    506         }
    507     }
    508     else
    509     {
    510         for(col = 0; col < wd; col++)
    511         {
    512             au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col]);
    513         }
    514     }
    515     if(0 == pu1_avail[3])
    516     {
    517         ht--;
    518     }
    519 
    520     /* Processing is done on the intermediate buffer and the output is written to the source buffer */
    521     {
    522         for(row = 0; row < ht; row++)
    523         {
    524             for(col = 0; col < wd; col++)
    525             {
    526                 WORD32 edge_idx;
    527                 WORD8 *pi1_sao_offset;
    528 
    529                 pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
    530 
    531                 u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + src_strd]);
    532                 edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
    533                 au1_sign_up[col] = -u1_sign_down;
    534 
    535                 edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
    536 
    537                 if(0 != edge_idx)
    538                 {
    539                     pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
    540                 }
    541             }
    542 
    543             pu1_src += src_strd;
    544         }
    545     }
    546 
    547     for(col = 0; col < wd; col++)
    548     {
    549         pu1_src_top[col] = au1_src_top_tmp[col];
    550     }
    551 
    552 }
    553 
    554 
    555 
    556 /* 135 degree filtering */
    557 void ihevc_sao_edge_offset_class2(UWORD8 *pu1_src,
    558                                   WORD32 src_strd,
    559                                   UWORD8 *pu1_src_left,
    560                                   UWORD8 *pu1_src_top,
    561                                   UWORD8 *pu1_src_top_left,
    562                                   UWORD8 *pu1_src_top_right,
    563                                   UWORD8 *pu1_src_bot_left,
    564                                   UWORD8 *pu1_avail,
    565                                   WORD8 *pi1_sao_offset,
    566                                   WORD32 wd,
    567                                   WORD32 ht)
    568 {
    569     WORD32 row, col;
    570     UWORD8 au1_mask[MAX_CTB_SIZE];
    571     UWORD8 au1_src_left_tmp[MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
    572     UWORD8 u1_src_top_left_tmp;
    573     WORD8 au1_sign_up[MAX_CTB_SIZE + 1], au1_sign_up_tmp[MAX_CTB_SIZE + 1];
    574     WORD8 u1_sign_down;
    575     WORD8 *pu1_sign_up;
    576     WORD8 *pu1_sign_up_tmp;
    577     UWORD8 *pu1_src_left_cpy;
    578 
    579     WORD32 bit_depth;
    580     UWORD8 u1_pos_0_0_tmp;
    581     UWORD8 u1_pos_wd_ht_tmp;
    582     UNUSED(pu1_src_top_right);
    583     UNUSED(pu1_src_bot_left);
    584 
    585     bit_depth = BIT_DEPTH_LUMA;
    586     pu1_sign_up = au1_sign_up;
    587     pu1_sign_up_tmp = au1_sign_up_tmp;
    588     pu1_src_left_cpy = pu1_src_left;
    589 
    590     /* Initialize the mask values */
    591     memset(au1_mask, 0xFF, MAX_CTB_SIZE);
    592 
    593     /* Update left, top and top-left arrays */
    594     u1_src_top_left_tmp = pu1_src_top[wd - 1];
    595     for(row = 0; row < ht; row++)
    596     {
    597         au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
    598     }
    599     for(col = 0; col < wd; col++)
    600     {
    601         au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
    602     }
    603 
    604 
    605     /* If top-left is available, process separately */
    606     if(0 != pu1_avail[4])
    607     {
    608         WORD32 edge_idx;
    609 
    610         edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
    611                         SIGN(pu1_src[0] - pu1_src[1 + src_strd]);
    612 
    613         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
    614 
    615         if(0 != edge_idx)
    616         {
    617             u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
    618         }
    619         else
    620         {
    621             u1_pos_0_0_tmp = pu1_src[0];
    622         }
    623     }
    624     else
    625     {
    626         u1_pos_0_0_tmp = pu1_src[0];
    627     }
    628 
    629     /* If bottom-right is available, process separately */
    630     if(0 != pu1_avail[7])
    631     {
    632         WORD32 edge_idx;
    633 
    634         edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 1 - src_strd]) +
    635                         SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 1 + src_strd]);
    636 
    637         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
    638 
    639         if(0 != edge_idx)
    640         {
    641             u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
    642         }
    643         else
    644         {
    645             u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
    646         }
    647     }
    648     else
    649     {
    650         u1_pos_wd_ht_tmp = pu1_src[wd - 1 + (ht - 1) * src_strd];
    651     }
    652 
    653     /* If Left is not available */
    654     if(0 == pu1_avail[0])
    655     {
    656         au1_mask[0] = 0;
    657     }
    658 
    659     /* If Top is not available */
    660     if(0 == pu1_avail[2])
    661     {
    662         pu1_src += src_strd;
    663         ht--;
    664         pu1_src_left_cpy += 1;
    665         for(col = 1; col < wd; col++)
    666         {
    667             pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 1 - src_strd]);
    668         }
    669     }
    670     else
    671     {
    672         for(col = 1; col < wd; col++)
    673         {
    674             pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 1]);
    675         }
    676     }
    677 
    678     /* If Right is not available */
    679     if(0 == pu1_avail[1])
    680     {
    681         au1_mask[wd - 1] = 0;
    682     }
    683 
    684     /* If Bottom is not available */
    685     if(0 == pu1_avail[3])
    686     {
    687         ht--;
    688     }
    689 
    690     /* Processing is done on the intermediate buffer and the output is written to the source buffer */
    691     {
    692         for(row = 0; row < ht; row++)
    693         {
    694             pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[row - 1]);
    695             for(col = 0; col < wd; col++)
    696             {
    697                 WORD32 edge_idx;
    698 
    699                 u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 1 + src_strd]);
    700                 edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
    701                 pu1_sign_up_tmp[col + 1] = -u1_sign_down;
    702 
    703                 edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
    704 
    705                 if(0 != edge_idx)
    706                 {
    707                     pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
    708                 }
    709             }
    710 
    711             /* Swapping pu1_sign_up_tmp and pu1_sign_up */
    712             {
    713                 WORD8 *pu1_swap_tmp = pu1_sign_up;
    714                 pu1_sign_up = pu1_sign_up_tmp;
    715                 pu1_sign_up_tmp = pu1_swap_tmp;
    716             }
    717 
    718             pu1_src += src_strd;
    719         }
    720 
    721         pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp;
    722         pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp;
    723     }
    724 
    725     if(0 == pu1_avail[2])
    726         ht++;
    727     if(0 == pu1_avail[3])
    728         ht++;
    729     *pu1_src_top_left = u1_src_top_left_tmp;
    730     for(row = 0; row < ht; row++)
    731     {
    732         pu1_src_left[row] = au1_src_left_tmp[row];
    733     }
    734     for(col = 0; col < wd; col++)
    735     {
    736         pu1_src_top[col] = au1_src_top_tmp[col];
    737     }
    738 
    739 }
    740 
    741 
    742 
    743 
    744 /* 135 degree filtering */
    745 void ihevc_sao_edge_offset_class2_chroma(UWORD8 *pu1_src,
    746                                          WORD32 src_strd,
    747                                          UWORD8 *pu1_src_left,
    748                                          UWORD8 *pu1_src_top,
    749                                          UWORD8 *pu1_src_top_left,
    750                                          UWORD8 *pu1_src_top_right,
    751                                          UWORD8 *pu1_src_bot_left,
    752                                          UWORD8 *pu1_avail,
    753                                          WORD8 *pi1_sao_offset_u,
    754                                          WORD8 *pi1_sao_offset_v,
    755                                          WORD32 wd,
    756                                          WORD32 ht)
    757 {
    758     WORD32 row, col;
    759     UWORD8 au1_mask[MAX_CTB_SIZE];
    760     UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
    761     UWORD8 au1_src_top_left_tmp[2];
    762     WORD8 au1_sign_up[MAX_CTB_SIZE + 2], au1_sign_up_tmp[MAX_CTB_SIZE + 2];
    763     WORD8 u1_sign_down;
    764     WORD8 *pu1_sign_up;
    765     WORD8 *pu1_sign_up_tmp;
    766     UWORD8 *pu1_src_left_cpy;
    767 
    768     WORD32 bit_depth;
    769 
    770     UWORD8 u1_pos_0_0_tmp_u;
    771     UWORD8 u1_pos_0_0_tmp_v;
    772     UWORD8 u1_pos_wd_ht_tmp_u;
    773     UWORD8 u1_pos_wd_ht_tmp_v;
    774     UNUSED(pu1_src_top_right);
    775     UNUSED(pu1_src_bot_left);
    776 
    777 
    778     bit_depth = BIT_DEPTH_CHROMA;
    779     pu1_sign_up = au1_sign_up;
    780     pu1_sign_up_tmp = au1_sign_up_tmp;
    781     pu1_src_left_cpy = pu1_src_left;
    782 
    783     /* Initialize the mask values */
    784     memset(au1_mask, 0xFF, MAX_CTB_SIZE);
    785 
    786     /* Update left, top and top-left arrays */
    787     au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
    788     au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
    789     for(row = 0; row < ht; row++)
    790     {
    791         au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
    792         au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
    793     }
    794     for(col = 0; col < wd; col++)
    795     {
    796         au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
    797     }
    798 
    799 
    800     /* If top-left is available, process separately */
    801     if(0 != pu1_avail[4])
    802     {
    803         WORD32 edge_idx;
    804 
    805         /* U */
    806         edge_idx = 2 + SIGN(pu1_src[0] - pu1_src_top_left[0]) +
    807                         SIGN(pu1_src[0] - pu1_src[2 + src_strd]);
    808 
    809         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
    810 
    811         if(0 != edge_idx)
    812         {
    813             u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
    814         }
    815         else
    816         {
    817             u1_pos_0_0_tmp_u = pu1_src[0];
    818         }
    819 
    820         /* V */
    821         edge_idx = 2 + SIGN(pu1_src[1] - pu1_src_top_left[1]) +
    822                         SIGN(pu1_src[1] - pu1_src[1 + 2 + src_strd]);
    823 
    824         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
    825 
    826         if(0 != edge_idx)
    827         {
    828             u1_pos_0_0_tmp_v = CLIP3(pu1_src[1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
    829         }
    830         else
    831         {
    832             u1_pos_0_0_tmp_v = pu1_src[1];
    833         }
    834     }
    835     else
    836     {
    837         u1_pos_0_0_tmp_u = pu1_src[0];
    838         u1_pos_0_0_tmp_v = pu1_src[1];
    839     }
    840 
    841     /* If bottom-right is available, process separately */
    842     if(0 != pu1_avail[7])
    843     {
    844         WORD32 edge_idx;
    845 
    846         /* U */
    847         edge_idx = 2 + SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd - 2 - src_strd]) +
    848                         SIGN(pu1_src[wd - 2 + (ht - 1) * src_strd] - pu1_src[wd - 2 + (ht - 1) * src_strd + 2 + src_strd]);
    849 
    850         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
    851 
    852         if(0 != edge_idx)
    853         {
    854             u1_pos_wd_ht_tmp_u = CLIP3(pu1_src[wd - 2 + (ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
    855         }
    856         else
    857         {
    858             u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
    859         }
    860 
    861         /* V */
    862         edge_idx = 2 + SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd - 2 - src_strd]) +
    863                         SIGN(pu1_src[wd - 1 + (ht - 1) * src_strd] - pu1_src[wd - 1 + (ht - 1) * src_strd + 2 + src_strd]);
    864 
    865         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
    866 
    867         if(0 != edge_idx)
    868         {
    869             u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
    870         }
    871         else
    872         {
    873             u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
    874         }
    875     }
    876     else
    877     {
    878         u1_pos_wd_ht_tmp_u = pu1_src[wd - 2 + (ht - 1) * src_strd];
    879         u1_pos_wd_ht_tmp_v = pu1_src[wd - 1 + (ht - 1) * src_strd];
    880     }
    881 
    882     /* If Left is not available */
    883     if(0 == pu1_avail[0])
    884     {
    885         au1_mask[0] = 0;
    886     }
    887 
    888     /* If Top is not available */
    889     if(0 == pu1_avail[2])
    890     {
    891         pu1_src += src_strd;
    892         pu1_src_left_cpy += 2;
    893         ht--;
    894         for(col = 2; col < wd; col++)
    895         {
    896             pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col - 2 - src_strd]);
    897         }
    898     }
    899     else
    900     {
    901         for(col = 2; col < wd; col++)
    902         {
    903             pu1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col - 2]);
    904         }
    905     }
    906 
    907     /* If Right is not available */
    908     if(0 == pu1_avail[1])
    909     {
    910         au1_mask[(wd - 1) >> 1] = 0;
    911     }
    912 
    913     /* If Bottom is not available */
    914     if(0 == pu1_avail[3])
    915     {
    916         ht--;
    917     }
    918 
    919     /* Processing is done on the intermediate buffer and the output is written to the source buffer */
    920     {
    921         for(row = 0; row < ht; row++)
    922         {
    923             pu1_sign_up[0] = SIGN(pu1_src[0] - pu1_src_left_cpy[2 * (row - 1)]);
    924             pu1_sign_up[1] = SIGN(pu1_src[1] - pu1_src_left_cpy[2 * (row - 1) + 1]);
    925             for(col = 0; col < wd; col++)
    926             {
    927                 WORD32 edge_idx;
    928                 WORD8 *pi1_sao_offset;
    929 
    930                 pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
    931 
    932                 u1_sign_down = SIGN(pu1_src[col] - pu1_src[col + 2 + src_strd]);
    933                 edge_idx = 2 + pu1_sign_up[col] + u1_sign_down;
    934                 pu1_sign_up_tmp[col + 2] = -u1_sign_down;
    935 
    936                 edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
    937 
    938                 if(0 != edge_idx)
    939                 {
    940                     pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
    941                 }
    942             }
    943 
    944             /* Swapping pu1_sign_up_tmp and pu1_sign_up */
    945             {
    946                 WORD8 *pu1_swap_tmp = pu1_sign_up;
    947                 pu1_sign_up = pu1_sign_up_tmp;
    948                 pu1_sign_up_tmp = pu1_swap_tmp;
    949             }
    950 
    951             pu1_src += src_strd;
    952         }
    953 
    954         pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd] = u1_pos_0_0_tmp_u;
    955         pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + 1] = u1_pos_0_0_tmp_v;
    956         pu1_src[(pu1_avail[3] ? wd - 2 - src_strd : wd - 2)] = u1_pos_wd_ht_tmp_u;
    957         pu1_src[(pu1_avail[3] ? wd - 1 - src_strd : wd - 1)] = u1_pos_wd_ht_tmp_v;
    958     }
    959 
    960     if(0 == pu1_avail[2])
    961         ht++;
    962     if(0 == pu1_avail[3])
    963         ht++;
    964     pu1_src_top_left[0] = au1_src_top_left_tmp[0];
    965     pu1_src_top_left[1] = au1_src_top_left_tmp[1];
    966     for(row = 0; row < 2 * ht; row++)
    967     {
    968         pu1_src_left[row] = au1_src_left_tmp[row];
    969     }
    970     for(col = 0; col < wd; col++)
    971     {
    972         pu1_src_top[col] = au1_src_top_tmp[col];
    973     }
    974 
    975 }
    976 
    977 
    978 
    979 
    980 /* 45 degree filtering */
    981 void ihevc_sao_edge_offset_class3(UWORD8 *pu1_src,
    982                                   WORD32 src_strd,
    983                                   UWORD8 *pu1_src_left,
    984                                   UWORD8 *pu1_src_top,
    985                                   UWORD8 *pu1_src_top_left,
    986                                   UWORD8 *pu1_src_top_right,
    987                                   UWORD8 *pu1_src_bot_left,
    988                                   UWORD8 *pu1_avail,
    989                                   WORD8 *pi1_sao_offset,
    990                                   WORD32 wd,
    991                                   WORD32 ht)
    992 {
    993     WORD32 row, col;
    994     UWORD8 au1_mask[MAX_CTB_SIZE];
    995     UWORD8 au1_src_top_tmp[MAX_CTB_SIZE];
    996     UWORD8 au1_src_left_tmp[MAX_CTB_SIZE];
    997     UWORD8 u1_src_top_left_tmp;
    998     WORD8 au1_sign_up[MAX_CTB_SIZE];
    999     UWORD8 *pu1_src_left_cpy;
   1000     WORD8 u1_sign_down;
   1001     WORD32 bit_depth;
   1002 
   1003     UWORD8 u1_pos_0_ht_tmp;
   1004     UWORD8 u1_pos_wd_0_tmp;
   1005 
   1006     bit_depth = BIT_DEPTH_LUMA;
   1007     pu1_src_left_cpy = pu1_src_left;
   1008 
   1009     /* Initialize the mask values */
   1010     memset(au1_mask, 0xFF, MAX_CTB_SIZE);
   1011 
   1012     /* Update left, top and top-left arrays */
   1013     u1_src_top_left_tmp = pu1_src_top[wd - 1];
   1014     for(row = 0; row < ht; row++)
   1015     {
   1016         au1_src_left_tmp[row] = pu1_src[row * src_strd + wd - 1];
   1017     }
   1018     for(col = 0; col < wd; col++)
   1019     {
   1020         au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
   1021     }
   1022 
   1023     /* If top-right is available, process separately */
   1024     if(0 != pu1_avail[5])
   1025     {
   1026         WORD32 edge_idx;
   1027 
   1028         edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[0]) +
   1029                         SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 1 + src_strd]);
   1030 
   1031         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
   1032 
   1033         if(0 != edge_idx)
   1034         {
   1035             u1_pos_wd_0_tmp = CLIP3(pu1_src[wd - 1] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
   1036         }
   1037         else
   1038         {
   1039             u1_pos_wd_0_tmp = pu1_src[wd - 1];
   1040         }
   1041     }
   1042     else
   1043     {
   1044         u1_pos_wd_0_tmp = pu1_src[wd - 1];
   1045     }
   1046 
   1047     /* If bottom-left is available, process separately */
   1048     if(0 != pu1_avail[6])
   1049     {
   1050         WORD32 edge_idx;
   1051 
   1052         edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 1 - src_strd]) +
   1053                         SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);
   1054 
   1055         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
   1056 
   1057         if(0 != edge_idx)
   1058         {
   1059             u1_pos_0_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
   1060         }
   1061         else
   1062         {
   1063             u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
   1064         }
   1065     }
   1066     else
   1067     {
   1068         u1_pos_0_ht_tmp = pu1_src[(ht - 1) * src_strd];
   1069     }
   1070 
   1071     /* If Left is not available */
   1072     if(0 == pu1_avail[0])
   1073     {
   1074         au1_mask[0] = 0;
   1075     }
   1076 
   1077     /* If Top is not available */
   1078     if(0 == pu1_avail[2])
   1079     {
   1080         pu1_src += src_strd;
   1081         ht--;
   1082         pu1_src_left_cpy += 1;
   1083         for(col = 0; col < wd - 1; col++)
   1084         {
   1085             au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 1 - src_strd]);
   1086         }
   1087     }
   1088     else
   1089     {
   1090         for(col = 0; col < wd - 1; col++)
   1091         {
   1092             au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 1]);
   1093         }
   1094     }
   1095 
   1096     /* If Right is not available */
   1097     if(0 == pu1_avail[1])
   1098     {
   1099         au1_mask[wd - 1] = 0;
   1100     }
   1101 
   1102     /* If Bottom is not available */
   1103     if(0 == pu1_avail[3])
   1104     {
   1105         ht--;
   1106     }
   1107 
   1108     /* Processing is done on the intermediate buffer and the output is written to the source buffer */
   1109     {
   1110         for(row = 0; row < ht; row++)
   1111         {
   1112             au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 1 - src_strd]);
   1113             for(col = 0; col < wd; col++)
   1114             {
   1115                 WORD32 edge_idx;
   1116 
   1117                 u1_sign_down = SIGN(pu1_src[col] - ((col == 0) ? pu1_src_left_cpy[row + 1] :
   1118                                                                  pu1_src[col - 1 + src_strd]));
   1119                 edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
   1120                 if(col > 0)
   1121                     au1_sign_up[col - 1] = -u1_sign_down;
   1122 
   1123                 edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col];
   1124 
   1125                 if(0 != edge_idx)
   1126                 {
   1127                     pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
   1128                 }
   1129             }
   1130 
   1131             pu1_src += src_strd;
   1132         }
   1133 
   1134         pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp;
   1135         pu1_src[(pu1_avail[3] ?  (-src_strd) : 0)] = u1_pos_0_ht_tmp;
   1136     }
   1137 
   1138     if(0 == pu1_avail[2])
   1139         ht++;
   1140     if(0 == pu1_avail[3])
   1141         ht++;
   1142     *pu1_src_top_left = u1_src_top_left_tmp;
   1143     for(row = 0; row < ht; row++)
   1144     {
   1145         pu1_src_left[row] = au1_src_left_tmp[row];
   1146     }
   1147     for(col = 0; col < wd; col++)
   1148     {
   1149         pu1_src_top[col] = au1_src_top_tmp[col];
   1150     }
   1151 
   1152 }
   1153 
   1154 
   1155 
   1156 
   1157 void ihevc_sao_edge_offset_class3_chroma(UWORD8 *pu1_src,
   1158                                          WORD32 src_strd,
   1159                                          UWORD8 *pu1_src_left,
   1160                                          UWORD8 *pu1_src_top,
   1161                                          UWORD8 *pu1_src_top_left,
   1162                                          UWORD8 *pu1_src_top_right,
   1163                                          UWORD8 *pu1_src_bot_left,
   1164                                          UWORD8 *pu1_avail,
   1165                                          WORD8 *pi1_sao_offset_u,
   1166                                          WORD8 *pi1_sao_offset_v,
   1167                                          WORD32 wd,
   1168                                          WORD32 ht)
   1169 {
   1170     WORD32 row, col;
   1171     UWORD8 au1_mask[MAX_CTB_SIZE];
   1172     UWORD8 au1_src_left_tmp[2 * MAX_CTB_SIZE], au1_src_top_tmp[MAX_CTB_SIZE];
   1173     UWORD8 au1_src_top_left_tmp[2];
   1174     WORD8 au1_sign_up[MAX_CTB_SIZE];
   1175     UWORD8 *pu1_src_left_cpy;
   1176     WORD8 u1_sign_down;
   1177     WORD32 bit_depth;
   1178 
   1179     UWORD8 u1_pos_wd_0_tmp_u;
   1180     UWORD8 u1_pos_wd_0_tmp_v;
   1181     UWORD8 u1_pos_0_ht_tmp_u;
   1182     UWORD8 u1_pos_0_ht_tmp_v;
   1183 
   1184     bit_depth = BIT_DEPTH_CHROMA;
   1185     pu1_src_left_cpy = pu1_src_left;
   1186 
   1187     /* Initialize the mask values */
   1188     memset(au1_mask, 0xFF, MAX_CTB_SIZE);
   1189 
   1190     /* Update left, top and top-left arrays */
   1191     au1_src_top_left_tmp[0] = pu1_src_top[wd - 2];
   1192     au1_src_top_left_tmp[1] = pu1_src_top[wd - 1];
   1193     for(row = 0; row < ht; row++)
   1194     {
   1195         au1_src_left_tmp[2 * row] = pu1_src[row * src_strd + wd - 2];
   1196         au1_src_left_tmp[2 * row + 1] = pu1_src[row * src_strd + wd - 1];
   1197     }
   1198     for(col = 0; col < wd; col++)
   1199     {
   1200         au1_src_top_tmp[col] = pu1_src[(ht - 1) * src_strd + col];
   1201     }
   1202 
   1203 
   1204     /* If top-right is available, process separately */
   1205     if(0 != pu1_avail[5])
   1206     {
   1207         WORD32 edge_idx;
   1208 
   1209         /* U */
   1210         edge_idx = 2 + SIGN(pu1_src[wd - 2] - pu1_src_top_right[0]) +
   1211                         SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 - 2 + src_strd]);
   1212 
   1213         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
   1214 
   1215         if(0 != edge_idx)
   1216         {
   1217             u1_pos_wd_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
   1218         }
   1219         else
   1220         {
   1221             u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
   1222         }
   1223 
   1224         /* V */
   1225         edge_idx = 2 + SIGN(pu1_src[wd - 1] - pu1_src_top_right[1]) +
   1226                         SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 - 2 + src_strd]);
   1227 
   1228         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
   1229 
   1230         if(0 != edge_idx)
   1231         {
   1232             u1_pos_wd_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
   1233         }
   1234         else
   1235         {
   1236             u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
   1237         }
   1238     }
   1239     else
   1240     {
   1241         u1_pos_wd_0_tmp_u = pu1_src[wd - 2];
   1242         u1_pos_wd_0_tmp_v = pu1_src[wd - 1];
   1243     }
   1244 
   1245     /* If bottom-left is available, process separately */
   1246     if(0 != pu1_avail[6])
   1247     {
   1248         WORD32 edge_idx;
   1249 
   1250         /* U */
   1251         edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src[(ht - 1) * src_strd + 2 - src_strd]) +
   1252                         SIGN(pu1_src[(ht - 1) * src_strd] - pu1_src_bot_left[0]);
   1253 
   1254         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
   1255 
   1256         if(0 != edge_idx)
   1257         {
   1258             u1_pos_0_ht_tmp_u = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset_u[edge_idx], 0, (1 << bit_depth) - 1);
   1259         }
   1260         else
   1261         {
   1262             u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
   1263         }
   1264 
   1265         /* V */
   1266         edge_idx = 2 + SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src[(ht - 1) * src_strd + 1 + 2 - src_strd]) +
   1267                         SIGN(pu1_src[(ht - 1) * src_strd + 1] - pu1_src_bot_left[1]);
   1268 
   1269         edge_idx = gi4_ihevc_table_edge_idx[edge_idx];
   1270 
   1271         if(0 != edge_idx)
   1272         {
   1273             u1_pos_0_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd + 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1);
   1274         }
   1275         else
   1276         {
   1277             u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
   1278         }
   1279     }
   1280     else
   1281     {
   1282         u1_pos_0_ht_tmp_u = pu1_src[(ht - 1) * src_strd];
   1283         u1_pos_0_ht_tmp_v = pu1_src[(ht - 1) * src_strd + 1];
   1284     }
   1285 
   1286     /* If Left is not available */
   1287     if(0 == pu1_avail[0])
   1288     {
   1289         au1_mask[0] = 0;
   1290     }
   1291 
   1292     /* If Top is not available */
   1293     if(0 == pu1_avail[2])
   1294     {
   1295         pu1_src += src_strd;
   1296         ht--;
   1297         pu1_src_left_cpy += 2;
   1298         for(col = 0; col < wd - 2; col++)
   1299         {
   1300             au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src[col + 2 - src_strd]);
   1301         }
   1302     }
   1303     else
   1304     {
   1305         for(col = 0; col < wd - 2; col++)
   1306         {
   1307             au1_sign_up[col] = SIGN(pu1_src[col] - pu1_src_top[col + 2]);
   1308         }
   1309     }
   1310 
   1311     /* If Right is not available */
   1312     if(0 == pu1_avail[1])
   1313     {
   1314         au1_mask[(wd - 1) >> 1] = 0;
   1315     }
   1316 
   1317     /* If Bottom is not available */
   1318     if(0 == pu1_avail[3])
   1319     {
   1320         ht--;
   1321     }
   1322 
   1323     /* Processing is done on the intermediate buffer and the output is written to the source buffer */
   1324     {
   1325         for(row = 0; row < ht; row++)
   1326         {
   1327             au1_sign_up[wd - 2] = SIGN(pu1_src[wd - 2] - pu1_src[wd - 2 + 2 - src_strd]);
   1328             au1_sign_up[wd - 1] = SIGN(pu1_src[wd - 1] - pu1_src[wd - 1 + 2 - src_strd]);
   1329             for(col = 0; col < wd; col++)
   1330             {
   1331                 WORD32 edge_idx;
   1332                 WORD8 *pi1_sao_offset;
   1333 
   1334                 pi1_sao_offset = (0 == col % 2) ? pi1_sao_offset_u : pi1_sao_offset_v;
   1335 
   1336                 u1_sign_down = SIGN(pu1_src[col] - ((col < 2) ? pu1_src_left_cpy[2 * (row + 1) + col] :
   1337                                                                 pu1_src[col - 2 + src_strd]));
   1338                 edge_idx = 2 + au1_sign_up[col] + u1_sign_down;
   1339                 if(col > 1)
   1340                     au1_sign_up[col - 2] = -u1_sign_down;
   1341 
   1342                 edge_idx = gi4_ihevc_table_edge_idx[edge_idx] & au1_mask[col >> 1];
   1343 
   1344                 if(0 != edge_idx)
   1345                 {
   1346                     pu1_src[col] = CLIP3(pu1_src[col] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1);
   1347                 }
   1348             }
   1349 
   1350             pu1_src += src_strd;
   1351         }
   1352 
   1353         pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 2] = u1_pos_wd_0_tmp_u;
   1354         pu1_src[-(pu1_avail[2] ? ht : ht + 1) * src_strd + wd - 1] = u1_pos_wd_0_tmp_v;
   1355         pu1_src[(pu1_avail[3] ?  (-src_strd) : 0)] = u1_pos_0_ht_tmp_u;
   1356         pu1_src[(pu1_avail[3] ?  (-src_strd) : 0) + 1] = u1_pos_0_ht_tmp_v;
   1357     }
   1358 
   1359     if(0 == pu1_avail[2])
   1360         ht++;
   1361     if(0 == pu1_avail[3])
   1362         ht++;
   1363     pu1_src_top_left[0] = au1_src_top_left_tmp[0];
   1364     pu1_src_top_left[1] = au1_src_top_left_tmp[1];
   1365     for(row = 0; row < 2 * ht; row++)
   1366     {
   1367         pu1_src_left[row] = au1_src_left_tmp[row];
   1368     }
   1369     for(col = 0; col < wd; col++)
   1370     {
   1371         pu1_src_top[col] = au1_src_top_tmp[col];
   1372     }
   1373 
   1374 }
   1375