Home | History | Annotate | Download | only in decoder
      1 /******************************************************************************
      2 *
      3 * Copyright (C) 2012 Ittiam Systems Pvt Ltd, Bangalore
      4 *
      5 * Licensed under the Apache License, Version 2.0 (the "License");
      6 * you may not use this file except in compliance with the License.
      7 * You may obtain a copy of the License at:
      8 *
      9 * http://www.apache.org/licenses/LICENSE-2.0
     10 *
     11 * Unless required by applicable law or agreed to in writing, software
     12 * distributed under the License is distributed on an "AS IS" BASIS,
     13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14 * See the License for the specific language governing permissions and
     15 * limitations under the License.
     16 *
     17 ******************************************************************************/
     18 /**
     19  *******************************************************************************
     20  * @file
     21  *  ihevc_sao.c
     22  *
     23  * @brief
     24  *  Contains function definitions for sample adaptive offset process
     25  *
     26  * @author
     27  *  Srinivas T
     28  *
     29  * @par List of Functions:
     30  *
     31  * @remarks
     32  *  None
     33  *
     34  *******************************************************************************
     35  */
     36 
     37 #include <stdio.h>
     38 #include <stddef.h>
     39 #include <stdlib.h>
     40 #include <string.h>
     41 #include <assert.h>
     42 
     43 #include "ihevc_typedefs.h"
     44 #include "iv.h"
     45 #include "ivd.h"
     46 #include "ihevcd_cxa.h"
     47 #include "ithread.h"
     48 
     49 #include "ihevc_defs.h"
     50 #include "ihevc_debug.h"
     51 #include "ihevc_defs.h"
     52 #include "ihevc_structs.h"
     53 #include "ihevc_macros.h"
     54 #include "ihevc_platform_macros.h"
     55 #include "ihevc_cabac_tables.h"
     56 #include "ihevc_sao.h"
     57 #include "ihevc_mem_fns.h"
     58 
     59 #include "ihevc_error.h"
     60 #include "ihevc_common_tables.h"
     61 
     62 #include "ihevcd_trace.h"
     63 #include "ihevcd_defs.h"
     64 #include "ihevcd_function_selector.h"
     65 #include "ihevcd_structs.h"
     66 #include "ihevcd_error.h"
     67 #include "ihevcd_nal.h"
     68 #include "ihevcd_bitstream.h"
     69 #include "ihevcd_job_queue.h"
     70 #include "ihevcd_utils.h"
     71 
     72 #include "ihevc_deblk.h"
     73 #include "ihevc_deblk_tables.h"
     74 #include "ihevcd_profile.h"
     75 #include "ihevcd_sao.h"
     76 #include "ihevcd_debug.h"
     77 
     78 #define SAO_SHIFT_CTB    8 /* Shift, in luma pixels along x and y, of the block processed by ihevcd_sao_shift_ctb() relative to the CTB grid */
     79 
     80 /**
     81  * SAO at CTB level is implemented for a shifted CTB(8 pixels in x and y directions)
     82  */
     83 void ihevcd_sao_ctb(sao_ctxt_t *ps_sao_ctxt)
     84 {
     85     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
     86     UWORD8 *pu1_src_luma;
     87     UWORD8 *pu1_src_chroma;
     88     WORD32 src_strd;
     89     WORD32 ctb_size;
     90     WORD32 log2_ctb_size;
     91     sps_t *ps_sps;
     92     sao_t *ps_sao;
     93     WORD32 row, col;
     94     UWORD8 au1_avail_luma[8];
     95     UWORD8 au1_avail_chroma[8];
     96     WORD32 i;
     97     UWORD8 *pu1_src_top_luma;
     98     UWORD8 *pu1_src_top_chroma;
     99     UWORD8 *pu1_src_left_luma;
    100     UWORD8 *pu1_src_left_chroma;
    101     UWORD8 au1_src_top_right[2];
    102     UWORD8 au1_src_bot_left[2];
    103     UWORD8 *pu1_no_loop_filter_flag;
    104     WORD32 loop_filter_strd;
    105 
    106     WORD8 ai1_offset_y[5];
    107     WORD8 ai1_offset_cb[5];
    108     WORD8 ai1_offset_cr[5];
    109 
    110     PROFILE_DISABLE_SAO();
    111 
    112     ai1_offset_y[0] = 0;
    113     ai1_offset_cb[0] = 0;
    114     ai1_offset_cr[0] = 0;
    115 
    116     ps_sps = ps_sao_ctxt->ps_sps;
    117     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    118     ctb_size = (1 << log2_ctb_size);
    119     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
    120     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
    121     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
    122 
    123     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    124     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) / 64;
    125 
    126     /* Current CTB */
    127     {
    128         WORD32 sao_wd_luma;
    129         WORD32 sao_wd_chroma;
    130         WORD32 sao_ht_luma;
    131         WORD32 sao_ht_chroma;
    132 
    133         WORD32 remaining_rows;
    134         WORD32 remaining_cols;
    135 
    136         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
    137         sao_wd_luma = MIN(ctb_size, remaining_cols);
    138         sao_wd_chroma = MIN(ctb_size, remaining_cols);
    139 
    140         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
    141         sao_ht_luma = MIN(ctb_size, remaining_rows);
    142         sao_ht_chroma = MIN(ctb_size, remaining_rows) / 2;
    143 
    144         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
    145         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
    146         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
    147         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
    148 
    149         pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
    150                         ((ps_sao_ctxt->i4_ctb_y * ctb_size) / 8) * loop_filter_strd +
    151                         ((ps_sao_ctxt->i4_ctb_x * ctb_size) / 64);
    152 
    153         ai1_offset_y[1] = ps_sao->b4_y_offset_1;
    154         ai1_offset_y[2] = ps_sao->b4_y_offset_2;
    155         ai1_offset_y[3] = ps_sao->b4_y_offset_3;
    156         ai1_offset_y[4] = ps_sao->b4_y_offset_4;
    157 
    158         ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
    159         ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
    160         ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
    161         ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
    162 
    163         ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
    164         ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
    165         ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
    166         ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
    167 
    168         for(i = 0; i < 8; i++)
    169         {
    170             au1_avail_luma[i] = 255;
    171             au1_avail_chroma[i] = 255;
    172         }
    173 
    174 
    175         if(0 == ps_sao_ctxt->i4_ctb_x)
    176         {
    177             au1_avail_luma[0] = 0;
    178             au1_avail_luma[4] = 0;
    179             au1_avail_luma[6] = 0;
    180 
    181             au1_avail_chroma[0] = 0;
    182             au1_avail_chroma[4] = 0;
    183             au1_avail_chroma[6] = 0;
    184         }
    185 
    186         if(ps_sps->i2_pic_wd_in_ctb - 1 == ps_sao_ctxt->i4_ctb_x)
    187         {
    188             au1_avail_luma[1] = 0;
    189             au1_avail_luma[5] = 0;
    190             au1_avail_luma[7] = 0;
    191 
    192             au1_avail_chroma[1] = 0;
    193             au1_avail_chroma[5] = 0;
    194             au1_avail_chroma[7] = 0;
    195         }
    196 
    197         if(0 == ps_sao_ctxt->i4_ctb_y)
    198         {
    199             au1_avail_luma[2] = 0;
    200             au1_avail_luma[4] = 0;
    201             au1_avail_luma[5] = 0;
    202 
    203             au1_avail_chroma[2] = 0;
    204             au1_avail_chroma[4] = 0;
    205             au1_avail_chroma[5] = 0;
    206         }
    207 
    208         if(ps_sps->i2_pic_ht_in_ctb - 1 == ps_sao_ctxt->i4_ctb_y)
    209         {
    210             au1_avail_luma[3] = 0;
    211             au1_avail_luma[6] = 0;
    212             au1_avail_luma[7] = 0;
    213 
    214             au1_avail_chroma[3] = 0;
    215             au1_avail_chroma[6] = 0;
    216             au1_avail_chroma[7] = 0;
    217         }
    218 
    219 
    220         if(0 == ps_sao->b3_y_type_idx)
    221         {
    222             /* Update left, top and top-left */
    223             for(row = 0; row < sao_ht_luma; row++)
    224             {
    225                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
    226             }
    227             ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
    228 
    229             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
    230 
    231         }
    232         else
    233         {
    234             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 2) * (MAX_CTB_SIZE + 2)];
    235             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 2) + 1;
    236             WORD32 tmp_strd = MAX_CTB_SIZE + 2;
    237             WORD32 no_loop_filter_enabled = 0;
    238 
    239             /* Check the loop filter flags and copy the original values for back up */
    240             {
    241                 UWORD32 u4_no_loop_filter_flag;
    242                 WORD32 min_cu = 8;
    243                 UWORD8 *pu1_src_tmp = pu1_src_luma;
    244 
    245                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
    246                 {
    247                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
    248                                     ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
    249                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
    250 
    251                     if(u4_no_loop_filter_flag)
    252                     {
    253                         WORD32 tmp_wd = sao_wd_luma;
    254                         no_loop_filter_enabled = 1;
    255                         while(tmp_wd > 0)
    256                         {
    257                             if(CTZ(u4_no_loop_filter_flag))
    258                             {
    259                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    260                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    261                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    262                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    263                             }
    264                             else
    265                             {
    266                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
    267                                 {
    268                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    269                                     {
    270                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
    271                                     }
    272                                 }
    273 
    274                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    275                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    276                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    277                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    278                             }
    279                         }
    280 
    281                         pu1_src_tmp -= sao_wd_luma;
    282                     }
    283 
    284                     pu1_src_tmp += min_cu * src_strd;
    285                     pu1_src_copy += min_cu * tmp_strd;
    286                 }
    287             }
    288 
    289             if(1 == ps_sao->b3_y_type_idx)
    290             {
    291                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
    292                                                                           src_strd,
    293                                                                           pu1_src_left_luma,
    294                                                                           pu1_src_top_luma,
    295                                                                           ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
    296                                                                           ps_sao->b5_y_band_pos,
    297                                                                           ai1_offset_y,
    298                                                                           sao_wd_luma,
    299                                                                           sao_ht_luma);
    300             }
    301             else // if(2 <= ps_sao->b3_y_type_idx)
    302             {
    303                 au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
    304                 au1_src_bot_left[0] = pu1_src_luma[sao_ht_luma * src_strd - 1];
    305                 ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
    306                                                                   src_strd,
    307                                                                   pu1_src_left_luma,
    308                                                                   pu1_src_top_luma,
    309                                                                   ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb,
    310                                                                   au1_src_top_right,
    311                                                                   au1_src_bot_left,
    312                                                                   au1_avail_luma,
    313                                                                   ai1_offset_y,
    314                                                                   sao_wd_luma,
    315                                                                   sao_ht_luma);
    316             }
    317 
    318             /* Check the loop filter flags and copy the original values back if they are set */
    319             if(no_loop_filter_enabled)
    320             {
    321                 UWORD32 u4_no_loop_filter_flag;
    322                 WORD32 min_cu = 8;
    323                 UWORD8 *pu1_src_tmp = pu1_src_luma;
    324 
    325                 for(i = 0; i < (sao_ht_luma + min_cu - 1) / min_cu; i++)
    326                 {
    327                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma) / 8) % 8);
    328                     u4_no_loop_filter_flag &= (1 << ((sao_wd_luma + (min_cu - 1)) / min_cu)) - 1;
    329 
    330                     if(u4_no_loop_filter_flag)
    331                     {
    332                         WORD32 tmp_wd = sao_wd_luma;
    333                         while(tmp_wd > 0)
    334                         {
    335                             if(CTZ(u4_no_loop_filter_flag))
    336                             {
    337                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    338                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    339                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    340                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    341                             }
    342                             else
    343                             {
    344                                 for(row = 0; row < MIN(min_cu, sao_ht_luma - (i - 1) * min_cu); row++)
    345                                 {
    346                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    347                                     {
    348                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
    349                                     }
    350                                 }
    351 
    352                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    353                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    354                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    355                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    356                             }
    357                         }
    358 
    359                         pu1_src_tmp -= sao_wd_luma;
    360                     }
    361 
    362                     pu1_src_tmp += min_cu * src_strd;
    363                     pu1_src_copy += min_cu * tmp_strd;
    364                 }
    365             }
    366 
    367         }
    368 
    369         if(0 == ps_sao->b3_cb_type_idx)
    370         {
    371             for(row = 0; row < sao_ht_chroma; row++)
    372             {
    373                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
    374                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
    375             }
    376             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
    377             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
    378 
    379             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
    380         }
    381         else
    382         {
    383             UWORD8 au1_src_copy[(MAX_CTB_SIZE + 4) * (MAX_CTB_SIZE + 2)];
    384             UWORD8 *pu1_src_copy = au1_src_copy + (MAX_CTB_SIZE + 4) + 2;
    385             WORD32 tmp_strd = MAX_CTB_SIZE + 4;
    386             WORD32 no_loop_filter_enabled = 0;
    387 
    388             /* Check the loop filter flags and copy the original values for back up */
    389             {
    390                 UWORD32 u4_no_loop_filter_flag;
    391                 WORD32 min_cu = 4;
    392                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
    393 
    394                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
    395                 {
    396                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
    397                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
    398 
    399                     if(u4_no_loop_filter_flag)
    400                     {
    401                         WORD32 tmp_wd = sao_wd_chroma;
    402                         no_loop_filter_enabled = 1;
    403                         while(tmp_wd > 0)
    404                         {
    405                             if(CTZ(u4_no_loop_filter_flag))
    406                             {
    407                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    408                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    409                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    410                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    411                             }
    412                             else
    413                             {
    414                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
    415                                 {
    416                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    417                                     {
    418                                         pu1_src_copy[row * src_strd + col] = pu1_src_tmp[row * tmp_strd + col];
    419                                     }
    420                                 }
    421 
    422                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    423                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    424                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    425                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    426                             }
    427                         }
    428 
    429                         pu1_src_tmp -= sao_wd_chroma;
    430                     }
    431 
    432                     pu1_src_tmp += min_cu * src_strd;
    433                     pu1_src_copy += min_cu * tmp_strd;
    434                 }
    435             }
    436 
    437             if(1 == ps_sao->b3_cb_type_idx)
    438             {
    439                 ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
    440                                                                             src_strd,
    441                                                                             pu1_src_left_chroma,
    442                                                                             pu1_src_top_chroma,
    443                                                                             ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
    444                                                                             ps_sao->b5_cb_band_pos,
    445                                                                             ps_sao->b5_cr_band_pos,
    446                                                                             ai1_offset_cb,
    447                                                                             ai1_offset_cr,
    448                                                                             sao_wd_chroma,
    449                                                                             sao_ht_chroma
    450                                                                            );
    451             }
    452             else // if(2 <= ps_sao->b3_cb_type_idx)
    453             {
    454                 au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
    455                 au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
    456                 au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
    457                 au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
    458                 ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
    459                                                                      src_strd,
    460                                                                      pu1_src_left_chroma,
    461                                                                      pu1_src_top_chroma,
    462                                                                      ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb,
    463                                                                      au1_src_top_right,
    464                                                                      au1_src_bot_left,
    465                                                                      au1_avail_chroma,
    466                                                                      ai1_offset_cb,
    467                                                                      ai1_offset_cr,
    468                                                                      sao_wd_chroma,
    469                                                                      sao_ht_chroma);
    470             }
    471 
    472             /* Check the loop filter flags and copy the original values back if they are set */
    473             if(no_loop_filter_enabled)
    474             {
    475                 UWORD32 u4_no_loop_filter_flag;
    476                 WORD32 min_cu = 4;
    477                 UWORD8 *pu1_src_tmp = pu1_src_chroma;
    478 
    479                 for(i = 0; i < (sao_ht_chroma + min_cu - 1) / min_cu; i++)
    480                 {
    481                     u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >> ((((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma) / 8) % 8);
    482                     u4_no_loop_filter_flag &= (1 << ((sao_wd_chroma + (min_cu - 1)) / min_cu)) - 1;
    483 
    484                     if(u4_no_loop_filter_flag)
    485                     {
    486                         WORD32 tmp_wd = sao_wd_chroma;
    487                         while(tmp_wd > 0)
    488                         {
    489                             if(CTZ(u4_no_loop_filter_flag))
    490                             {
    491                                 u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    492                                 pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    493                                 pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
    494                                 tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
    495                             }
    496                             else
    497                             {
    498                                 for(row = 0; row < MIN(min_cu, sao_ht_chroma - (i - 1) * min_cu); row++)
    499                                 {
    500                                     for(col = 0; col < MIN((WORD32)CTZ(~u4_no_loop_filter_flag) * min_cu, tmp_wd); col++)
    501                                     {
    502                                         pu1_src_tmp[row * src_strd + col] = pu1_src_copy[row * tmp_strd + col];
    503                                     }
    504                                 }
    505 
    506                                 u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    507                                 pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    508                                 pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
    509                                 tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
    510                             }
    511                         }
    512 
    513                         pu1_src_tmp -= sao_wd_chroma;
    514                     }
    515 
    516                     pu1_src_tmp += min_cu * src_strd;
    517                     pu1_src_copy += min_cu * tmp_strd;
    518                 }
    519             }
    520 
    521         }
    522 
    523     }
    524 }
    525 
    526 void ihevcd_sao_shift_ctb(sao_ctxt_t *ps_sao_ctxt)
    527 {
    528     codec_t *ps_codec = ps_sao_ctxt->ps_codec;
    529     UWORD8 *pu1_src_luma;
    530     UWORD8 *pu1_src_chroma;
    531     WORD32 src_strd;
    532     WORD32 ctb_size;
    533     WORD32 log2_ctb_size;
    534     sps_t *ps_sps;
    535     sao_t *ps_sao;
    536     pps_t *ps_pps;
    537     slice_header_t *ps_slice_hdr, *ps_slice_hdr_base;
    538     tile_t *ps_tile;
    539     UWORD16 *pu1_slice_idx;
    540     UWORD16 *pu1_tile_idx;
    541     WORD32 row, col;
    542     UWORD8 au1_avail_luma[8];
    543     UWORD8 au1_avail_chroma[8];
    544     UWORD8 au1_tile_slice_boundary[8];
    545     UWORD8 au4_ilf_across_tile_slice_enable[8];
    546     WORD32 i;
    547     UWORD8 *pu1_src_top_luma;
    548     UWORD8 *pu1_src_top_chroma;
    549     UWORD8 *pu1_src_left_luma;
    550     UWORD8 *pu1_src_left_chroma;
    551     UWORD8 au1_src_top_right[2];
    552     UWORD8 au1_src_bot_left[2];
    553     UWORD8 *pu1_no_loop_filter_flag;
    554     UWORD8 *pu1_src_backup_luma;
    555     UWORD8 *pu1_src_backup_chroma;
    556     WORD32 backup_strd;
    557     WORD32 loop_filter_strd;
    558 
    559     WORD32 no_loop_filter_enabled_luma = 0;
    560     WORD32 no_loop_filter_enabled_chroma = 0;
    561     UWORD8 *pu1_sao_src_top_left_chroma_curr_ctb;
    562     UWORD8 *pu1_sao_src_top_left_luma_curr_ctb;
    563     UWORD8 *pu1_sao_src_luma_top_left_ctb;
    564     UWORD8 *pu1_sao_src_chroma_top_left_ctb;
    565     UWORD8 *pu1_sao_src_top_left_luma_top_right;
    566     UWORD8 *pu1_sao_src_top_left_chroma_top_right;
    567     UWORD8  u1_sao_src_top_left_luma_bot_left;
    568     UWORD8  *pu1_sao_src_top_left_luma_bot_left;
    569     UWORD8 *au1_sao_src_top_left_chroma_bot_left;
    570     UWORD8 *pu1_sao_src_top_left_chroma_bot_left;
    571     /* Only 5 values are used, but arrays are large
    572      enough so that SIMD functions can read 64 bits at a time */
    573     WORD8 ai1_offset_y[8];
    574     WORD8 ai1_offset_cb[8];
    575     WORD8 ai1_offset_cr[8];
    576     WORD32  chroma_yuv420sp_vu = ps_sao_ctxt->is_chroma_yuv420sp_vu;
    577 
    578     PROFILE_DISABLE_SAO();
    579 
    580     ai1_offset_y[0] = 0;
    581     ai1_offset_cb[0] = 0;
    582     ai1_offset_cr[0] = 0;
    583 
    584     ps_sps = ps_sao_ctxt->ps_sps;
    585     ps_pps = ps_sao_ctxt->ps_pps;
    586     ps_tile = ps_sao_ctxt->ps_tile;
    587 
    588     log2_ctb_size = ps_sps->i1_log2_ctb_size;
    589     ctb_size = (1 << log2_ctb_size);
    590     src_strd = ps_sao_ctxt->ps_codec->i4_strd;
    591     ps_slice_hdr_base = ps_sao_ctxt->ps_codec->ps_slice_hdr_base;
    592     ps_slice_hdr = ps_slice_hdr_base + (ps_sao_ctxt->i4_cur_slice_idx & (MAX_SLICE_HDR_CNT - 1));
    593 
    594     pu1_slice_idx = ps_sao_ctxt->pu1_slice_idx;
    595     pu1_tile_idx = ps_sao_ctxt->pu1_tile_idx;
    596     pu1_src_luma = ps_sao_ctxt->pu1_cur_pic_luma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd) << (log2_ctb_size));
    597     pu1_src_chroma = ps_sao_ctxt->pu1_cur_pic_chroma + ((ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sao_ctxt->ps_codec->i4_strd / 2) << (log2_ctb_size));
    598 
    599     /*Stores the left value for each row ctbs- Needed for column tiles*/
    600     pu1_sao_src_top_left_luma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_luma_curr_ctb + ((ps_sao_ctxt->i4_ctb_y));
    601     pu1_sao_src_top_left_chroma_curr_ctb = ps_sao_ctxt->pu1_sao_src_top_left_chroma_curr_ctb + (2 * (ps_sao_ctxt->i4_ctb_y));
    602     pu1_sao_src_luma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_luma_top_left_ctb + ((ps_sao_ctxt->i4_ctb_y));
    603     pu1_sao_src_chroma_top_left_ctb = ps_sao_ctxt->pu1_sao_src_chroma_top_left_ctb + (2 * ps_sao_ctxt->i4_ctb_y);
    604     u1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->u1_sao_src_top_left_luma_bot_left; // + ((ps_sao_ctxt->i4_ctb_y));
    605     pu1_sao_src_top_left_luma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_luma_bot_left + ((ps_sao_ctxt->i4_ctb_y));
    606     au1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->au1_sao_src_top_left_chroma_bot_left; // + (2 * ps_sao_ctxt->i4_ctb_y);
    607     pu1_sao_src_top_left_chroma_bot_left = ps_sao_ctxt->pu1_sao_src_top_left_chroma_bot_left + (2 * ps_sao_ctxt->i4_ctb_y);
    608     pu1_sao_src_top_left_luma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_luma_top_right + ((ps_sao_ctxt->i4_ctb_x));
    609     pu1_sao_src_top_left_chroma_top_right = ps_sao_ctxt->pu1_sao_src_top_left_chroma_top_right + (2 * ps_sao_ctxt->i4_ctb_x);
    610 
    611     ps_sao = ps_sao_ctxt->ps_pic_sao + ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_y * ps_sps->i2_pic_wd_in_ctb;
    612     loop_filter_strd =  (ps_sps->i2_pic_width_in_luma_samples + 63) >> 6;
    613     backup_strd = 2 * MAX_CTB_SIZE;
    614 
    615     DEBUG_INIT_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
    616 
    617     {
    618         /* Check the loop filter flags and copy the original values for back up */
    619         /* Luma */
    620 
    621         /* Done unconditionally since SAO is done on a shifted CTB and the constituent CTBs
    622          * can belong to different slice with their own sao_enable flag */
    623         {
    624             UWORD32 u4_no_loop_filter_flag;
    625             WORD32 loop_filter_bit_pos;
    626             WORD32 log2_min_cu = 3;
    627             WORD32 min_cu = (1 << log2_min_cu);
    628             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
    629             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
    630             WORD32 sao_blk_wd = ctb_size;
    631             WORD32 remaining_rows;
    632             WORD32 remaining_cols;
    633 
    634             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
    635             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
    636             if(remaining_rows <= SAO_SHIFT_CTB)
    637                 sao_blk_ht += remaining_rows;
    638             if(remaining_cols <= SAO_SHIFT_CTB)
    639                 sao_blk_wd += remaining_cols;
    640 
    641             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
    642             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
    643 
    644             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
    645 
    646             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
    647                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
    648             if(ps_sao_ctxt->i4_ctb_x > 0)
    649                 loop_filter_bit_pos -= 1;
    650 
    651             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
    652                             (loop_filter_bit_pos >> 3);
    653 
    654             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
    655                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
    656             {
    657                 WORD32 tmp_wd = sao_blk_wd;
    658 
    659                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
    660                                 (loop_filter_bit_pos & 7);
    661                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
    662 
    663                 if(u4_no_loop_filter_flag)
    664                 {
    665                     no_loop_filter_enabled_luma = 1;
    666                     while(tmp_wd > 0)
    667                     {
    668                         if(CTZ(u4_no_loop_filter_flag))
    669                         {
    670                             pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    671                             pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    672                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
    673                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    674                         }
    675                         else
    676                         {
    677                             for(row = 0; row < min_cu; row++)
    678                             {
    679                                 for(col = 0; col < MIN((WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
    680                                 {
    681                                     pu1_src_backup_luma[row * backup_strd + col] = pu1_src_tmp_luma[row * src_strd + col];
    682                                 }
    683                             }
    684                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    685                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    686                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
    687                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    688                         }
    689                     }
    690 
    691                     pu1_src_tmp_luma -= sao_blk_wd;
    692                     pu1_src_backup_luma -= sao_blk_wd;
    693                 }
    694 
    695                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
    696                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
    697             }
    698         }
    699 
    700         /* Chroma */
    701 
    702         {
    703             UWORD32 u4_no_loop_filter_flag;
    704             WORD32 loop_filter_bit_pos;
    705             WORD32 log2_min_cu = 3;
    706             WORD32 min_cu = (1 << log2_min_cu);
    707             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
    708             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
    709             WORD32 sao_blk_wd = ctb_size;
    710             WORD32 remaining_rows;
    711             WORD32 remaining_cols;
    712 
    713             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
    714             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
    715             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
    716                 sao_blk_ht += remaining_rows;
    717             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
    718                 sao_blk_wd += remaining_cols;
    719 
    720             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
    721             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
    722 
    723             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
    724 
    725             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
    726                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
    727             if(ps_sao_ctxt->i4_ctb_x > 0)
    728                 loop_filter_bit_pos -= 2;
    729 
    730             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
    731                             (loop_filter_bit_pos >> 3);
    732 
    733             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
    734                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
    735             {
    736                 WORD32 tmp_wd = sao_blk_wd;
    737 
    738                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
    739                                 (loop_filter_bit_pos & 7);
    740                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
    741 
    742                 if(u4_no_loop_filter_flag)
    743                 {
    744                     no_loop_filter_enabled_chroma = 1;
    745                     while(tmp_wd > 0)
    746                     {
    747                         if(CTZ(u4_no_loop_filter_flag))
    748                         {
    749                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    750                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    751                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
    752                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
    753                         }
    754                         else
    755                         {
    756                             for(row = 0; row < min_cu / 2; row++)
    757                             {
    758                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
    759                                 {
    760                                     pu1_src_backup_chroma[row * backup_strd + col] = pu1_src_tmp_chroma[row * src_strd + col];
    761                                 }
    762                             }
    763 
    764                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    765                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
    766                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
    767                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
    768                         }
    769                     }
    770 
    771                     pu1_src_tmp_chroma -= sao_blk_wd;
    772                     pu1_src_backup_chroma -= sao_blk_wd;
    773                 }
    774 
    775                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
    776                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
    777             }
    778         }
    779     }
    780 
    781     DEBUG_PROCESS_TMP_BUF(ps_sao_ctxt->pu1_tmp_buf_luma, ps_sao_ctxt->pu1_tmp_buf_chroma);
    782 
    783     /* Top-left CTB */
    784     if(ps_sao_ctxt->i4_ctb_x > 0 && ps_sao_ctxt->i4_ctb_y > 0)
    785     {
    786         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
    787         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
    788         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
    789         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
    790 
    791         WORD32 ctbx_tl_t = 0, ctbx_tl_l = 0, ctbx_tl_r = 0, ctbx_tl_d = 0, ctbx_tl = 0;
    792         WORD32 ctby_tl_t = 0, ctby_tl_l = 0, ctby_tl_r = 0, ctby_tl_d = 0, ctby_tl = 0;
    793         WORD32 au4_idx_tl[8], idx_tl;
    794 
    795         slice_header_t *ps_slice_hdr_top_left;
    796         {
    797             WORD32 top_left_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
    798                                         (ps_sao_ctxt->i4_ctb_x - 1);
    799             ps_slice_hdr_top_left = ps_slice_hdr_base + pu1_slice_idx[top_left_ctb_indx];
    800         }
    801 
    802 
    803         pu1_src_luma -= (sao_wd_luma + sao_ht_luma * src_strd);
    804         pu1_src_chroma -= (sao_wd_chroma + sao_ht_chroma * src_strd);
    805         ps_sao -= (1 + ps_sps->i2_pic_wd_in_ctb);
    806         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
    807         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
    808         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma;
    809         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
    810 
    811         if(ps_slice_hdr_top_left->i1_slice_sao_luma_flag)
    812         {
    813             if(0 == ps_sao->b3_y_type_idx)
    814             {
    815                 /* Update left, top and top-left */
    816                 for(row = 0; row < sao_ht_luma; row++)
    817                 {
    818                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
    819                 }
    820                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
    821 
    822                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
    823 
    824 
    825             }
    826 
    827             else if(1 == ps_sao->b3_y_type_idx)
    828             {
    829                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
    830                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
    831                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
    832                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
    833 
    834                 ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
    835                                                                           src_strd,
    836                                                                           pu1_src_left_luma,
    837                                                                           pu1_src_top_luma,
    838                                                                           pu1_sao_src_luma_top_left_ctb,
    839                                                                           ps_sao->b5_y_band_pos,
    840                                                                           ai1_offset_y,
    841                                                                           sao_wd_luma,
    842                                                                           sao_ht_luma
    843                                                                          );
    844             }
    845 
    846             else // if(2 <= ps_sao->b3_y_type_idx)
    847             {
    848                 ai1_offset_y[1] = ps_sao->b4_y_offset_1;
    849                 ai1_offset_y[2] = ps_sao->b4_y_offset_2;
    850                 ai1_offset_y[3] = ps_sao->b4_y_offset_3;
    851                 ai1_offset_y[4] = ps_sao->b4_y_offset_4;
    852 
    853                 for(i = 0; i < 8; i++)
    854                 {
    855                     au1_avail_luma[i] = 255;
    856                     au1_tile_slice_boundary[i] = 0;
    857                     au4_idx_tl[i] = 0;
    858                     au4_ilf_across_tile_slice_enable[i] = 1;
    859                 }
    860 
    861                 /******************************************************************
    862                  * Derive the  Top-left CTB's neighbor pixel's slice indices.
    863                  *
    864                  *          TL_T
    865                  *       4  _2__5________
    866                  *     0   |    |       |
    867                  *    TL_L | TL | 1 TL_R|
    868                  *         |____|_______|____
    869                  *        6|TL_D|7      |    |
    870                  *         | 3  |       |    |
    871                  *         |____|_______|    |
    872                  *              |            |
    873                  *              |            |
    874                  *              |____________|
    875                  *
    876                  *****************************************************************/
    877 
    878                 /*In case of slices, unless we encounter multiple slice/tiled clips, don't enter*/
    879                 {
    880                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
    881                     {
    882                         {
    883                             /*Assuming that sao shift is uniform along x and y directions*/
    884                             if((0 == (1 << log2_ctb_size) - sao_wd_luma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
    885                             {
    886                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
    887                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
    888                             }
    889                             else if(!(0 == (1 << log2_ctb_size) - sao_wd_luma))
    890                             {
    891                                 ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
    892                                 ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
    893                             }
    894                             ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
    895                             ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
    896 
    897                             ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
    898                             ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
    899 
    900                             ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
    901                             ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
    902 
    903                             ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
    904                             ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
    905                         }
    906 
    907                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
    908                         {
    909                             /*Calculate slice indices for neighbor pixels*/
    910                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
    911                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
    912                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
    913                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
    914                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
    915                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
    916 
    917                             if((0 == (1 << log2_ctb_size) - sao_wd_luma))
    918                             {
    919                                 if(ps_sao_ctxt->i4_ctb_x == 1)
    920                                 {
    921                                     au4_idx_tl[6] = -1;
    922                                     au4_idx_tl[4] = -1;
    923                                 }
    924                                 else
    925                                 {
    926                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
    927                                 }
    928                                 if(ps_sao_ctxt->i4_ctb_y == 1)
    929                                 {
    930                                     au4_idx_tl[5] = -1;
    931                                     au4_idx_tl[4] = -1;
    932                                 }
    933                                 else
    934                                 {
    935                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
    936                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
    937                                 }
    938                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
    939                             }
    940 
    941                             /* Verify that the neighbor ctbs don't cross pic boundary.
    942                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
    943                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
    944                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
    945                              * the respective pixel's flags are checked
    946                              */
    947 
    948                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma))
    949                             {
    950                                 au4_ilf_across_tile_slice_enable[4] = 0;
    951                                 au4_ilf_across_tile_slice_enable[6] = 0;
    952                             }
    953                             else
    954                             {
    955                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
    956                             }
    957                             if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
    958                             {
    959                                 au4_ilf_across_tile_slice_enable[5] = 0;
    960                                 au4_ilf_across_tile_slice_enable[4] = 0;
    961                             }
    962                             else
    963                             {
    964                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    965                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    966                             }
    967                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    968                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
    969                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
    970                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
    971                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
    972 
    973                             if(au4_idx_tl[5] > idx_tl)
    974                             {
    975                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
    976                             }
    977 
    978                             /*
    979                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
    980                              * of the pixel having a greater address is checked. Accordingly, set the availability flags.
    981                              * Hence, for top and left pixels, current ctb flag is checked. For right and down pixels,
    982                              * the respective pixel's flags are checked
    983                              */
    984                             for(i = 0; i < 8; i++)
    985                             {
    986                                 /*Sets the edges that lie on the slice/tile boundary*/
    987                                 if(au4_idx_tl[i] != idx_tl)
    988                                 {
    989                                     au1_tile_slice_boundary[i] = 1;
    990                                 }
    991                                 else
    992                                 {
    993                                     au4_ilf_across_tile_slice_enable[i] = 1;
    994                                 }
    995                             }
    996 
    997                             ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_tl, 0, 8 * sizeof(WORD32));
    998                         }
    999 
   1000                         if(ps_pps->i1_tiles_enabled_flag)
   1001                         {
   1002                             /* Calculate availability flags at tile boundary */
   1003                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1004                             {
   1005                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1006                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1007                                 {
   1008                                     /*Set the boundary arrays*/
   1009                                     /*Calculate tile indices for neighbor pixels*/
   1010                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
   1011                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
   1012                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1013                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1014                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1015                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1016 
   1017                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
   1018                                     {
   1019                                         if(ps_sao_ctxt->i4_ctb_x == 1)
   1020                                         {
   1021                                             au4_idx_tl[6] = -1;
   1022                                             au4_idx_tl[4] = -1;
   1023                                         }
   1024                                         else
   1025                                         {
   1026                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1027                                         }
   1028                                         if(ps_sao_ctxt->i4_ctb_y == 1)
   1029                                         {
   1030                                             au4_idx_tl[5] = -1;
   1031                                             au4_idx_tl[4] = -1;
   1032                                         }
   1033                                         else
   1034                                         {
   1035                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1036                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
   1037                                         }
   1038                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1039                                     }
   1040                                     for(i = 0; i < 8; i++)
   1041                                     {
   1042                                         /*Sets the edges that lie on the tile boundary*/
   1043                                         if(au4_idx_tl[i] != idx_tl)
   1044                                         {
   1045                                             au1_tile_slice_boundary[i] |= 1;
   1046                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   1047                                         }
   1048                                     }
   1049                                 }
   1050                             }
   1051                         }
   1052 
   1053 
   1054                         /*Set availability flags based on tile and slice boundaries*/
   1055                         for(i = 0; i < 8; i++)
   1056                         {
   1057                             /*Sets the edges that lie on the slice/tile boundary*/
   1058                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1059                             {
   1060                                 au1_avail_luma[i] = 0;
   1061                             }
   1062                         }
   1063                     }
   1064                 }
   1065 
   1066                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
   1067                 {
   1068                     au1_avail_luma[0] = 0;
   1069                     au1_avail_luma[4] = 0;
   1070                     au1_avail_luma[6] = 0;
   1071                 }
   1072 
   1073                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   1074                 {
   1075                     au1_avail_luma[1] = 0;
   1076                     au1_avail_luma[5] = 0;
   1077                     au1_avail_luma[7] = 0;
   1078                 }
   1079                 //y==1 case
   1080                 if((0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma))
   1081                 {
   1082                     au1_avail_luma[2] = 0;
   1083                     au1_avail_luma[4] = 0;
   1084                     au1_avail_luma[5] = 0;
   1085                 }
   1086                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1087                 {
   1088                     au1_avail_luma[3] = 0;
   1089                     au1_avail_luma[6] = 0;
   1090                     au1_avail_luma[7] = 0;
   1091                 }
   1092 
   1093                 {
   1094                     au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
   1095                     u1_sao_src_top_left_luma_bot_left = pu1_src_left_luma[sao_ht_luma];
   1096                     ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   1097                                                                       src_strd,
   1098                                                                       pu1_src_left_luma,
   1099                                                                       pu1_src_top_luma,
   1100                                                                       pu1_sao_src_luma_top_left_ctb,
   1101                                                                       au1_src_top_right,
   1102                                                                       &u1_sao_src_top_left_luma_bot_left,
   1103                                                                       au1_avail_luma,
   1104                                                                       ai1_offset_y,
   1105                                                                       sao_wd_luma,
   1106                                                                       sao_ht_luma);
   1107                 }
   1108             }
   1109 
   1110         }
   1111         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1112         {
   1113             /* Update left, top and top-left */
   1114             for(row = 0; row < sao_ht_luma; row++)
   1115             {
   1116                 pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   1117             }
   1118             pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   1119 
   1120             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   1121         }
   1122 
   1123         if(ps_slice_hdr_top_left->i1_slice_sao_chroma_flag)
   1124         {
   1125             if(0 == ps_sao->b3_cb_type_idx)
   1126             {
   1127                 for(row = 0; row < sao_ht_chroma; row++)
   1128                 {
   1129                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   1130                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   1131                 }
   1132                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   1133                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   1134 
   1135                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   1136 
   1137             }
   1138 
   1139             else if(1 == ps_sao->b3_cb_type_idx)
   1140             {
   1141                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1142                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1143                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1144                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1145 
   1146                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1147                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1148                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1149                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1150 
   1151                 if(chroma_yuv420sp_vu)
   1152                 {
   1153                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1154                                                                                 src_strd,
   1155                                                                                 pu1_src_left_chroma,
   1156                                                                                 pu1_src_top_chroma,
   1157                                                                                 pu1_sao_src_chroma_top_left_ctb,
   1158                                                                                 ps_sao->b5_cr_band_pos,
   1159                                                                                 ps_sao->b5_cb_band_pos,
   1160                                                                                 ai1_offset_cr,
   1161                                                                                 ai1_offset_cb,
   1162                                                                                 sao_wd_chroma,
   1163                                                                                 sao_ht_chroma
   1164                                                                                );
   1165                 }
   1166                 else
   1167                 {
   1168                     ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1169                                                                                 src_strd,
   1170                                                                                 pu1_src_left_chroma,
   1171                                                                                 pu1_src_top_chroma,
   1172                                                                                 pu1_sao_src_chroma_top_left_ctb,
   1173                                                                                 ps_sao->b5_cb_band_pos,
   1174                                                                                 ps_sao->b5_cr_band_pos,
   1175                                                                                 ai1_offset_cb,
   1176                                                                                 ai1_offset_cr,
   1177                                                                                 sao_wd_chroma,
   1178                                                                                 sao_ht_chroma
   1179                                                                                );
   1180                 }
   1181             }
   1182 
   1183             else // if(2 <= ps_sao->b3_cb_type_idx)
   1184             {
   1185                 ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1186                 ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1187                 ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1188                 ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1189 
   1190                 ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1191                 ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1192                 ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1193                 ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1194                 for(i = 0; i < 8; i++)
   1195                 {
   1196                     au1_avail_chroma[i] = 255;
   1197                     au1_tile_slice_boundary[i] = 0;
   1198                     au4_idx_tl[i] = 0;
   1199                     au4_ilf_across_tile_slice_enable[i] = 1;
   1200                 }
   1201                 /*In case of slices*/
   1202                 {
   1203                     if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1204                     {
   1205                         if((0 == (1 << log2_ctb_size) - sao_wd_chroma) && (ps_sao_ctxt->i4_ctb_y > 1) && (ps_sao_ctxt->i4_ctb_x > 1))
   1206                         {
   1207                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 2;
   1208                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 2;
   1209                         }
   1210                         else if(!(0 == (1 << log2_ctb_size) - sao_wd_chroma))
   1211                         {
   1212                             ctby_tl_t = ps_sao_ctxt->i4_ctb_y - 1;
   1213                             ctbx_tl_l = ps_sao_ctxt->i4_ctb_x - 1;
   1214                         }
   1215                         ctbx_tl_t = ps_sao_ctxt->i4_ctb_x - 1;
   1216                         ctby_tl_l = ps_sao_ctxt->i4_ctb_y - 1;
   1217 
   1218                         ctbx_tl_r = ps_sao_ctxt->i4_ctb_x;
   1219                         ctby_tl_r = ps_sao_ctxt->i4_ctb_y - 1;
   1220 
   1221                         ctbx_tl_d =  ps_sao_ctxt->i4_ctb_x - 1;
   1222                         ctby_tl_d =  ps_sao_ctxt->i4_ctb_y;
   1223 
   1224                         ctbx_tl = ps_sao_ctxt->i4_ctb_x - 1;
   1225                         ctby_tl = ps_sao_ctxt->i4_ctb_y - 1;
   1226 
   1227                         if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   1228                         {
   1229 
   1230                             idx_tl   = pu1_slice_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
   1231                             au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_slice_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
   1232                             au4_idx_tl[0] =  pu1_slice_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1233                             au4_idx_tl[1] = au4_idx_tl[5] = pu1_slice_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1234                             au4_idx_tl[3] = au4_idx_tl[6] =   pu1_slice_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1235                             au4_idx_tl[7] = pu1_slice_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1236 
   1237                             if((0 == (1 << log2_ctb_size) - sao_wd_chroma))
   1238                             {
   1239                                 if(ps_sao_ctxt->i4_ctb_x == 1)
   1240                                 {
   1241                                     au4_idx_tl[6] = -1;
   1242                                     au4_idx_tl[4] = -1;
   1243                                 }
   1244                                 else
   1245                                 {
   1246                                     au4_idx_tl[6] = pu1_slice_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1247                                 }
   1248                                 if(ps_sao_ctxt->i4_ctb_y == 1)
   1249                                 {
   1250                                     au4_idx_tl[5] = -1;
   1251                                     au4_idx_tl[4] = -1;
   1252                                 }
   1253                                 else
   1254                                 {
   1255                                     au4_idx_tl[5] = pu1_slice_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1256                                     au4_idx_tl[4] = pu1_slice_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
   1257                                 }
   1258                                 au4_idx_tl[7] = pu1_slice_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1259                             }
   1260 
   1261                             /* Verify that the neighbor ctbs don't cross pic boundary
   1262                              * Also, the ILF flag belonging to the higher pixel address (between neighbor and current pixels) must be assigned*/
   1263                             if((0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma))
   1264                             {
   1265                                 au4_ilf_across_tile_slice_enable[4] = 0;
   1266                                 au4_ilf_across_tile_slice_enable[6] = 0;
   1267                             }
   1268                             else
   1269                             {
   1270                                 au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_tl[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   1271                             }
   1272                             if((0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma))
   1273                             {
   1274                                 au4_ilf_across_tile_slice_enable[5] = 0;
   1275                                 au4_ilf_across_tile_slice_enable[4] = 0;
   1276                             }
   1277                             else
   1278                             {
   1279                                 au4_ilf_across_tile_slice_enable[4] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1280                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_tl[5])->i1_slice_loop_filter_across_slices_enabled_flag;
   1281                             }
   1282                             au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1283                             au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_tl)->i1_slice_loop_filter_across_slices_enabled_flag;
   1284                             au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_tl[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   1285                             au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_tl[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   1286                             au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_tl[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   1287                             /*
   1288                              * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   1289                              * of the pixel having a greater address is checked. Accordingly, set the availability flags
   1290                              */
   1291                             for(i = 0; i < 8; i++)
   1292                             {
   1293                                 /*Sets the edges that lie on the slice/tile boundary*/
   1294                                 if(au4_idx_tl[i] != idx_tl)
   1295                                 {
   1296                                     au1_tile_slice_boundary[i] = 1;
   1297                                 }
   1298                                 else
   1299                                 {
   1300                                     au4_ilf_across_tile_slice_enable[i] = 1;
   1301                                 }
   1302                             }
   1303 
   1304                             /*Reset indices*/
   1305                             for(i = 0; i < 8; i++)
   1306                             {
   1307                                 au4_idx_tl[i] = 0;
   1308                             }
   1309                         }
   1310                         if(ps_pps->i1_tiles_enabled_flag)
   1311                         {
   1312                             /* Calculate availability flags at tile boundary */
   1313                             if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1314                             {
   1315                                 /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1316                                 if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1317                                 {
   1318                                     /*Set the boundary arrays*/
   1319                                     /*Calculate tile indices for neighbor pixels*/
   1320                                     idx_tl   = pu1_tile_idx[ctbx_tl + (ctby_tl * ps_sps->i2_pic_wd_in_ctb)];
   1321                                     au4_idx_tl[2] = au4_idx_tl[4] = *(pu1_tile_idx + ctbx_tl_t + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb));
   1322                                     au4_idx_tl[0] =  pu1_tile_idx[ctbx_tl_l + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1323                                     au4_idx_tl[1] = au4_idx_tl[5] = pu1_tile_idx[ctbx_tl_r + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1324                                     au4_idx_tl[3] = au4_idx_tl[6] =   pu1_tile_idx[ctbx_tl_d + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1325                                     au4_idx_tl[7] = pu1_tile_idx[ctbx_tl_d + 1 + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1326 
   1327                                     if((0 == (1 << log2_ctb_size) - sao_wd_luma))
   1328                                     {
   1329                                         if(ps_sao_ctxt->i4_ctb_x == 1)
   1330                                         {
   1331                                             au4_idx_tl[6] = -1;
   1332                                             au4_idx_tl[4] = -1;
   1333                                         }
   1334                                         else
   1335                                         {
   1336                                             au4_idx_tl[6] = pu1_tile_idx[(ctbx_tl_d - 1) + (ctby_tl_r * ps_sps->i2_pic_wd_in_ctb)];
   1337                                         }
   1338                                         if(ps_sao_ctxt->i4_ctb_y == 1)
   1339                                         {
   1340                                             au4_idx_tl[5] = -1;
   1341                                             au4_idx_tl[4] = -1;
   1342                                         }
   1343                                         else
   1344                                         {
   1345                                             au4_idx_tl[5] = pu1_tile_idx[(ctbx_tl_l + 1) + (ctby_tl_l * ps_sps->i2_pic_wd_in_ctb)];
   1346                                             au4_idx_tl[4] = pu1_tile_idx[(ctbx_tl_t - 1) + (ctby_tl_t * ps_sps->i2_pic_wd_in_ctb)];
   1347                                         }
   1348                                         au4_idx_tl[7] = pu1_tile_idx[(ctbx_tl_d + 1) + (ctby_tl_d * ps_sps->i2_pic_wd_in_ctb)];
   1349                                     }
   1350                                     for(i = 0; i < 8; i++)
   1351                                     {
   1352                                         /*Sets the edges that lie on the tile boundary*/
   1353                                         if(au4_idx_tl[i] != idx_tl)
   1354                                         {
   1355                                             au1_tile_slice_boundary[i] |= 1;
   1356                                             au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   1357                                         }
   1358                                     }
   1359                                 }
   1360                             }
   1361                         }
   1362 
   1363                         for(i = 0; i < 8; i++)
   1364                         {
   1365                             /*Sets the edges that lie on the slice/tile boundary*/
   1366                             if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1367                             {
   1368                                 au1_avail_chroma[i] = 0;
   1369                             }
   1370                         }
   1371                     }
   1372                 }
   1373 
   1374                 if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
   1375                 {
   1376                     au1_avail_chroma[0] = 0;
   1377                     au1_avail_chroma[4] = 0;
   1378                     au1_avail_chroma[6] = 0;
   1379                 }
   1380                 if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   1381                 {
   1382                     au1_avail_chroma[1] = 0;
   1383                     au1_avail_chroma[5] = 0;
   1384                     au1_avail_chroma[7] = 0;
   1385                 }
   1386 
   1387                 if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
   1388                 {
   1389                     au1_avail_chroma[2] = 0;
   1390                     au1_avail_chroma[4] = 0;
   1391                     au1_avail_chroma[5] = 0;
   1392                 }
   1393                 if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1394                 {
   1395                     au1_avail_chroma[3] = 0;
   1396                     au1_avail_chroma[6] = 0;
   1397                     au1_avail_chroma[7] = 0;
   1398                 }
   1399 
   1400                 {
   1401                     au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
   1402                     au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
   1403                     au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_left_chroma[2 * sao_ht_chroma];
   1404                     au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_left_chroma[2 * sao_ht_chroma + 1];
   1405                     if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_y != ps_sps->i2_pic_ht_in_ctb - 1))
   1406                     {
   1407                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   1408                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   1409                     }
   1410 
   1411                     if(chroma_yuv420sp_vu)
   1412                     {
   1413                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   1414                                                                              src_strd,
   1415                                                                              pu1_src_left_chroma,
   1416                                                                              pu1_src_top_chroma,
   1417                                                                              pu1_sao_src_chroma_top_left_ctb,
   1418                                                                              au1_src_top_right,
   1419                                                                              au1_sao_src_top_left_chroma_bot_left,
   1420                                                                              au1_avail_chroma,
   1421                                                                              ai1_offset_cr,
   1422                                                                              ai1_offset_cb,
   1423                                                                              sao_wd_chroma,
   1424                                                                              sao_ht_chroma);
   1425                     }
   1426                     else
   1427                     {
   1428                         ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   1429                                                                              src_strd,
   1430                                                                              pu1_src_left_chroma,
   1431                                                                              pu1_src_top_chroma,
   1432                                                                              pu1_sao_src_chroma_top_left_ctb,
   1433                                                                              au1_src_top_right,
   1434                                                                              au1_sao_src_top_left_chroma_bot_left,
   1435                                                                              au1_avail_chroma,
   1436                                                                              ai1_offset_cb,
   1437                                                                              ai1_offset_cr,
   1438                                                                              sao_wd_chroma,
   1439                                                                              sao_ht_chroma);
   1440                     }
   1441                 }
   1442             }
   1443         }
   1444         else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1445         {
   1446             for(row = 0; row < sao_ht_chroma; row++)
   1447             {
   1448                 pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   1449                 pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   1450             }
   1451             pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   1452             pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   1453 
   1454             ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   1455         }
   1456 
   1457         pu1_src_luma += sao_wd_luma + sao_ht_luma * src_strd;
   1458         pu1_src_chroma += sao_wd_chroma + sao_ht_chroma * src_strd;
   1459         ps_sao += (1 + ps_sps->i2_pic_wd_in_ctb);
   1460     }
   1461 
   1462 
   1463     /* Top CTB */
   1464     if((ps_sao_ctxt->i4_ctb_y > 0))
   1465     {
   1466         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
   1467         WORD32 sao_wd_chroma = ctb_size - 2 * SAO_SHIFT_CTB;
   1468         WORD32 sao_ht_luma = SAO_SHIFT_CTB;
   1469         WORD32 sao_ht_chroma = SAO_SHIFT_CTB;
   1470 
   1471         WORD32 ctbx_t_t = 0, ctbx_t_l = 0, ctbx_t_r = 0, ctbx_t_d = 0, ctbx_t = 0;
   1472         WORD32 ctby_t_t = 0, ctby_t_l = 0, ctby_t_r = 0, ctby_t_d = 0, ctby_t = 0;
   1473         WORD32 au4_idx_t[8], idx_t;
   1474 
   1475         WORD32 remaining_cols;
   1476 
   1477         slice_header_t *ps_slice_hdr_top;
   1478         {
   1479             WORD32 top_ctb_indx = (ps_sao_ctxt->i4_ctb_y - 1) * ps_sps->i2_pic_wd_in_ctb +
   1480                                         (ps_sao_ctxt->i4_ctb_x);
   1481             ps_slice_hdr_top = ps_slice_hdr_base + pu1_slice_idx[top_ctb_indx];
   1482         }
   1483 
   1484         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
   1485         if(remaining_cols <= SAO_SHIFT_CTB)
   1486         {
   1487             sao_wd_luma += remaining_cols;
   1488         }
   1489         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
   1490         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
   1491         {
   1492             sao_wd_chroma += remaining_cols;
   1493         }
   1494 
   1495         pu1_src_luma -= (sao_ht_luma * src_strd);
   1496         pu1_src_chroma -= (sao_ht_chroma * src_strd);
   1497         ps_sao -= (ps_sps->i2_pic_wd_in_ctb);
   1498         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   1499         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   1500         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_chroma;
   1501         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - (2 * sao_ht_chroma);
   1502 
   1503         if(0 != sao_wd_luma)
   1504         {
   1505             if(ps_slice_hdr_top->i1_slice_sao_luma_flag)
   1506             {
   1507                 if(0 == ps_sao->b3_y_type_idx)
   1508                 {
   1509                     /* Update left, top and top-left */
   1510                     for(row = 0; row < sao_ht_luma; row++)
   1511                     {
   1512                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   1513                     }
   1514                     pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   1515 
   1516                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   1517 
   1518                 }
   1519 
   1520                 else if(1 == ps_sao->b3_y_type_idx)
   1521                 {
   1522                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   1523                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   1524                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   1525                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   1526 
   1527                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
   1528                                                                               src_strd,
   1529                                                                               pu1_src_left_luma,
   1530                                                                               pu1_src_top_luma,
   1531                                                                               pu1_sao_src_luma_top_left_ctb,
   1532                                                                               ps_sao->b5_y_band_pos,
   1533                                                                               ai1_offset_y,
   1534                                                                               sao_wd_luma,
   1535                                                                               sao_ht_luma
   1536                                                                              );
   1537                 }
   1538 
   1539                 else // if(2 <= ps_sao->b3_y_type_idx)
   1540                 {
   1541                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   1542                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   1543                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   1544                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   1545 
   1546                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_avail_luma, 255, 8);
   1547                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr(au1_tile_slice_boundary, 0, 8);
   1548                     ps_codec->s_func_selector.ihevc_memset_mul_8_fptr((UWORD8 *)au4_idx_t, 0, 8 * sizeof(WORD32));
   1549 
   1550                     for(i = 0; i < 8; i++)
   1551                     {
   1552 
   1553                         au4_ilf_across_tile_slice_enable[i] = 1;
   1554                     }
   1555                     /******************************************************************
   1556                      * Derive the Top CTB's neighbor pixel's slice indices.
   1557                      *
   1558                      *               T_T
   1559                      *          ____________
   1560                      *         |    |       |
   1561                      *         | T_L|  T    |T_R
   1562                      *         |    | ______|____
   1563                      *         |    |  T_D  |    |
   1564                      *         |    |       |    |
   1565                      *         |____|_______|    |
   1566                      *              |            |
   1567                      *              |            |
   1568                      *              |____________|
   1569                      *
   1570                      *****************************************************************/
   1571 
   1572                     /*In case of slices*/
   1573                     {
   1574                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1575                         {
   1576 
   1577                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
   1578                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
   1579 
   1580                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
   1581                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
   1582 
   1583                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
   1584                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
   1585 
   1586                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
   1587                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
   1588 
   1589                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
   1590                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
   1591 
   1592                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   1593                             {
   1594                                 /*Calculate neighbor ctb slice indices*/
   1595                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1596                                 {
   1597                                     au4_idx_t[0] = -1;
   1598                                     au4_idx_t[6] = -1;
   1599                                     au4_idx_t[4] = -1;
   1600                                 }
   1601                                 else
   1602                                 {
   1603                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1604                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1605                                 }
   1606                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1607                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1608                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1609                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1610 
   1611                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
   1612                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1613                                 {
   1614                                     au4_ilf_across_tile_slice_enable[4] = 0;
   1615                                     au4_ilf_across_tile_slice_enable[6] = 0;
   1616                                     au4_ilf_across_tile_slice_enable[0] = 0;
   1617                                 }
   1618                                 else
   1619                                 {
   1620                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1621                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   1622                                 }
   1623 
   1624 
   1625 
   1626                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1627                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1628                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   1629                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   1630                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   1631 
   1632                                 if(au4_idx_t[6] < idx_t)
   1633                                 {
   1634                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1635                                 }
   1636 
   1637                                 /*
   1638                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   1639                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   1640                                  */
   1641 
   1642                                 for(i = 0; i < 8; i++)
   1643                                 {
   1644                                     /*Sets the edges that lie on the slice/tile boundary*/
   1645                                     if(au4_idx_t[i] != idx_t)
   1646                                     {
   1647                                         au1_tile_slice_boundary[i] = 1;
   1648                                         /*Check for slice flag at such boundaries*/
   1649                                     }
   1650                                     else
   1651                                     {
   1652                                         au4_ilf_across_tile_slice_enable[i] = 1;
   1653                                     }
   1654                                 }
   1655                                 /*Reset indices*/
   1656                                 for(i = 0; i < 8; i++)
   1657                                 {
   1658                                     au4_idx_t[i] = 0;
   1659                                 }
   1660                             }
   1661 
   1662                             if(ps_pps->i1_tiles_enabled_flag)
   1663                             {
   1664                                 /* Calculate availability flags at tile boundary */
   1665                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1666                                 {
   1667                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1668                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1669                                     {
   1670                                         /*Calculate neighbor ctb tile indices*/
   1671                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   1672                                         {
   1673                                             au4_idx_t[0] = -1;
   1674                                             au4_idx_t[6] = -1;
   1675                                             au4_idx_t[4] = -1;
   1676                                         }
   1677                                         else
   1678                                         {
   1679                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1680                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1681                                         }
   1682                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1683                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1684                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1685                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1686 
   1687                                         for(i = 0; i < 8; i++)
   1688                                         {
   1689                                             /*Sets the edges that lie on the tile boundary*/
   1690                                             if(au4_idx_t[i] != idx_t)
   1691                                             {
   1692                                                 au1_tile_slice_boundary[i] |= 1;
   1693                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
   1694                                             }
   1695                                         }
   1696                                     }
   1697                                 }
   1698                             }
   1699 
   1700                             for(i = 0; i < 8; i++)
   1701                             {
   1702                                 /*Mark a neighbour unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
   1703                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1704                                 {
   1705                                     au1_avail_luma[i] = 0;
   1706                                 }
   1707                             }
   1708                         }
   1709                     }
   1710 
   1711 
   1712                     if(0 == ps_sao_ctxt->i4_ctb_x)
   1713                     {
   1714                         au1_avail_luma[0] = 0;
   1715                         au1_avail_luma[4] = 0;
   1716                         au1_avail_luma[6] = 0;
   1717                     }
   1718 
   1719                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
   1720                     {
   1721                         au1_avail_luma[1] = 0;
   1722                         au1_avail_luma[5] = 0;
   1723                         au1_avail_luma[7] = 0;
   1724                     }
   1725 
   1726                     if(0 == (ps_sao_ctxt->i4_ctb_y << log2_ctb_size) - sao_ht_luma)
   1727                     {
   1728                         au1_avail_luma[2] = 0;
   1729                         au1_avail_luma[4] = 0;
   1730                         au1_avail_luma[5] = 0;
   1731                     }
   1732 
   1733                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   1734                     {
   1735                         au1_avail_luma[3] = 0;
   1736                         au1_avail_luma[6] = 0;
   1737                         au1_avail_luma[7] = 0;
   1738                     }
   1739 
   1740                     {
   1741                         au1_src_top_right[0] = pu1_sao_src_top_left_luma_top_right[0];
   1742                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
   1743                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   1744                                                                           src_strd,
   1745                                                                           pu1_src_left_luma,
   1746                                                                           pu1_src_top_luma,
   1747                                                                           pu1_sao_src_luma_top_left_ctb,
   1748                                                                           au1_src_top_right,
   1749                                                                           &u1_sao_src_top_left_luma_bot_left,
   1750                                                                           au1_avail_luma,
   1751                                                                           ai1_offset_y,
   1752                                                                           sao_wd_luma,
   1753                                                                           sao_ht_luma);
   1754                     }
   1755                 }
   1756             }
   1757             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1758             {
   1759                 /* Update left, top and top-left */
   1760                 for(row = 0; row < sao_ht_luma; row++)
   1761                 {
   1762                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   1763                 }
   1764                 pu1_sao_src_luma_top_left_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   1765 
   1766                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   1767             }
   1768         }
   1769 
   1770         if(0 != sao_wd_chroma)
   1771         {
   1772             if(ps_slice_hdr_top->i1_slice_sao_chroma_flag)
   1773             {
   1774                 if(0 == ps_sao->b3_cb_type_idx)
   1775                 {
   1776 
   1777                     for(row = 0; row < sao_ht_chroma; row++)
   1778                     {
   1779                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   1780                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   1781                     }
   1782                     pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   1783                     pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   1784 
   1785                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   1786 
   1787                 }
   1788 
   1789                 else if(1 == ps_sao->b3_cb_type_idx)
   1790                 {
   1791                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1792                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1793                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1794                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1795 
   1796                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1797                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1798                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1799                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1800 
   1801                     if(chroma_yuv420sp_vu)
   1802                     {
   1803                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1804                                                                                     src_strd,
   1805                                                                                     pu1_src_left_chroma,
   1806                                                                                     pu1_src_top_chroma,
   1807                                                                                     pu1_sao_src_chroma_top_left_ctb,
   1808                                                                                     ps_sao->b5_cr_band_pos,
   1809                                                                                     ps_sao->b5_cb_band_pos,
   1810                                                                                     ai1_offset_cr,
   1811                                                                                     ai1_offset_cb,
   1812                                                                                     sao_wd_chroma,
   1813                                                                                     sao_ht_chroma
   1814                                                                                    );
   1815                     }
   1816                     else
   1817                     {
   1818                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   1819                                                                                     src_strd,
   1820                                                                                     pu1_src_left_chroma,
   1821                                                                                     pu1_src_top_chroma,
   1822                                                                                     pu1_sao_src_chroma_top_left_ctb,
   1823                                                                                     ps_sao->b5_cb_band_pos,
   1824                                                                                     ps_sao->b5_cr_band_pos,
   1825                                                                                     ai1_offset_cb,
   1826                                                                                     ai1_offset_cr,
   1827                                                                                     sao_wd_chroma,
   1828                                                                                     sao_ht_chroma
   1829                                                                                    );
   1830                     }
   1831                 }
   1832                 else // if(2 <= ps_sao->b3_cb_type_idx)
   1833                 {
   1834                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   1835                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   1836                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   1837                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   1838 
   1839                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   1840                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   1841                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   1842                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   1843 
   1844                     for(i = 0; i < 8; i++)
   1845                     {
   1846                         au1_avail_chroma[i] = 255;
   1847                         au1_tile_slice_boundary[i] = 0;
   1848                         au4_idx_t[i] = 0;
   1849                         au4_ilf_across_tile_slice_enable[i] = 1;
   1850                     }
   1851 
   1852                     {
   1853                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   1854                         {
   1855                             ctbx_t_t = ps_sao_ctxt->i4_ctb_x;
   1856                             ctby_t_t = ps_sao_ctxt->i4_ctb_y - 1;
   1857 
   1858                             ctbx_t_l = ps_sao_ctxt->i4_ctb_x - 1;
   1859                             ctby_t_l = ps_sao_ctxt->i4_ctb_y - 1;
   1860 
   1861                             ctbx_t_r = ps_sao_ctxt->i4_ctb_x;
   1862                             ctby_t_r = ps_sao_ctxt->i4_ctb_y - 1;
   1863 
   1864                             ctbx_t_d =  ps_sao_ctxt->i4_ctb_x;
   1865                             ctby_t_d =  ps_sao_ctxt->i4_ctb_y;
   1866 
   1867                             ctbx_t = ps_sao_ctxt->i4_ctb_x;
   1868                             ctby_t = ps_sao_ctxt->i4_ctb_y - 1;
   1869 
   1870                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   1871                             {
   1872                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1873                                 {
   1874                                     au4_idx_t[0] = -1;
   1875                                     au4_idx_t[6] = -1;
   1876                                     au4_idx_t[4] = -1;
   1877                                 }
   1878                                 else
   1879                                 {
   1880                                     au4_idx_t[0] = au4_idx_t[4] = pu1_slice_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1881                                     au4_idx_t[6] = pu1_slice_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1882                                 }
   1883                                 idx_t   = pu1_slice_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1884                                 au4_idx_t[2] = au4_idx_t[5] = pu1_slice_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1885                                 au4_idx_t[1] = pu1_slice_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1886                                 au4_idx_t[3] = au4_idx_t[7] = pu1_slice_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1887 
   1888                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
   1889 
   1890                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   1891                                 {
   1892                                     au4_ilf_across_tile_slice_enable[4] = 0;
   1893                                     au4_ilf_across_tile_slice_enable[6] = 0;
   1894                                     au4_ilf_across_tile_slice_enable[0] = 0;
   1895                                 }
   1896                                 else
   1897                                 {
   1898                                     au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1899                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_t[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   1900                                 }
   1901 
   1902                                 au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_t[5])->i1_slice_loop_filter_across_slices_enabled_flag;
   1903                                 au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1904                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_t[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   1905                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_t[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   1906                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_t[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   1907 
   1908                                 if(idx_t > au4_idx_t[6])
   1909                                 {
   1910                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_t)->i1_slice_loop_filter_across_slices_enabled_flag;
   1911                                 }
   1912 
   1913                                 /*
   1914                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   1915                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   1916                                  */
   1917                                 for(i = 0; i < 8; i++)
   1918                                 {
   1919                                     /*Sets the edges that lie on the slice/tile boundary*/
   1920                                     if(au4_idx_t[i] != idx_t)
   1921                                     {
   1922                                         au1_tile_slice_boundary[i] = 1;
   1923                                     }
   1924                                     else
   1925                                     {
   1926                                         /*Indicates that the neighbour belongs to same/dependent slice*/
   1927                                         au4_ilf_across_tile_slice_enable[i] = 1;
   1928                                     }
   1929                                 }
   1930                                 /*Reset indices*/
   1931                                 for(i = 0; i < 8; i++)
   1932                                 {
   1933                                     au4_idx_t[i] = 0;
   1934                                 }
   1935                             }
   1936                             if(ps_pps->i1_tiles_enabled_flag)
   1937                             {
   1938                                 /* Calculate availability flags at tile boundary */
   1939                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   1940                                 {
   1941                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   1942                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   1943                                     {
   1944                                         /*Calculate neighbor ctb tile indices*/
   1945                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   1946                                         {
   1947                                             au4_idx_t[0] = -1;
   1948                                             au4_idx_t[6] = -1;
   1949                                             au4_idx_t[4] = -1;
   1950                                         }
   1951                                         else
   1952                                         {
   1953                                             au4_idx_t[0] = au4_idx_t[4] = pu1_tile_idx[ctbx_t_l + (ctby_t_l * ps_sps->i2_pic_wd_in_ctb)];
   1954                                             au4_idx_t[6] = pu1_tile_idx[ctbx_t_d - 1 + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1955                                         }
   1956                                         idx_t   = pu1_tile_idx[ctbx_t + (ctby_t * ps_sps->i2_pic_wd_in_ctb)];
   1957                                         au4_idx_t[2] = au4_idx_t[5] = pu1_tile_idx[ctbx_t_t + (ctby_t_t * ps_sps->i2_pic_wd_in_ctb)];
   1958                                         au4_idx_t[1] = pu1_tile_idx[ctbx_t_r + (ctby_t_r * ps_sps->i2_pic_wd_in_ctb)];
   1959                                         au4_idx_t[3] = au4_idx_t[7] = pu1_tile_idx[ctbx_t_d + (ctby_t_d * ps_sps->i2_pic_wd_in_ctb)];
   1960 
   1961                                         for(i = 0; i < 8; i++)
   1962                                         {
   1963                                             /*Sets the edges that lie on the tile boundary*/
   1964                                             if(au4_idx_t[i] != idx_t)
   1965                                             {
   1966                                                 au1_tile_slice_boundary[i] |= 1;
   1967                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
   1968                                             }
   1969                                         }
   1970                                     }
   1971                                 }
   1972                             }
   1973                             for(i = 0; i < 8; i++)
   1974                             {
   1975                                 /*Mark a neighbour unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
   1976                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   1977                                 {
   1978                                     au1_avail_chroma[i] = 0;
   1979                                 }
   1980                             }
   1981 
   1982                         }
   1983                     }
   1984                     if(0 == ps_sao_ctxt->i4_ctb_x)
   1985                     {
   1986                         au1_avail_chroma[0] = 0;
   1987                         au1_avail_chroma[4] = 0;
   1988                         au1_avail_chroma[6] = 0;
   1989                     }
   1990 
   1991                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
   1992                     {
   1993                         au1_avail_chroma[1] = 0;
   1994                         au1_avail_chroma[5] = 0;
   1995                         au1_avail_chroma[7] = 0;
   1996                     }
   1997 
   1998                     if(0 == (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) - sao_ht_chroma)
   1999                     {
   2000                         au1_avail_chroma[2] = 0;
   2001                         au1_avail_chroma[4] = 0;
   2002                         au1_avail_chroma[5] = 0;
   2003                     }
   2004 
   2005                     if(ps_sps->i2_pic_ht_in_ctb == ps_sao_ctxt->i4_ctb_y)
   2006                     {
   2007                         au1_avail_chroma[3] = 0;
   2008                         au1_avail_chroma[6] = 0;
   2009                         au1_avail_chroma[7] = 0;
   2010                     }
   2011 
   2012                     {
   2013                         au1_src_top_right[0] = pu1_sao_src_top_left_chroma_top_right[0];
   2014                         au1_src_top_right[1] = pu1_sao_src_top_left_chroma_top_right[1];
   2015                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   2016                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   2017 
   2018                         if(chroma_yuv420sp_vu)
   2019                         {
   2020                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2021                                                                                  src_strd,
   2022                                                                                  pu1_src_left_chroma,
   2023                                                                                  pu1_src_top_chroma,
   2024                                                                                  pu1_sao_src_chroma_top_left_ctb,
   2025                                                                                  au1_src_top_right,
   2026                                                                                  au1_sao_src_top_left_chroma_bot_left,
   2027                                                                                  au1_avail_chroma,
   2028                                                                                  ai1_offset_cr,
   2029                                                                                  ai1_offset_cb,
   2030                                                                                  sao_wd_chroma,
   2031                                                                                  sao_ht_chroma);
   2032                         }
   2033                         else
   2034                         {
   2035                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2036                                                                                  src_strd,
   2037                                                                                  pu1_src_left_chroma,
   2038                                                                                  pu1_src_top_chroma,
   2039                                                                                  pu1_sao_src_chroma_top_left_ctb,
   2040                                                                                  au1_src_top_right,
   2041                                                                                  au1_sao_src_top_left_chroma_bot_left,
   2042                                                                                  au1_avail_chroma,
   2043                                                                                  ai1_offset_cb,
   2044                                                                                  ai1_offset_cr,
   2045                                                                                  sao_wd_chroma,
   2046                                                                                  sao_ht_chroma);
   2047                         }
   2048                     }
   2049 
   2050                 }
   2051             }
   2052             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2053             {
   2054                 for(row = 0; row < sao_ht_chroma; row++)
   2055                 {
   2056                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   2057                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   2058                 }
   2059                 pu1_sao_src_chroma_top_left_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   2060                 pu1_sao_src_chroma_top_left_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   2061 
   2062                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   2063             }
   2064         }
   2065 
   2066         pu1_src_luma += sao_ht_luma * src_strd;
   2067         pu1_src_chroma += sao_ht_chroma * src_strd;
   2068         ps_sao += (ps_sps->i2_pic_wd_in_ctb);
   2069     }
   2070 
   2071     /* Left CTB */
   2072     if(ps_sao_ctxt->i4_ctb_x > 0)
   2073     {
   2074         WORD32 sao_wd_luma = SAO_SHIFT_CTB;
   2075         WORD32 sao_wd_chroma = 2 * SAO_SHIFT_CTB;
   2076         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
   2077         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
   2078 
   2079         WORD32 ctbx_l_t = 0, ctbx_l_l = 0, ctbx_l_r = 0, ctbx_l_d = 0, ctbx_l = 0;
   2080         WORD32 ctby_l_t = 0, ctby_l_l = 0, ctby_l_r = 0, ctby_l_d = 0, ctby_l = 0;
   2081         WORD32 au4_idx_l[8], idx_l;
   2082 
   2083         WORD32 remaining_rows;
   2084         slice_header_t *ps_slice_hdr_left;
   2085         {
   2086             WORD32 left_ctb_indx = (ps_sao_ctxt->i4_ctb_y) * ps_sps->i2_pic_wd_in_ctb +
   2087                                         (ps_sao_ctxt->i4_ctb_x - 1);
   2088             ps_slice_hdr_left = ps_slice_hdr_base + pu1_slice_idx[left_ctb_indx];
   2089         }
   2090 
   2091         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
   2092         if(remaining_rows <= SAO_SHIFT_CTB)
   2093         {
   2094             sao_ht_luma += remaining_rows;
   2095         }
   2096         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
   2097         if(remaining_rows <= SAO_SHIFT_CTB)
   2098         {
   2099             sao_ht_chroma += remaining_rows;
   2100         }
   2101 
   2102         pu1_src_luma -= sao_wd_luma;
   2103         pu1_src_chroma -= sao_wd_chroma;
   2104         ps_sao -= 1;
   2105         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma;
   2106         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma;
   2107         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2108         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2109 
   2110 
   2111         if(0 != sao_ht_luma)
   2112         {
   2113             if(ps_slice_hdr_left->i1_slice_sao_luma_flag)
   2114             {
   2115                 if(0 == ps_sao->b3_y_type_idx)
   2116                 {
   2117                     /* Update left, top and top-left */
   2118                     for(row = 0; row < sao_ht_luma; row++)
   2119                     {
   2120                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2121                     }
   2122                     /*Update in next location*/
   2123                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2124 
   2125                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2126 
   2127                 }
   2128 
   2129                 else if(1 == ps_sao->b3_y_type_idx)
   2130                 {
   2131                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2132                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2133                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2134                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2135 
   2136                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
   2137                                                                               src_strd,
   2138                                                                               pu1_src_left_luma,
   2139                                                                               pu1_src_top_luma,
   2140                                                                               pu1_sao_src_top_left_luma_curr_ctb,
   2141                                                                               ps_sao->b5_y_band_pos,
   2142                                                                               ai1_offset_y,
   2143                                                                               sao_wd_luma,
   2144                                                                               sao_ht_luma
   2145                                                                              );
   2146                 }
   2147 
   2148                 else // if(2 <= ps_sao->b3_y_type_idx)
   2149                 {
   2150                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2151                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2152                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2153                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2154 
   2155                     for(i = 0; i < 8; i++)
   2156                     {
   2157                         au1_avail_luma[i] = 255;
   2158                         au1_tile_slice_boundary[i] = 0;
   2159                         au4_idx_l[i] = 0;
   2160                         au4_ilf_across_tile_slice_enable[i] = 1;
   2161                     }
   2162                     /******************************************************************
   2163                      * Derive the Left CTB's neighbour pixel's slice indices.
   2164                      *
   2165                      *
   2166                      *          ____________
   2167                      *         |    |       |
   2168                      *         | L_T|       |
   2169                      *         |____|_______|____
   2170                      *         |    |       |    |
   2171                      *     L_L |  L |  L_R  |    |
   2172                      *         |____|_______|    |
   2173                      *              |            |
   2174                      *          L_D |            |
   2175                      *              |____________|
   2176                      *
   2177                      *****************************************************************/
   2178 
   2179                     /*In case of slices or tiles*/
   2180                     {
   2181                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2182                         {
   2183                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
   2184                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
   2185 
   2186                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
   2187                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
   2188 
   2189                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
   2190                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
   2191 
   2192                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
   2193                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
   2194 
   2195                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
   2196                             ctby_l = ps_sao_ctxt->i4_ctb_y;
   2197 
   2198                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2199                             {
   2200                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2201                                 {
   2202                                     au4_idx_l[2] = -1;
   2203                                     au4_idx_l[4] = -1;
   2204                                     au4_idx_l[5] = -1;
   2205                                 }
   2206                                 else
   2207                                 {
   2208                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2209                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2210                                 }
   2211                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2212                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2213                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2214                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2215 
   2216                                 /*Verify that the neighbor ctbs don't cross pic boundary.*/
   2217                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2218                                 {
   2219                                     au4_ilf_across_tile_slice_enable[2] = 0;
   2220                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2221                                     au4_ilf_across_tile_slice_enable[5] = 0;
   2222                                 }
   2223                                 else
   2224                                 {
   2225                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
   2226                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   2227 
   2228                                 }
   2229                                 //TODO: ILF flag checks for [0] and [6] is missing.
   2230                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   2231                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   2232                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   2233 
   2234                                 if(idx_l < au4_idx_l[5])
   2235                                 {
   2236                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
   2237                                 }
   2238 
   2239                                 /*
   2240                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   2241                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   2242                                  */
   2243                                 for(i = 0; i < 8; i++)
   2244                                 {
   2245                                     /*Sets the edges that lie on the slice/tile boundary*/
   2246                                     if(au4_idx_l[i] != idx_l)
   2247                                     {
   2248                                         au1_tile_slice_boundary[i] = 1;
   2249                                     }
   2250                                     else
   2251                                     {
   2252                                         au4_ilf_across_tile_slice_enable[i] = 1;
   2253                                     }
   2254                                 }
   2255                                 /*Reset indices*/
   2256                                 for(i = 0; i < 8; i++)
   2257                                 {
   2258                                     au4_idx_l[i] = 0;
   2259                                 }
   2260                             }
   2261 
   2262                             if(ps_pps->i1_tiles_enabled_flag)
   2263                             {
   2264                                 /* Calculate availability flags at slice boundary */
   2265                                 /* Calculate availability flags at tile boundary */
   2266                                 {
   2267                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   2268                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   2269                                     {
   2270                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   2271                                         {
   2272                                             au4_idx_l[2] = -1;
   2273                                             au4_idx_l[4] = -1;
   2274                                             au4_idx_l[5] = -1;
   2275                                         }
   2276                                         else
   2277                                         {
   2278                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2279                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2280                                         }
   2281 
   2282                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2283                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2284                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2285                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2286 
   2287                                         for(i = 0; i < 8; i++)
   2288                                         {
   2289                                             /*Sets the edges that lie on the slice/tile boundary*/
   2290                                             if(au4_idx_l[i] != idx_l)
   2291                                             {
   2292                                                 au1_tile_slice_boundary[i] |= 1;
   2293                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag;
   2294                                             }
   2295                                         }
   2296                                     }
   2297                                 }
   2298                             }
   2299 
   2300                             for(i = 0; i < 8; i++)
   2301                             {
   2302                                 /*Marks a neighbour unavailable when it lies across a slice/tile boundary and filtering across that boundary is disabled*/
   2303                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   2304                                 {
   2305                                     au1_avail_luma[i] = 0;
   2306                                 }
   2307                             }
   2308                         }
   2309                     }
   2310                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_luma)
   2311                     {
   2312                         au1_avail_luma[0] = 0;
   2313                         au1_avail_luma[4] = 0;
   2314                         au1_avail_luma[6] = 0;
   2315                     }
   2316                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   2317                     {
   2318                         au1_avail_luma[1] = 0;
   2319                         au1_avail_luma[5] = 0;
   2320                         au1_avail_luma[7] = 0;
   2321                     }
   2322 
   2323                     if(0 == ps_sao_ctxt->i4_ctb_y)
   2324                     {
   2325                         au1_avail_luma[2] = 0;
   2326                         au1_avail_luma[4] = 0;
   2327                         au1_avail_luma[5] = 0;
   2328                     }
   2329 
   2330                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
   2331                     {
   2332                         au1_avail_luma[3] = 0;
   2333                         au1_avail_luma[6] = 0;
   2334                         au1_avail_luma[7] = 0;
   2335                     }
   2336 
   2337                     {
   2338                         au1_src_top_right[0] = pu1_src_top_luma[sao_wd_luma];
   2339                         u1_sao_src_top_left_luma_bot_left = pu1_sao_src_top_left_luma_bot_left[0];
   2340                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   2341                                                                           src_strd,
   2342                                                                           pu1_src_left_luma,
   2343                                                                           pu1_src_top_luma,
   2344                                                                           pu1_sao_src_top_left_luma_curr_ctb,
   2345                                                                           au1_src_top_right,
   2346                                                                           &u1_sao_src_top_left_luma_bot_left,
   2347                                                                           au1_avail_luma,
   2348                                                                           ai1_offset_y,
   2349                                                                           sao_wd_luma,
   2350                                                                           sao_ht_luma);
   2351                     }
   2352 
   2353                 }
   2354             }
   2355             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2356             {
   2357                 /* Update left, top and top-left */
   2358                 for(row = 0; row < sao_ht_luma; row++)
   2359                 {
   2360                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2361                 }
   2362                 /*Update in next location*/
   2363                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2364 
   2365                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2366             }
   2367         }
   2368 
   2369         if(0 != sao_ht_chroma)
   2370         {
   2371             if(ps_slice_hdr_left->i1_slice_sao_chroma_flag)
   2372             {
   2373                 if(0 == ps_sao->b3_cb_type_idx)
   2374                 {
   2375                     for(row = 0; row < sao_ht_chroma; row++)
   2376                     {
   2377                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   2378                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   2379                     }
   2380                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   2381                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   2382 
   2383                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   2384                 }
   2385 
   2386                 else if(1 == ps_sao->b3_cb_type_idx)
   2387                 {
   2388                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   2389                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   2390                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   2391                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   2392 
   2393                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   2394                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   2395                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   2396                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   2397 
   2398                     if(chroma_yuv420sp_vu)
   2399                     {
   2400                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   2401                                                                                     src_strd,
   2402                                                                                     pu1_src_left_chroma,
   2403                                                                                     pu1_src_top_chroma,
   2404                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   2405                                                                                     ps_sao->b5_cr_band_pos,
   2406                                                                                     ps_sao->b5_cb_band_pos,
   2407                                                                                     ai1_offset_cr,
   2408                                                                                     ai1_offset_cb,
   2409                                                                                     sao_wd_chroma,
   2410                                                                                     sao_ht_chroma
   2411                                                                                    );
   2412                     }
   2413                     else
   2414                     {
   2415                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   2416                                                                                     src_strd,
   2417                                                                                     pu1_src_left_chroma,
   2418                                                                                     pu1_src_top_chroma,
   2419                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   2420                                                                                     ps_sao->b5_cb_band_pos,
   2421                                                                                     ps_sao->b5_cr_band_pos,
   2422                                                                                     ai1_offset_cb,
   2423                                                                                     ai1_offset_cr,
   2424                                                                                     sao_wd_chroma,
   2425                                                                                     sao_ht_chroma
   2426                                                                                    );
   2427                     }
   2428                 }
   2429 
   2430                 else // if(2 <= ps_sao->b3_cb_type_idx)
   2431                 {
   2432                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   2433                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   2434                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   2435                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   2436 
   2437                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   2438                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   2439                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   2440                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   2441 
   2442                     for(i = 0; i < 8; i++)
   2443                     {
   2444                         au1_avail_chroma[i] = 255;
   2445                         au1_tile_slice_boundary[i] = 0;
   2446                         au4_idx_l[i] = 0;
   2447                         au4_ilf_across_tile_slice_enable[i] = 1;
   2448                     }
   2449                     /*In case of slices or tiles*/
   2450                     {
   2451                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2452                         {
   2453                             ctbx_l_t = ps_sao_ctxt->i4_ctb_x - 1;
   2454                             ctby_l_t = ps_sao_ctxt->i4_ctb_y - 1;
   2455 
   2456                             ctbx_l_l = ps_sao_ctxt->i4_ctb_x - 1;
   2457                             ctby_l_l = ps_sao_ctxt->i4_ctb_y;
   2458 
   2459                             ctbx_l_r = ps_sao_ctxt->i4_ctb_x;
   2460                             ctby_l_r = ps_sao_ctxt->i4_ctb_y;
   2461 
   2462                             ctbx_l_d =  ps_sao_ctxt->i4_ctb_x - 1;
   2463                             ctby_l_d =  ps_sao_ctxt->i4_ctb_y;
   2464 
   2465                             ctbx_l = ps_sao_ctxt->i4_ctb_x - 1;
   2466                             ctby_l = ps_sao_ctxt->i4_ctb_y;
   2467 
   2468                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2469                             {
   2470                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2471                                 {
   2472                                     au4_idx_l[2] = -1;
   2473                                     au4_idx_l[4] = -1;
   2474                                     au4_idx_l[5] = -1;
   2475                                 }
   2476                                 else
   2477                                 {
   2478                                     au4_idx_l[2] = au4_idx_l[4] = pu1_slice_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2479                                     au4_idx_l[5] =  pu1_slice_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2480                                 }
   2481                                 idx_l   = au4_idx_l[6] = pu1_slice_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2482                                 au4_idx_l[0] = pu1_slice_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2483                                 au4_idx_l[1] = au4_idx_l[7] = pu1_slice_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2484                                 au4_idx_l[3] = pu1_slice_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2485 
   2486                                 /*Verify that the neighbour ctbs don't cross pic boundary.*/
   2487                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2488                                 {
   2489                                     au4_ilf_across_tile_slice_enable[2] = 0;
   2490                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2491                                     au4_ilf_across_tile_slice_enable[5] = 0;
   2492                                 }
   2493                                 else
   2494                                 {
   2495                                     au4_ilf_across_tile_slice_enable[2] =  (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
   2496                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   2497                                 }
   2498 
   2499                                 if(au4_idx_l[5] > idx_l)
   2500                                 {
   2501                                     au4_ilf_across_tile_slice_enable[5] = (ps_slice_hdr_base + au4_idx_l[5])->i1_slice_loop_filter_across_slices_enabled_flag;
   2502                                 }
   2503 
   2504                                 //  au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_l)->i1_slice_loop_filter_across_slices_enabled_flag;
   2505                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_l[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   2506                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_l[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   2507                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_l[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   2508                                 /*
   2509                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   2510                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   2511                                  */
   2512                                 for(i = 0; i < 8; i++)
   2513                                 {
   2514                                     /*Sets the edges that lie on the slice/tile boundary*/
   2515                                     if(au4_idx_l[i] != idx_l)
   2516                                     {
   2517                                         au1_tile_slice_boundary[i] = 1;
   2518                                     }
   2519                                     else
   2520                                     {
   2521                                         au4_ilf_across_tile_slice_enable[i] = 1;
   2522                                     }
   2523                                 }
   2524                                 /*Reset indices*/
   2525                                 for(i = 0; i < 8; i++)
   2526                                 {
   2527                                     au4_idx_l[i] = 0;
   2528                                 }
   2529                             }
   2530                             if(ps_pps->i1_tiles_enabled_flag)
   2531                             {
   2532                                 /* Calculate availability flags at tile boundary */
   2533                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   2534                                 {
   2535                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   2536                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   2537                                     {
   2538                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   2539                                         {
   2540                                             au4_idx_l[2] = -1;
   2541                                             au4_idx_l[4] = -1;
   2542                                             au4_idx_l[5] = -1;
   2543                                         }
   2544                                         else
   2545                                         {
   2546                                             au4_idx_l[2] = au4_idx_l[4] = pu1_tile_idx[ctbx_l_t + (ctby_l_t * ps_sps->i2_pic_wd_in_ctb)];
   2547                                             au4_idx_l[5] =  pu1_tile_idx[ctbx_l_t + 1 + (ctby_l_t  * ps_sps->i2_pic_wd_in_ctb)];
   2548                                         }
   2549 
   2550                                         idx_l   = au4_idx_l[6] = pu1_tile_idx[ctbx_l + (ctby_l * ps_sps->i2_pic_wd_in_ctb)];
   2551                                         au4_idx_l[0] = pu1_tile_idx[ctbx_l_l + (ctby_l_l * ps_sps->i2_pic_wd_in_ctb)];
   2552                                         au4_idx_l[1] = au4_idx_l[7] = pu1_tile_idx[ctbx_l_r + (ctby_l_r * ps_sps->i2_pic_wd_in_ctb)];
   2553                                         au4_idx_l[3] = pu1_tile_idx[ctbx_l_d + (ctby_l_d * ps_sps->i2_pic_wd_in_ctb)];
   2554 
   2555                                         for(i = 0; i < 8; i++)
   2556                                         {
   2557                                             /*Sets the edges that lie on the slice/tile boundary*/
   2558                                             if(au4_idx_l[i] != idx_l)
   2559                                             {
   2560                                                 au1_tile_slice_boundary[i] |= 1;
   2561                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   2562                                             }
   2563                                         }
   2564                                     }
   2565                                 }
   2566                             }
   2567                             for(i = 0; i < 8; i++)
   2568                             {
   2569                                 /*Sets the edges that lie on the slice/tile boundary*/
   2570                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   2571                                 {
   2572                                     au1_avail_chroma[i] = 0;
   2573                                 }
   2574                             }
   2575                         }
   2576                     }
   2577                     if(0 == (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) - sao_wd_chroma)
   2578                     {
   2579                         au1_avail_chroma[0] = 0;
   2580                         au1_avail_chroma[4] = 0;
   2581                         au1_avail_chroma[6] = 0;
   2582                     }
   2583 
   2584                     if(ps_sps->i2_pic_wd_in_ctb == ps_sao_ctxt->i4_ctb_x)
   2585                     {
   2586                         au1_avail_chroma[1] = 0;
   2587                         au1_avail_chroma[5] = 0;
   2588                         au1_avail_chroma[7] = 0;
   2589                     }
   2590 
   2591                     if(0 == ps_sao_ctxt->i4_ctb_y)
   2592                     {
   2593                         au1_avail_chroma[2] = 0;
   2594                         au1_avail_chroma[4] = 0;
   2595                         au1_avail_chroma[5] = 0;
   2596                     }
   2597 
   2598                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
   2599                     {
   2600                         au1_avail_chroma[3] = 0;
   2601                         au1_avail_chroma[6] = 0;
   2602                         au1_avail_chroma[7] = 0;
   2603                     }
   2604 
   2605                     {
   2606                         au1_src_top_right[0] = pu1_src_top_chroma[sao_wd_chroma];
   2607                         au1_src_top_right[1] = pu1_src_top_chroma[sao_wd_chroma + 1];
   2608                         au1_src_bot_left[0] = pu1_sao_src_top_left_chroma_bot_left[0];
   2609                         au1_src_bot_left[1] = pu1_sao_src_top_left_chroma_bot_left[1];
   2610                         //au1_src_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   2611                         //au1_src_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   2612                         if((ctb_size == 16) && (ps_sao_ctxt->i4_ctb_x != ps_sps->i2_pic_wd_in_ctb - 1))
   2613                         {
   2614                             au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
   2615                             au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
   2616                         }
   2617 
   2618 
   2619                         if(chroma_yuv420sp_vu)
   2620                         {
   2621                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2622                                                                                  src_strd,
   2623                                                                                  pu1_src_left_chroma,
   2624                                                                                  pu1_src_top_chroma,
   2625                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   2626                                                                                  au1_src_top_right,
   2627                                                                                  au1_src_bot_left,
   2628                                                                                  au1_avail_chroma,
   2629                                                                                  ai1_offset_cr,
   2630                                                                                  ai1_offset_cb,
   2631                                                                                  sao_wd_chroma,
   2632                                                                                  sao_ht_chroma);
   2633                         }
   2634                         else
   2635                         {
   2636                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   2637                                                                                  src_strd,
   2638                                                                                  pu1_src_left_chroma,
   2639                                                                                  pu1_src_top_chroma,
   2640                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   2641                                                                                  au1_src_top_right,
   2642                                                                                  au1_src_bot_left,
   2643                                                                                  au1_avail_chroma,
   2644                                                                                  ai1_offset_cb,
   2645                                                                                  ai1_offset_cr,
   2646                                                                                  sao_wd_chroma,
   2647                                                                                  sao_ht_chroma);
   2648                         }
   2649                     }
   2650 
   2651                 }
   2652             }
   2653             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2654             {
   2655                 for(row = 0; row < sao_ht_chroma; row++)
   2656                 {
   2657                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   2658                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   2659                 }
   2660                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   2661                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   2662 
   2663                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   2664             }
   2665 
   2666         }
   2667         pu1_src_luma += sao_wd_luma;
   2668         pu1_src_chroma += sao_wd_chroma;
   2669         ps_sao += 1;
   2670     }
   2671 
   2672 
   2673     /* Current CTB */
   2674     {
   2675         WORD32 sao_wd_luma = ctb_size - SAO_SHIFT_CTB;
   2676         WORD32 sao_wd_chroma = ctb_size - SAO_SHIFT_CTB * 2;
   2677         WORD32 sao_ht_luma = ctb_size - SAO_SHIFT_CTB;
   2678         WORD32 sao_ht_chroma = ctb_size / 2 - SAO_SHIFT_CTB;
   2679         WORD32 ctbx_c_t = 0, ctbx_c_l = 0, ctbx_c_r = 0, ctbx_c_d = 0, ctbx_c = 0;
   2680         WORD32 ctby_c_t = 0, ctby_c_l = 0, ctby_c_r = 0, ctby_c_d = 0, ctby_c = 0;
   2681         WORD32 au4_idx_c[8], idx_c;
   2682 
   2683         WORD32 remaining_rows;
   2684         WORD32 remaining_cols;
   2685 
   2686         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_luma);
   2687         if(remaining_cols <= SAO_SHIFT_CTB)
   2688         {
   2689             sao_wd_luma += remaining_cols;
   2690         }
   2691         remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + sao_wd_chroma);
   2692         if(remaining_cols <= 2 * SAO_SHIFT_CTB)
   2693         {
   2694             sao_wd_chroma += remaining_cols;
   2695         }
   2696 
   2697         remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + sao_ht_luma);
   2698         if(remaining_rows <= SAO_SHIFT_CTB)
   2699         {
   2700             sao_ht_luma += remaining_rows;
   2701         }
   2702         remaining_rows = ps_sps->i2_pic_height_in_luma_samples / 2 - ((ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 1)) + sao_ht_chroma);
   2703         if(remaining_rows <= SAO_SHIFT_CTB)
   2704         {
   2705             sao_ht_chroma += remaining_rows;
   2706         }
   2707 
   2708         pu1_src_top_luma = ps_sao_ctxt->pu1_sao_src_top_luma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   2709         pu1_src_top_chroma = ps_sao_ctxt->pu1_sao_src_top_chroma + (ps_sao_ctxt->i4_ctb_x << log2_ctb_size);
   2710         pu1_src_left_luma = ps_sao_ctxt->pu1_sao_src_left_luma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2711         pu1_src_left_chroma = ps_sao_ctxt->pu1_sao_src_left_chroma + (ps_sao_ctxt->i4_ctb_y << log2_ctb_size);
   2712 
   2713         if((0 != sao_wd_luma) && (0 != sao_ht_luma))
   2714         {
   2715             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
   2716             {
   2717                 if(0 == ps_sao->b3_y_type_idx)
   2718                 {
   2719                     /* Update left, top and top-left */
   2720                     for(row = 0; row < sao_ht_luma; row++)
   2721                     {
   2722                         pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2723                     }
   2724                     pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2725 
   2726                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2727 
   2728                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
   2729 
   2730                 }
   2731 
   2732                 else if(1 == ps_sao->b3_y_type_idx)
   2733                 {
   2734                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2735                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2736                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2737                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2738 
   2739                     ps_codec->s_func_selector.ihevc_sao_band_offset_luma_fptr(pu1_src_luma,
   2740                                                                               src_strd,
   2741                                                                               pu1_src_left_luma,
   2742                                                                               pu1_src_top_luma,
   2743                                                                               pu1_sao_src_top_left_luma_curr_ctb,
   2744                                                                               ps_sao->b5_y_band_pos,
   2745                                                                               ai1_offset_y,
   2746                                                                               sao_wd_luma,
   2747                                                                               sao_ht_luma
   2748                                                                              );
   2749                 }
   2750 
   2751                 else // if(2 <= ps_sao->b3_y_type_idx)
   2752                 {
   2753                     ai1_offset_y[1] = ps_sao->b4_y_offset_1;
   2754                     ai1_offset_y[2] = ps_sao->b4_y_offset_2;
   2755                     ai1_offset_y[3] = ps_sao->b4_y_offset_3;
   2756                     ai1_offset_y[4] = ps_sao->b4_y_offset_4;
   2757 
   2758                     for(i = 0; i < 8; i++)
   2759                     {
   2760                         au1_avail_luma[i] = 255;
   2761                         au1_tile_slice_boundary[i] = 0;
   2762                         au4_idx_c[i] = 0;
   2763                         au4_ilf_across_tile_slice_enable[i] = 1;
   2764                     }
   2765                     /******************************************************************
   2766                      * Derive the  Top-left CTB's neighbour pixel's slice indices.
   2767                      *
   2768                      *
   2769                      *          ____________
   2770                      *         |    |       |
   2771                      *         |    | C_T   |
   2772                      *         |____|_______|____
   2773                      *         |    |       |    |
   2774                      *         | C_L|   C   | C_R|
   2775                      *         |____|_______|    |
   2776                      *              |  C_D       |
   2777                      *              |            |
   2778                      *              |____________|
   2779                      *
   2780                      *****************************************************************/
   2781 
   2782                     /*In case of slices*/
   2783                     {
   2784                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2785                         {
   2786                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
   2787                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
   2788 
   2789                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
   2790                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
   2791 
   2792                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
   2793                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
   2794 
   2795                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
   2796                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
   2797 
   2798                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
   2799                             ctby_c = ps_sao_ctxt->i4_ctb_y;
   2800 
   2801                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   2802                             {
   2803                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   2804                                 {
   2805                                     au4_idx_c[6] = -1;
   2806                                     au4_idx_c[0] = -1;
   2807                                     au4_idx_c[4] = -1;
   2808                                 }
   2809                                 else
   2810                                 {
   2811                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   2812                                 }
   2813 
   2814                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2815                                 {
   2816                                     au4_idx_c[2] = -1;
   2817                                     au4_idx_c[5] = -1;
   2818                                     au4_idx_c[4] = -1;
   2819                                 }
   2820                                 else
   2821                                 {
   2822                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   2823                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   2824                                 }
   2825                                 idx_c   = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   2826                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   2827                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   2828 
   2829                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   2830                                 {
   2831                                     au4_ilf_across_tile_slice_enable[6] = 0;
   2832                                     au4_ilf_across_tile_slice_enable[0] = 0;
   2833                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2834                                 }
   2835                                 else
   2836                                 {
   2837                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   2838                                     au4_ilf_across_tile_slice_enable[0] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;;
   2839                                 }
   2840                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   2841                                 {
   2842                                     au4_ilf_across_tile_slice_enable[2] = 0;
   2843                                     au4_ilf_across_tile_slice_enable[4] = 0;
   2844                                     au4_ilf_across_tile_slice_enable[5] = 0;
   2845                                 }
   2846                                 else
   2847                                 {
   2848                                     au4_ilf_across_tile_slice_enable[2] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   2849                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   2850                                 }
   2851                                 au4_ilf_across_tile_slice_enable[1] = (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   2852                                 au4_ilf_across_tile_slice_enable[3] = (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   2853                                 au4_ilf_across_tile_slice_enable[7] = (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   2854 
   2855                                 if(au4_idx_c[6] < idx_c)
   2856                                 {
   2857                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   2858                                 }
   2859 
   2860                                 /*
   2861                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   2862                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   2863                                  */
   2864                                 for(i = 0; i < 8; i++)
   2865                                 {
   2866                                     /*Sets the edges that lie on the slice/tile boundary*/
   2867                                     if(au4_idx_c[i] != idx_c)
   2868                                     {
   2869                                         au1_tile_slice_boundary[i] = 1;
   2870                                     }
   2871                                     else
   2872                                     {
   2873                                         au4_ilf_across_tile_slice_enable[i] = 1;
   2874                                     }
   2875                                 }
   2876                                 /*Reset indices*/
   2877                                 for(i = 0; i < 8; i++)
   2878                                 {
   2879                                     au4_idx_c[i] = 0;
   2880                                 }
   2881                             }
   2882 
   2883                             if(ps_pps->i1_tiles_enabled_flag)
   2884                             {
   2885                                 /* Calculate availability flags at slice boundary */
   2886                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   2887                                 {
   2888                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   2889                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   2890                                     {
   2891                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   2892                                         {
   2893                                             au4_idx_c[6] = -1;
   2894                                             au4_idx_c[0] = -1;
   2895                                             au4_idx_c[4] = -1;
   2896                                         }
   2897                                         else
   2898                                         {
   2899                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   2900                                         }
   2901 
   2902                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   2903                                         {
   2904                                             au4_idx_c[2] = -1;
   2905                                             au4_idx_c[5] = -1;
   2906                                             au4_idx_c[4] = -1;
   2907                                         }
   2908                                         else
   2909                                         {
   2910                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   2911                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   2912                                         }
   2913                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   2914                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   2915                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   2916 
   2917                                         for(i = 0; i < 8; i++)
   2918                                         {
   2919                                             /*Sets the edges that lie on the slice/tile boundary*/
   2920                                             if(au4_idx_c[i] != idx_c)
   2921                                             {
   2922                                                 au1_tile_slice_boundary[i] |= 1;
   2923                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   2924                                             }
   2925                                         }
   2926                                     }
   2927                                 }
   2928                             }
   2929 
   2930                             for(i = 0; i < 8; i++)
   2931                             {
   2932                                 /*Sets the edges that lie on the slice/tile boundary*/
   2933                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   2934                                 {
   2935                                     au1_avail_luma[i] = 0;
   2936                                 }
   2937                             }
   2938 
   2939                         }
   2940                     }
   2941                     if(0 == ps_sao_ctxt->i4_ctb_x)
   2942                     {
   2943                         au1_avail_luma[0] = 0;
   2944                         au1_avail_luma[4] = 0;
   2945                         au1_avail_luma[6] = 0;
   2946                     }
   2947 
   2948                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_luma)
   2949                     {
   2950                         au1_avail_luma[1] = 0;
   2951                         au1_avail_luma[5] = 0;
   2952                         au1_avail_luma[7] = 0;
   2953                     }
   2954 
   2955                     if(0 == ps_sao_ctxt->i4_ctb_y)
   2956                     {
   2957                         au1_avail_luma[2] = 0;
   2958                         au1_avail_luma[4] = 0;
   2959                         au1_avail_luma[5] = 0;
   2960                     }
   2961 
   2962                     if(ps_sps->i2_pic_height_in_luma_samples - (ps_sao_ctxt->i4_ctb_y  << log2_ctb_size) <= sao_ht_luma)
   2963                     {
   2964                         au1_avail_luma[3] = 0;
   2965                         au1_avail_luma[6] = 0;
   2966                         au1_avail_luma[7] = 0;
   2967                     }
   2968 
   2969                     {
   2970                         au1_src_top_right[0] = pu1_src_luma[sao_wd_luma - src_strd];
   2971                         u1_sao_src_top_left_luma_bot_left = pu1_src_luma[sao_ht_luma * src_strd - 1];
   2972 
   2973                         ps_codec->apf_sao_luma[ps_sao->b3_y_type_idx - 2](pu1_src_luma,
   2974                                                                           src_strd,
   2975                                                                           pu1_src_left_luma,
   2976                                                                           pu1_src_top_luma,
   2977                                                                           pu1_sao_src_top_left_luma_curr_ctb,
   2978                                                                           au1_src_top_right,
   2979                                                                           &u1_sao_src_top_left_luma_bot_left,
   2980                                                                           au1_avail_luma,
   2981                                                                           ai1_offset_y,
   2982                                                                           sao_wd_luma,
   2983                                                                           sao_ht_luma);
   2984                     }
   2985                     pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
   2986                     pu1_sao_src_top_left_luma_bot_left[0] = pu1_src_luma[(sao_ht_luma)*src_strd + sao_wd_luma - 1];
   2987                 }
   2988             }
   2989             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   2990             {
   2991                 /* Update left, top and top-left */
   2992                 for(row = 0; row < sao_ht_luma; row++)
   2993                 {
   2994                     pu1_src_left_luma[row] = pu1_src_luma[row * src_strd + (sao_wd_luma - 1)];
   2995                 }
   2996                 pu1_sao_src_top_left_luma_curr_ctb[0] = pu1_src_top_luma[sao_wd_luma - 1];
   2997 
   2998                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_luma, &pu1_src_luma[(sao_ht_luma - 1) * src_strd], sao_wd_luma);
   2999 
   3000                 pu1_sao_src_top_left_luma_top_right[0] = pu1_src_luma[(sao_ht_luma - 1) * src_strd + sao_wd_luma];
   3001             }
   3002         }
   3003 
   3004         if((0 != sao_wd_chroma) && (0 != sao_ht_chroma))
   3005         {
   3006             if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
   3007             {
   3008                 if(0 == ps_sao->b3_cb_type_idx)
   3009                 {
   3010                     for(row = 0; row < sao_ht_chroma; row++)
   3011                     {
   3012                         pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   3013                         pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   3014                     }
   3015                     pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   3016                     pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   3017 
   3018                     ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   3019 
   3020                     pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
   3021                     pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
   3022                 }
   3023 
   3024                 else if(1 == ps_sao->b3_cb_type_idx)
   3025                 {
   3026                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   3027                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   3028                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   3029                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   3030 
   3031                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   3032                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   3033                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   3034                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   3035 
   3036                     if(chroma_yuv420sp_vu)
   3037                     {
   3038                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   3039                                                                                     src_strd,
   3040                                                                                     pu1_src_left_chroma,
   3041                                                                                     pu1_src_top_chroma,
   3042                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   3043                                                                                     ps_sao->b5_cr_band_pos,
   3044                                                                                     ps_sao->b5_cb_band_pos,
   3045                                                                                     ai1_offset_cr,
   3046                                                                                     ai1_offset_cb,
   3047                                                                                     sao_wd_chroma,
   3048                                                                                     sao_ht_chroma
   3049                                                                                    );
   3050                     }
   3051                     else
   3052                     {
   3053                         ps_codec->s_func_selector.ihevc_sao_band_offset_chroma_fptr(pu1_src_chroma,
   3054                                                                                     src_strd,
   3055                                                                                     pu1_src_left_chroma,
   3056                                                                                     pu1_src_top_chroma,
   3057                                                                                     pu1_sao_src_top_left_chroma_curr_ctb,
   3058                                                                                     ps_sao->b5_cb_band_pos,
   3059                                                                                     ps_sao->b5_cr_band_pos,
   3060                                                                                     ai1_offset_cb,
   3061                                                                                     ai1_offset_cr,
   3062                                                                                     sao_wd_chroma,
   3063                                                                                     sao_ht_chroma
   3064                                                                                    );
   3065                     }
   3066                 }
   3067 
   3068                 else // if(2 <= ps_sao->b3_cb_type_idx)
   3069                 {
   3070                     ai1_offset_cb[1] = ps_sao->b4_cb_offset_1;
   3071                     ai1_offset_cb[2] = ps_sao->b4_cb_offset_2;
   3072                     ai1_offset_cb[3] = ps_sao->b4_cb_offset_3;
   3073                     ai1_offset_cb[4] = ps_sao->b4_cb_offset_4;
   3074 
   3075                     ai1_offset_cr[1] = ps_sao->b4_cr_offset_1;
   3076                     ai1_offset_cr[2] = ps_sao->b4_cr_offset_2;
   3077                     ai1_offset_cr[3] = ps_sao->b4_cr_offset_3;
   3078                     ai1_offset_cr[4] = ps_sao->b4_cr_offset_4;
   3079 
   3080                     for(i = 0; i < 8; i++)
   3081                     {
   3082                         au1_avail_chroma[i] = 255;
   3083                         au1_tile_slice_boundary[i] = 0;
   3084                         au4_idx_c[i] = 0;
   3085                         au4_ilf_across_tile_slice_enable[i] = 1;
   3086                     }
   3087                     {
   3088                         if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   3089                         {
   3090                             ctbx_c_t = ps_sao_ctxt->i4_ctb_x;
   3091                             ctby_c_t = ps_sao_ctxt->i4_ctb_y - 1;
   3092 
   3093                             ctbx_c_l = ps_sao_ctxt->i4_ctb_x - 1;
   3094                             ctby_c_l = ps_sao_ctxt->i4_ctb_y;
   3095 
   3096                             ctbx_c_r = ps_sao_ctxt->i4_ctb_x;
   3097                             ctby_c_r = ps_sao_ctxt->i4_ctb_y;
   3098 
   3099                             ctbx_c_d =  ps_sao_ctxt->i4_ctb_x;
   3100                             ctby_c_d =  ps_sao_ctxt->i4_ctb_y;
   3101 
   3102                             ctbx_c = ps_sao_ctxt->i4_ctb_x;
   3103                             ctby_c = ps_sao_ctxt->i4_ctb_y;
   3104 
   3105                             if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
   3106                             {
   3107                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   3108                                 {
   3109                                     au4_idx_c[0] = -1;
   3110                                     au4_idx_c[4] = -1;
   3111                                     au4_idx_c[6] = -1;
   3112                                 }
   3113                                 else
   3114                                 {
   3115                                     au4_idx_c[0] =  au4_idx_c[6] = pu1_slice_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   3116                                 }
   3117 
   3118                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   3119                                 {
   3120                                     au4_idx_c[2] = -1;
   3121                                     au4_idx_c[4] = -1;
   3122                                     au4_idx_c[5] = -1;
   3123                                 }
   3124                                 else
   3125                                 {
   3126                                     au4_idx_c[2] = au4_idx_c[5] = pu1_slice_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   3127                                     au4_idx_c[4] =  pu1_slice_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   3128                                 }
   3129                                 idx_c = pu1_slice_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   3130                                 au4_idx_c[1] = au4_idx_c[7] = pu1_slice_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   3131                                 au4_idx_c[3] = pu1_slice_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   3132 
   3133                                 if(0 == ps_sao_ctxt->i4_ctb_x)
   3134                                 {
   3135                                     au4_ilf_across_tile_slice_enable[0] = 0;
   3136                                     au4_ilf_across_tile_slice_enable[4] = 0;
   3137                                     au4_ilf_across_tile_slice_enable[6] = 0;
   3138                                 }
   3139                                 else
   3140                                 {
   3141                                     au4_ilf_across_tile_slice_enable[6] &= (ps_slice_hdr_base + au4_idx_c[6])->i1_slice_loop_filter_across_slices_enabled_flag;
   3142                                     au4_ilf_across_tile_slice_enable[0] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   3143                                 }
   3144 
   3145                                 if(0 == ps_sao_ctxt->i4_ctb_y)
   3146                                 {
   3147                                     au4_ilf_across_tile_slice_enable[2] = 0;
   3148                                     au4_ilf_across_tile_slice_enable[4] = 0;
   3149                                     au4_ilf_across_tile_slice_enable[5] = 0;
   3150                                 }
   3151                                 else
   3152                                 {
   3153                                     au4_ilf_across_tile_slice_enable[2] &= (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   3154                                     au4_ilf_across_tile_slice_enable[5] = au4_ilf_across_tile_slice_enable[4] = au4_ilf_across_tile_slice_enable[2];
   3155                                 }
   3156 
   3157                                 au4_ilf_across_tile_slice_enable[1] &= (ps_slice_hdr_base + au4_idx_c[1])->i1_slice_loop_filter_across_slices_enabled_flag;
   3158                                 au4_ilf_across_tile_slice_enable[3] &= (ps_slice_hdr_base + au4_idx_c[3])->i1_slice_loop_filter_across_slices_enabled_flag;
   3159                                 au4_ilf_across_tile_slice_enable[7] &= (ps_slice_hdr_base + au4_idx_c[7])->i1_slice_loop_filter_across_slices_enabled_flag;
   3160 
   3161                                 if(idx_c > au4_idx_c[6])
   3162                                 {
   3163                                     au4_ilf_across_tile_slice_enable[6] = (ps_slice_hdr_base + idx_c)->i1_slice_loop_filter_across_slices_enabled_flag;
   3164                                 }
   3165 
   3166                                 /*
   3167                                  * Between each neighbor and the current CTB, the i1_slice_loop_filter_across_slices_enabled_flag
   3168                                  * of the pixel having a greater address is checked. Accordingly, set the availability flags
   3169                                  */
   3170                                 for(i = 0; i < 8; i++)
   3171                                 {
   3172                                     /*Sets the edges that lie on the slice/tile boundary*/
   3173                                     if(au4_idx_c[i] != idx_c)
   3174                                     {
   3175                                         au1_tile_slice_boundary[i] = 1;
   3176                                     }
   3177                                     else
   3178                                     {
   3179                                         au4_ilf_across_tile_slice_enable[i] = 1;
   3180                                     }
   3181                                 }
   3182                                 /*Reset indices*/
   3183                                 for(i = 0; i < 8; i++)
   3184                                 {
   3185                                     au4_idx_c[i] = 0;
   3186                                 }
   3187                             }
   3188 
   3189                             if(ps_pps->i1_tiles_enabled_flag)
   3190                             {
   3191                                 /* Calculate availability flags at slice boundary */
   3192                                 if(((ps_tile->u1_pos_x == ps_sao_ctxt->i4_ctb_x) || (ps_tile->u1_pos_y == ps_sao_ctxt->i4_ctb_y)) && (!((0 == ps_tile->u1_pos_x) && (0 == ps_tile->u1_pos_y))))
   3193                                 {
   3194                                     /*If ilf across tiles is enabled, boundary availability for tiles is not checked. */
   3195                                     if(!ps_pps->i1_loop_filter_across_tiles_enabled_flag)
   3196                                     {
   3197                                         if(0 == ps_sao_ctxt->i4_ctb_x)
   3198                                         {
   3199                                             au4_idx_c[6] = -1;
   3200                                             au4_idx_c[0] = -1;
   3201                                             au4_idx_c[4] = -1;
   3202                                         }
   3203                                         else
   3204                                         {
   3205                                             au4_idx_c[0] =  au4_idx_c[6] = pu1_tile_idx[ctbx_c_l + (ctby_c_l * ps_sps->i2_pic_wd_in_ctb)];
   3206                                         }
   3207 
   3208                                         if(0 == ps_sao_ctxt->i4_ctb_y)
   3209                                         {
   3210                                             au4_idx_c[2] = -1;
   3211                                             au4_idx_c[5] = -1;
   3212                                             au4_idx_c[4] = -1;
   3213                                         }
   3214                                         else
   3215                                         {
   3216                                             au4_idx_c[4] =  pu1_tile_idx[ctbx_c_t - 1 + (ctby_c_t  * ps_sps->i2_pic_wd_in_ctb)];
   3217                                             au4_idx_c[2] = au4_idx_c[5] = pu1_tile_idx[ctbx_c_t + (ctby_c_t * ps_sps->i2_pic_wd_in_ctb)];
   3218                                         }
   3219                                         idx_c   = pu1_tile_idx[ctbx_c + (ctby_c * ps_sps->i2_pic_wd_in_ctb)];
   3220                                         au4_idx_c[1] = au4_idx_c[7] = pu1_tile_idx[ctbx_c_r + (ctby_c_r * ps_sps->i2_pic_wd_in_ctb)];
   3221                                         au4_idx_c[3] = pu1_tile_idx[ctbx_c_d + (ctby_c_d * ps_sps->i2_pic_wd_in_ctb)];
   3222 
   3223                                         for(i = 0; i < 8; i++)
   3224                                         {
   3225                                             /*Sets the edges that lie on the slice/tile boundary*/
   3226                                             if(au4_idx_c[i] != idx_c)
   3227                                             {
   3228                                                 au1_tile_slice_boundary[i] |= 1;
   3229                                                 au4_ilf_across_tile_slice_enable[i] &= ps_pps->i1_loop_filter_across_tiles_enabled_flag; //=0
   3230                                             }
   3231                                         }
   3232                                     }
   3233                                 }
   3234                             }
   3235 
   3236                             for(i = 0; i < 8; i++)
   3237                             {
   3238                                 /*Sets the edges that lie on the slice/tile boundary*/
   3239                                 if((au1_tile_slice_boundary[i]) && !(au4_ilf_across_tile_slice_enable[i]))
   3240                                 {
   3241                                     au1_avail_chroma[i] = 0;
   3242                                 }
   3243                             }
   3244                         }
   3245                     }
   3246 
   3247                     if(0 == ps_sao_ctxt->i4_ctb_x)
   3248                     {
   3249                         au1_avail_chroma[0] = 0;
   3250                         au1_avail_chroma[4] = 0;
   3251                         au1_avail_chroma[6] = 0;
   3252                     }
   3253 
   3254                     if(ps_sps->i2_pic_width_in_luma_samples - (ps_sao_ctxt->i4_ctb_x << log2_ctb_size) <= sao_wd_chroma)
   3255                     {
   3256                         au1_avail_chroma[1] = 0;
   3257                         au1_avail_chroma[5] = 0;
   3258                         au1_avail_chroma[7] = 0;
   3259                     }
   3260 
   3261                     if(0 == ps_sao_ctxt->i4_ctb_y)
   3262                     {
   3263                         au1_avail_chroma[2] = 0;
   3264                         au1_avail_chroma[4] = 0;
   3265                         au1_avail_chroma[5] = 0;
   3266                     }
   3267 
   3268                     if(ps_sps->i2_pic_height_in_luma_samples / 2 - (ps_sao_ctxt->i4_ctb_y  << (log2_ctb_size - 1)) <= sao_ht_chroma)
   3269                     {
   3270                         au1_avail_chroma[3] = 0;
   3271                         au1_avail_chroma[6] = 0;
   3272                         au1_avail_chroma[7] = 0;
   3273                     }
   3274 
   3275                     {
   3276                         au1_src_top_right[0] = pu1_src_chroma[sao_wd_chroma - src_strd];
   3277                         au1_src_top_right[1] = pu1_src_chroma[sao_wd_chroma - src_strd + 1];
   3278 
   3279                         au1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[sao_ht_chroma * src_strd - 2];
   3280                         au1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[sao_ht_chroma * src_strd - 1];
   3281 
   3282                         if(chroma_yuv420sp_vu)
   3283                         {
   3284                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   3285                                                                                  src_strd,
   3286                                                                                  pu1_src_left_chroma,
   3287                                                                                  pu1_src_top_chroma,
   3288                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   3289                                                                                  au1_src_top_right,
   3290                                                                                  au1_sao_src_top_left_chroma_bot_left,
   3291                                                                                  au1_avail_chroma,
   3292                                                                                  ai1_offset_cr,
   3293                                                                                  ai1_offset_cb,
   3294                                                                                  sao_wd_chroma,
   3295                                                                                  sao_ht_chroma);
   3296                         }
   3297                         else
   3298                         {
   3299                             ps_codec->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](pu1_src_chroma,
   3300                                                                                  src_strd,
   3301                                                                                  pu1_src_left_chroma,
   3302                                                                                  pu1_src_top_chroma,
   3303                                                                                  pu1_sao_src_top_left_chroma_curr_ctb,
   3304                                                                                  au1_src_top_right,
   3305                                                                                  au1_sao_src_top_left_chroma_bot_left,
   3306                                                                                  au1_avail_chroma,
   3307                                                                                  ai1_offset_cb,
   3308                                                                                  ai1_offset_cr,
   3309                                                                                  sao_wd_chroma,
   3310                                                                                  sao_ht_chroma);
   3311                         }
   3312                     }
   3313 
   3314                 }
   3315                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
   3316                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
   3317 
   3318                 pu1_sao_src_top_left_chroma_bot_left[0] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 2];
   3319                 pu1_sao_src_top_left_chroma_bot_left[1] = pu1_src_chroma[(sao_ht_chroma)*src_strd + sao_wd_chroma - 1];
   3320             }
   3321             else if((!ps_slice_hdr->i1_first_slice_in_pic_flag) || (ps_pps->i1_tiles_enabled_flag))
   3322             {
   3323                 for(row = 0; row < sao_ht_chroma; row++)
   3324                 {
   3325                     pu1_src_left_chroma[2 * row] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 2)];
   3326                     pu1_src_left_chroma[2 * row + 1] = pu1_src_chroma[row * src_strd + (sao_wd_chroma - 1)];
   3327                 }
   3328                 pu1_sao_src_top_left_chroma_curr_ctb[0] = pu1_src_top_chroma[sao_wd_chroma - 2];
   3329                 pu1_sao_src_top_left_chroma_curr_ctb[1] = pu1_src_top_chroma[sao_wd_chroma - 1];
   3330 
   3331                 ps_codec->s_func_selector.ihevc_memcpy_fptr(pu1_src_top_chroma, &pu1_src_chroma[(sao_ht_chroma - 1) * src_strd], sao_wd_chroma);
   3332 
   3333                 pu1_sao_src_top_left_chroma_top_right[0] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma];
   3334                 pu1_sao_src_top_left_chroma_top_right[1] = pu1_src_chroma[(sao_ht_chroma - 1) * src_strd + sao_wd_chroma + 1];
   3335             }
   3336 
   3337         }
   3338     }
   3339 
   3340 
   3341 
   3342 
   3343 /* If no loop filter is enabled copy the backed up values */
   3344     {
   3345         /* Luma */
   3346         if(no_loop_filter_enabled_luma)
   3347         {
   3348             UWORD32 u4_no_loop_filter_flag;
   3349             WORD32 loop_filter_bit_pos;
   3350             WORD32 log2_min_cu = 3;
   3351             WORD32 min_cu = (1 << log2_min_cu);
   3352             UWORD8 *pu1_src_tmp_luma = pu1_src_luma;
   3353             WORD32 sao_blk_ht = ctb_size - SAO_SHIFT_CTB;
   3354             WORD32 sao_blk_wd = ctb_size;
   3355             WORD32 remaining_rows;
   3356             WORD32 remaining_cols;
   3357 
   3358             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
   3359             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - SAO_SHIFT_CTB);
   3360             if(remaining_rows <= SAO_SHIFT_CTB)
   3361                 sao_blk_ht += remaining_rows;
   3362             if(remaining_cols <= SAO_SHIFT_CTB)
   3363                 sao_blk_wd += remaining_cols;
   3364 
   3365             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB : 0;
   3366             pu1_src_tmp_luma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
   3367 
   3368             pu1_src_backup_luma = ps_sao_ctxt->pu1_tmp_buf_luma;
   3369 
   3370             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
   3371                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
   3372             if(ps_sao_ctxt->i4_ctb_x > 0)
   3373                 loop_filter_bit_pos -= 1;
   3374 
   3375             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
   3376                             (loop_filter_bit_pos >> 3);
   3377 
   3378             for(i = -(ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB : 0) >> log2_min_cu;
   3379                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
   3380             {
   3381                 WORD32 tmp_wd = sao_blk_wd;
   3382 
   3383                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
   3384                                 (loop_filter_bit_pos & 7);
   3385                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
   3386 
   3387                 if(u4_no_loop_filter_flag)
   3388                 {
   3389                     while(tmp_wd > 0)
   3390                     {
   3391                         if(CTZ(u4_no_loop_filter_flag))
   3392                         {
   3393                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3394                             pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3395                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
   3396                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
   3397                         }
   3398                         else
   3399                         {
   3400                             for(row = 0; row < min_cu; row++)
   3401                             {
   3402                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
   3403                                 {
   3404                                     pu1_src_tmp_luma[row * src_strd + col] = pu1_src_backup_luma[row * backup_strd + col];
   3405                                 }
   3406                             }
   3407                             pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3408                             pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3409                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
   3410                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
   3411                         }
   3412                     }
   3413 
   3414                     pu1_src_tmp_luma -= sao_blk_wd;
   3415                     pu1_src_backup_luma -= sao_blk_wd;
   3416                 }
   3417 
   3418                 pu1_src_tmp_luma += (src_strd << log2_min_cu);
   3419                 pu1_src_backup_luma += (backup_strd << log2_min_cu);
   3420             }
   3421         }
   3422 
   3423         /* Chroma */
   3424         if(no_loop_filter_enabled_chroma)
   3425         {
   3426             UWORD32 u4_no_loop_filter_flag;
   3427             WORD32 loop_filter_bit_pos;
   3428             WORD32 log2_min_cu = 3;
   3429             WORD32 min_cu = (1 << log2_min_cu);
   3430             UWORD8 *pu1_src_tmp_chroma = pu1_src_chroma;
   3431             WORD32 sao_blk_ht = ctb_size - 2 * SAO_SHIFT_CTB;
   3432             WORD32 sao_blk_wd = ctb_size;
   3433             WORD32 remaining_rows;
   3434             WORD32 remaining_cols;
   3435 
   3436             remaining_rows = ps_sps->i2_pic_height_in_luma_samples - ((ps_sao_ctxt->i4_ctb_y << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
   3437             remaining_cols = ps_sps->i2_pic_width_in_luma_samples - ((ps_sao_ctxt->i4_ctb_x << log2_ctb_size) + ctb_size - 2 * SAO_SHIFT_CTB);
   3438             if(remaining_rows <= 2 * SAO_SHIFT_CTB)
   3439                 sao_blk_ht += remaining_rows;
   3440             if(remaining_cols <= 2 * SAO_SHIFT_CTB)
   3441                 sao_blk_wd += remaining_cols;
   3442 
   3443             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_x ? SAO_SHIFT_CTB * 2 : 0;
   3444             pu1_src_tmp_chroma -= ps_sao_ctxt->i4_ctb_y ? SAO_SHIFT_CTB * src_strd : 0;
   3445 
   3446             pu1_src_backup_chroma = ps_sao_ctxt->pu1_tmp_buf_chroma;
   3447 
   3448             loop_filter_bit_pos = (ps_sao_ctxt->i4_ctb_x << (log2_ctb_size - 3)) +
   3449                             (ps_sao_ctxt->i4_ctb_y << (log2_ctb_size - 3)) * (loop_filter_strd << 3);
   3450             if(ps_sao_ctxt->i4_ctb_x > 0)
   3451                 loop_filter_bit_pos -= 2;
   3452 
   3453             pu1_no_loop_filter_flag = ps_sao_ctxt->pu1_pic_no_loop_filter_flag +
   3454                             (loop_filter_bit_pos >> 3);
   3455 
   3456             for(i = -(ps_sao_ctxt->i4_ctb_y ? 2 * SAO_SHIFT_CTB : 0) >> log2_min_cu;
   3457                             i < (sao_blk_ht + (min_cu - 1)) >> log2_min_cu; i++)
   3458             {
   3459                 WORD32 tmp_wd = sao_blk_wd;
   3460 
   3461                 u4_no_loop_filter_flag = (*(UWORD32 *)(pu1_no_loop_filter_flag + i * loop_filter_strd)) >>
   3462                                 (loop_filter_bit_pos & 7);
   3463                 u4_no_loop_filter_flag &= (1 << ((tmp_wd + (min_cu - 1)) >> log2_min_cu)) - 1;
   3464 
   3465                 if(u4_no_loop_filter_flag)
   3466                 {
   3467                     while(tmp_wd > 0)
   3468                     {
   3469                         if(CTZ(u4_no_loop_filter_flag))
   3470                         {
   3471                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3472                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3473                             tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
   3474                             u4_no_loop_filter_flag  >>= (CTZ(u4_no_loop_filter_flag));
   3475                         }
   3476                         else
   3477                         {
   3478                             for(row = 0; row < min_cu / 2; row++)
   3479                             {
   3480                                 for(col = 0; col < MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd); col++)
   3481                                 {
   3482                                     pu1_src_tmp_chroma[row * src_strd + col] = pu1_src_backup_chroma[row * backup_strd + col];
   3483                                 }
   3484                             }
   3485 
   3486                             pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3487                             pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
   3488                             tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
   3489                             u4_no_loop_filter_flag  >>= (CTZ(~u4_no_loop_filter_flag));
   3490                         }
   3491                     }
   3492 
   3493                     pu1_src_tmp_chroma -= sao_blk_wd;
   3494                     pu1_src_backup_chroma -= sao_blk_wd;
   3495                 }
   3496 
   3497                 pu1_src_tmp_chroma += ((src_strd / 2) << log2_min_cu);
   3498                 pu1_src_backup_chroma += ((backup_strd / 2) << log2_min_cu);
   3499             }
   3500         }
   3501     }
   3502 
   3503 }
   3504 
   3505